Compare commits
5 Commits
d921d56bad
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
932e265e3f
|
|||
|
f1519b55c2
|
|||
|
3b21bd113e
|
|||
|
cea856ab60
|
|||
|
7dff28d7d0
|
112
docs/Summary.org
Normal file
112
docs/Summary.org
Normal file
@@ -0,0 +1,112 @@
|
||||
#+TITLE: Práctica final
|
||||
#+SUBTITLE: Recuperación de Información
|
||||
#+AUTHOR: Amin Kasrou Aouam
|
||||
#+DATE: 2021-01-11
|
||||
#+PANDOC_OPTIONS: template:~/.pandoc/templates/eisvogel.latex
|
||||
#+PANDOC_OPTIONS: listings:t
|
||||
#+PANDOC_OPTIONS: toc:t
|
||||
#+PANDOC_METADATA: lang=es
|
||||
#+PANDOC_METADATA: titlepage:t
|
||||
#+PANDOC_METADATA: listings-no-page-break:t
|
||||
#+PANDOC_METADATA: toc-own-page:t
|
||||
#+PANDOC_METADATA: table-use-row-colors:t
|
||||
#+PANDOC_METADATA: logo:/home/coolneng/Photos/Logos/UGR.png
|
||||
* Práctica final
|
||||
|
||||
En esta práctica, vamos a implementar un buscador de información sobre una serie de documentos /JSON/ usando /Apache Lucene/.
|
||||
|
||||
** /Parsing/
|
||||
|
||||
Parseamos los documentos /JSON/ mediante la librería /GSON/, dado que ésta nos ofrece la posibilidad de deserializar el documento en una clase Java.
|
||||
|
||||
Esta funcionalidad nos facilita la extracción de información en /JSON/ complejos. Para ello, creamos la clase *Paper*:
|
||||
|
||||
#+begin_src java
|
||||
package org.RI.P2;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.google.gson.annotations.SerializedName;
|
||||
|
||||
class Affiliation {
|
||||
String laboratory;
|
||||
String institution;
|
||||
}
|
||||
|
||||
class Author {
|
||||
String first;
|
||||
List<String> middle;
|
||||
String last;
|
||||
String suffix;
|
||||
Affiliation affiliation;
|
||||
String email;
|
||||
}
|
||||
|
||||
class Metadata {
|
||||
String title;
|
||||
List<Author> authors;
|
||||
}
|
||||
|
||||
class Abstract {
|
||||
String text;
|
||||
}
|
||||
|
||||
public class Paper {
|
||||
String paper_id;
|
||||
Metadata metadata;
|
||||
@SerializedName("abstract") List<Abstract> abstr;
|
||||
}
|
||||
#+end_src
|
||||
|
||||
Es esencial utilizar en nuestra clase el mismo nombre de atributo que en el /JSON/, dado que esto permitirá un /mapping/ correcto. En caso de que no fuera posible (por ejemplo, =abstract= es una palabra reservada en Java), podemos hacer uso de la anotación *@SerializedName*.
|
||||
|
||||
El único paso que nos falta es indicarle a la librería la entrada (/JSON/) y la clase.
|
||||
|
||||
#+begin_src java
|
||||
Gson gson = new Gson();
|
||||
Paper data = gson.fromJson(readerJson, Paper.class);
|
||||
#+end_src
|
||||
|
||||
** Indexación
|
||||
|
||||
Para la indexación, hemos elegido los siguientes atributos:
|
||||
|
||||
- =paper_id=
|
||||
- title
|
||||
- authors
|
||||
- institution
|
||||
- emails
|
||||
- abstract
|
||||
|
||||
Optamos por crear un índice en cada ejecución, para evitar obtener valores repetidos.
|
||||
|
||||
** Buscador
|
||||
|
||||
Implementamos un buscador con interfaz gráfica (GUI), basada en el proyecto de ejemplo disponible en la plataforma Prado.
|
||||
|
||||
Podemos optar por buscar en diferentes campos, mediante un argumento pasado por línea de comandos. Especificamos las distintas opciones en la sección siguiente.
|
||||
|
||||
** Ejecución
|
||||
|
||||
En el caso de que deseemos utilizar /Maven/, debemos ejecutar los siguientes comandos:
|
||||
|
||||
1. Compilar el proyecto
|
||||
|
||||
#+BEGIN_SRC shell
|
||||
mvn compile
|
||||
#+END_SRC
|
||||
|
||||
2. Ejecutar el proyecto
|
||||
|
||||
#+BEGIN_SRC shell
|
||||
mvn exec:java -Dexec.mainClass="org.RI.P2.Searcher" -Dexec.args="data title"
|
||||
#+END_SRC
|
||||
|
||||
Debemos modificar el argumento *title* según el campo en el que deseemos buscar:
|
||||
|
||||
- *title*
|
||||
- *authors*
|
||||
- *abstract*
|
||||
- *institutions*
|
||||
- *emails*
|
||||
|
||||
BIN
docs/Summary.pdf
Normal file
BIN
docs/Summary.pdf
Normal file
Binary file not shown.
5
pom.xml
5
pom.xml
@@ -29,6 +29,11 @@
|
||||
<artifactId>lucene-analyzers-common</artifactId>
|
||||
<version>8.6.3</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-queryparser</artifactId>
|
||||
<version>8.6.3</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.code.gson</groupId>
|
||||
<artifactId>gson</artifactId>
|
||||
|
||||
@@ -132,18 +132,4 @@ public class Indexer {
|
||||
commitChanges();
|
||||
}
|
||||
|
||||
private static void usage() {
|
||||
System.out.println("Usage: Indexer <directory>");
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws ParseException, IOException {
|
||||
if (args.length != 1) {
|
||||
usage();
|
||||
}
|
||||
String dataDirectory = args[0];
|
||||
String indexDirectory = ".index";
|
||||
Indexer indexer = new Indexer(dataDirectory, indexDirectory);
|
||||
indexer.populateIndex();
|
||||
}
|
||||
}
|
||||
|
||||
196
src/main/java/org/RI/P2/Searcher.java
Normal file
196
src/main/java/org/RI/P2/Searcher.java
Normal file
@@ -0,0 +1,196 @@
|
||||
package org.RI.P2;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Paths;
|
||||
import java.text.ParseException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import javax.swing.table.DefaultTableModel;
|
||||
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.queryparser.classic.QueryParser;
|
||||
|
||||
public class Searcher extends javax.swing.JFrame {
|
||||
IndexSearcher searcher;
|
||||
String dataPath;
|
||||
String indexPath;
|
||||
String field;
|
||||
|
||||
/**
 * Creates the search window.
 *
 * Stores the three configuration values, opens an index searcher through
 * {@link #createIndexSearcher()} and then builds the Swing components.
 *
 * @param dataPath  path received from the command line (only stored here)
 * @param indexPath directory that holds the Lucene index to open
 * @param field     name of the index field that queries are run against
 * @throws IOException if the index cannot be opened
 */
Searcher(String dataPath, String indexPath, String field) throws IOException {
    this.field = field;
    this.dataPath = dataPath;
    // indexPath must be set before the searcher is created: it is read there.
    this.indexPath = indexPath;
    this.searcher = createIndexSearcher();
    initComponents();
}
|
||||
|
||||
private void initComponents() {
|
||||
jTextField1 = new javax.swing.JTextField();
|
||||
jButton1 = new javax.swing.JButton();
|
||||
jScrollPane1 = new javax.swing.JScrollPane();
|
||||
jTable1 = new javax.swing.JTable();
|
||||
|
||||
setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);
|
||||
|
||||
jTextField1.setToolTipText("");
|
||||
|
||||
jButton1.setText("Buscar");
|
||||
jButton1.setToolTipText("");
|
||||
jButton1.addActionListener(new java.awt.event.ActionListener() {
|
||||
public void actionPerformed(java.awt.event.ActionEvent evt) {
|
||||
try {
|
||||
jButton1ActionPerformed(evt);
|
||||
} catch (IOException exp) {
|
||||
System.err.println(exp);
|
||||
} catch (org.apache.lucene.queryparser.classic.ParseException exp) {
|
||||
System.err.println(exp);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
jTable1.setModel(new javax.swing.table.DefaultTableModel(new Object[][] {
|
||||
|
||||
}, new String[] { "Titulo", "Autores" }));
|
||||
jScrollPane1.setViewportView(jTable1);
|
||||
|
||||
javax.swing.GroupLayout layout = new javax.swing.GroupLayout(getContentPane());
|
||||
getContentPane().setLayout(layout);
|
||||
layout.setHorizontalGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
|
||||
.addGroup(layout.createSequentialGroup().addContainerGap(19, Short.MAX_VALUE)
|
||||
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING, false)
|
||||
.addGroup(layout.createSequentialGroup().addComponent(jTextField1)
|
||||
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
|
||||
.addComponent(jButton1))
|
||||
.addComponent(jScrollPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 375,
|
||||
javax.swing.GroupLayout.PREFERRED_SIZE))
|
||||
.addContainerGap()));
|
||||
layout.setVerticalGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
|
||||
.addGroup(layout.createSequentialGroup().addGap(15, 15, 15)
|
||||
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE)
|
||||
.addComponent(jTextField1, javax.swing.GroupLayout.PREFERRED_SIZE,
|
||||
javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE)
|
||||
.addComponent(jButton1))
|
||||
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
|
||||
.addComponent(jScrollPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 275,
|
||||
javax.swing.GroupLayout.PREFERRED_SIZE)
|
||||
.addContainerGap(javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)));
|
||||
|
||||
pack();
|
||||
}
|
||||
|
||||
private void jButton1ActionPerformed(java.awt.event.ActionEvent evt)
|
||||
throws IOException, org.apache.lucene.queryparser.classic.ParseException {
|
||||
searchFiles(jTextField1.getText(), field);
|
||||
}
|
||||
|
||||
IndexSearcher createIndexSearcher() throws IOException {
|
||||
Directory indexDirectory = FSDirectory.open(Paths.get(indexPath));
|
||||
IndexReader indexReader = DirectoryReader.open(indexDirectory);
|
||||
searcher = new IndexSearcher(indexReader);
|
||||
return searcher;
|
||||
}
|
||||
|
||||
TopDocs queryIndex(String queryString, String field, int resultNumber)
|
||||
throws IOException, org.apache.lucene.queryparser.classic.ParseException {
|
||||
Query query = new QueryParser(field, new StandardAnalyzer()).parse(queryString);
|
||||
TopDocs topDocs = searcher.search(query, resultNumber);
|
||||
return topDocs;
|
||||
}
|
||||
|
||||
void showResults(TopDocs docs) throws IOException {
|
||||
System.out.println(docs.totalHits);
|
||||
DefaultTableModel model = (DefaultTableModel) jTable1.getModel();
|
||||
for (ScoreDoc scoreDoc : docs.scoreDocs) {
|
||||
Document doc = searcher.doc(scoreDoc.doc);
|
||||
model.addRow(new Object[] { doc.get("title"), doc.get("authors") });
|
||||
}
|
||||
}
|
||||
|
||||
void searchFiles(String query, String field)
|
||||
throws IOException, org.apache.lucene.queryparser.classic.ParseException {
|
||||
int resultNumber = 20;
|
||||
TopDocs results = queryIndex(query, field, resultNumber);
|
||||
showResults(results);
|
||||
}
|
||||
|
||||
private static void validateField(String fieldContent) {
|
||||
List<String> availableOptions = new ArrayList<>();
|
||||
availableOptions.add("title");
|
||||
availableOptions.add("authors");
|
||||
availableOptions.add("institutions");
|
||||
availableOptions.add("abstract");
|
||||
availableOptions.add("emails");
|
||||
if (!availableOptions.contains(fieldContent)) {
|
||||
System.out.println("Wrong field name. Available options:");
|
||||
System.out.println("authors");
|
||||
System.out.println("title");
|
||||
System.out.println("abstract");
|
||||
System.out.println("institutions");
|
||||
System.out.println("emails");
|
||||
System.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
private static void usage() {
|
||||
System.out.println("Usage: Searcher <directory>");
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
public static void main(String[] args)
|
||||
throws IOException, ParseException, org.apache.lucene.queryparser.classic.ParseException {
|
||||
if (args.length != 2) {
|
||||
usage();
|
||||
}
|
||||
String dataDirectory = args[0];
|
||||
String indexDirectory = ".index";
|
||||
String searchField = args[1];
|
||||
validateField(searchField);
|
||||
Indexer indexer = new Indexer(dataDirectory, indexDirectory);
|
||||
indexer.populateIndex();
|
||||
|
||||
try {
|
||||
for (javax.swing.UIManager.LookAndFeelInfo info : javax.swing.UIManager.getInstalledLookAndFeels()) {
|
||||
if ("Nimbus".equals(info.getName())) {
|
||||
javax.swing.UIManager.setLookAndFeel(info.getClassName());
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch (ClassNotFoundException ex) {
|
||||
java.util.logging.Logger.getLogger(Searcher.class.getName()).log(java.util.logging.Level.SEVERE, null, ex);
|
||||
} catch (InstantiationException ex) {
|
||||
java.util.logging.Logger.getLogger(Searcher.class.getName()).log(java.util.logging.Level.SEVERE, null, ex);
|
||||
} catch (IllegalAccessException ex) {
|
||||
java.util.logging.Logger.getLogger(Searcher.class.getName()).log(java.util.logging.Level.SEVERE, null, ex);
|
||||
} catch (javax.swing.UnsupportedLookAndFeelException ex) {
|
||||
java.util.logging.Logger.getLogger(Searcher.class.getName()).log(java.util.logging.Level.SEVERE, null, ex);
|
||||
}
|
||||
|
||||
/* Create and display the form */
|
||||
java.awt.EventQueue.invokeLater(new Runnable() {
|
||||
public void run() {
|
||||
try {
|
||||
new Searcher(dataDirectory, indexDirectory, searchField).setVisible(true);
|
||||
} catch (IOException exp) {
|
||||
System.err.println(exp);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Variables declaration - do not modify//GEN-BEGIN:variables
|
||||
private javax.swing.JButton jButton1;
|
||||
private javax.swing.JScrollPane jScrollPane1;
|
||||
private javax.swing.JTable jTable1;
|
||||
private javax.swing.JTextField jTextField1;
|
||||
// End of variables declaration//GEN-END:variables
|
||||
}
|
||||
107
src/main/java/org/RI/P2/SearcherForm.form
Normal file
107
src/main/java/org/RI/P2/SearcherForm.form
Normal file
@@ -0,0 +1,107 @@
|
||||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
|
||||
<Form version="1.3" maxVersion="1.9" type="org.netbeans.modules.form.forminfo.JFrameFormInfo">
|
||||
<Properties>
|
||||
<Property name="defaultCloseOperation" type="int" value="3"/>
|
||||
</Properties>
|
||||
<SyntheticProperties>
|
||||
<SyntheticProperty name="formSizePolicy" type="int" value="1"/>
|
||||
<SyntheticProperty name="generateCenter" type="boolean" value="false"/>
|
||||
</SyntheticProperties>
|
||||
<AuxValues>
|
||||
<AuxValue name="FormSettings_autoResourcing" type="java.lang.Integer" value="0"/>
|
||||
<AuxValue name="FormSettings_autoSetComponentName" type="java.lang.Boolean" value="false"/>
|
||||
<AuxValue name="FormSettings_generateFQN" type="java.lang.Boolean" value="true"/>
|
||||
<AuxValue name="FormSettings_generateMnemonicsCode" type="java.lang.Boolean" value="false"/>
|
||||
<AuxValue name="FormSettings_i18nAutoMode" type="java.lang.Boolean" value="false"/>
|
||||
<AuxValue name="FormSettings_layoutCodeTarget" type="java.lang.Integer" value="1"/>
|
||||
<AuxValue name="FormSettings_listenerGenerationStyle" type="java.lang.Integer" value="0"/>
|
||||
<AuxValue name="FormSettings_variablesLocal" type="java.lang.Boolean" value="false"/>
|
||||
<AuxValue name="FormSettings_variablesModifier" type="java.lang.Integer" value="2"/>
|
||||
</AuxValues>
|
||||
|
||||
<Layout>
|
||||
<DimensionLayout dim="0">
|
||||
<Group type="103" groupAlignment="0" attributes="0">
|
||||
<Group type="102" attributes="0">
|
||||
<EmptySpace pref="19" max="32767" attributes="0"/>
|
||||
<Group type="103" groupAlignment="0" max="-2" attributes="0">
|
||||
<Group type="102" alignment="0" attributes="0">
|
||||
<Component id="jTextField1" max="32767" attributes="0"/>
|
||||
<EmptySpace type="unrelated" max="-2" attributes="0"/>
|
||||
<Component id="jButton1" min="-2" max="-2" attributes="0"/>
|
||||
</Group>
|
||||
<Component id="jScrollPane1" min="-2" pref="375" max="-2" attributes="0"/>
|
||||
</Group>
|
||||
<EmptySpace max="-2" attributes="0"/>
|
||||
</Group>
|
||||
</Group>
|
||||
</DimensionLayout>
|
||||
<DimensionLayout dim="1">
|
||||
<Group type="103" groupAlignment="0" attributes="0">
|
||||
<Group type="102" alignment="0" attributes="0">
|
||||
<EmptySpace min="-2" pref="15" max="-2" attributes="0"/>
|
||||
<Group type="103" groupAlignment="3" attributes="0">
|
||||
<Component id="jTextField1" alignment="3" min="-2" max="-2" attributes="0"/>
|
||||
<Component id="jButton1" alignment="3" min="-2" max="-2" attributes="0"/>
|
||||
</Group>
|
||||
<EmptySpace max="-2" attributes="0"/>
|
||||
<Component id="jScrollPane1" min="-2" pref="275" max="-2" attributes="0"/>
|
||||
<EmptySpace max="32767" attributes="0"/>
|
||||
</Group>
|
||||
</Group>
|
||||
</DimensionLayout>
|
||||
</Layout>
|
||||
<SubComponents>
|
||||
<Component class="javax.swing.JTextField" name="jTextField1">
|
||||
<Properties>
|
||||
<Property name="toolTipText" type="java.lang.String" value=""/>
|
||||
</Properties>
|
||||
</Component>
|
||||
<Component class="javax.swing.JButton" name="jButton1">
|
||||
<Properties>
|
||||
<Property name="text" type="java.lang.String" value="Buscar"/>
|
||||
<Property name="toolTipText" type="java.lang.String" value=""/>
|
||||
</Properties>
|
||||
<Events>
|
||||
<EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="jButton1ActionPerformed"/>
|
||||
</Events>
|
||||
</Component>
|
||||
<Container class="javax.swing.JScrollPane" name="jScrollPane1">
|
||||
<AuxValues>
|
||||
<AuxValue name="autoScrollPane" type="java.lang.Boolean" value="true"/>
|
||||
</AuxValues>
|
||||
|
||||
<Layout class="org.netbeans.modules.form.compat2.layouts.support.JScrollPaneSupportLayout"/>
|
||||
<SubComponents>
|
||||
<Component class="javax.swing.JTable" name="jTable1">
|
||||
<Properties>
|
||||
<Property name="model" type="javax.swing.table.TableModel" editor="org.netbeans.modules.form.editors2.TableModelEditor">
|
||||
<Table columnCount="2" rowCount="0">
|
||||
<Column editable="true" title="Titulo" type="java.lang.Object"/>
|
||||
<Column editable="true" title="Autores" type="java.lang.Object"/>
|
||||
</Table>
|
||||
</Property>
|
||||
<Property name="columnModel" type="javax.swing.table.TableColumnModel" editor="org.netbeans.modules.form.editors2.TableColumnModelEditor">
|
||||
<TableColumnModel selectionModel="0">
|
||||
<Column maxWidth="-1" minWidth="-1" prefWidth="-1" resizable="true">
|
||||
<Title/>
|
||||
<Editor/>
|
||||
<Renderer/>
|
||||
</Column>
|
||||
<Column maxWidth="-1" minWidth="-1" prefWidth="-1" resizable="true">
|
||||
<Title/>
|
||||
<Editor/>
|
||||
<Renderer/>
|
||||
</Column>
|
||||
</TableColumnModel>
|
||||
</Property>
|
||||
<Property name="tableHeader" type="javax.swing.table.JTableHeader" editor="org.netbeans.modules.form.editors2.JTableHeaderEditor">
|
||||
<TableHeader reorderingAllowed="true" resizingAllowed="true"/>
|
||||
</Property>
|
||||
</Properties>
|
||||
</Component>
|
||||
</SubComponents>
|
||||
</Container>
|
||||
</SubComponents>
|
||||
</Form>
|
||||
Reference in New Issue
Block a user