111 lines
3.6 KiB
Java
111 lines
3.6 KiB
Java
package org.RI.P2;
|
|
|
|
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.ParseException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.JSONValue;
|
|
|
|
public class Indexer {
|
|
IndexWriter index;
|
|
String folderPath;
|
|
List<File> files;
|
|
PerFieldAnalyzerWrapper customAnalyzer;
|
|
|
|
Indexer(String folderPath) throws IOException, ParseException {
|
|
this.folderPath = folderPath;
|
|
files = readFiles();
|
|
customAnalyzer = createAnalyzer();
|
|
}
|
|
|
|
PerFieldAnalyzerWrapper createAnalyzer() {
|
|
Map<String, Analyzer> analyzerPerField = new HashMap<>();
|
|
analyzerPerField.put("title", new EnglishAnalyzer());
|
|
analyzerPerField.put("abstract", new EnglishAnalyzer());
|
|
PerFieldAnalyzerWrapper customAnalyzer = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(),
|
|
analyzerPerField);
|
|
return customAnalyzer;
|
|
}
|
|
|
|
List<File> readFiles() throws IOException {
|
|
List<File> files = Files.walk(Paths.get(folderPath)).filter(Files::isRegularFile).map(Path::toFile)
|
|
.collect(Collectors.toList());
|
|
return files;
|
|
}
|
|
|
|
JSONArray parseJSONFile(File file) throws IOException {
|
|
InputStream jsonFile = new FileInputStream(file);
|
|
Reader readerJson = new InputStreamReader(jsonFile);
|
|
Object fileObject = JSONValue.parse(readerJson);
|
|
JSONArray arrayObject = new JSONArray();
|
|
arrayObject.add(fileObject);
|
|
return arrayObject;
|
|
}
|
|
|
|
void openIndex() throws IOException {
|
|
Directory dir = FSDirectory.open(Paths.get(folderPath));
|
|
IndexWriterConfig config = new IndexWriterConfig(customAnalyzer);
|
|
config.setOpenMode(OpenMode.CREATE_OR_APPEND);
|
|
index = new IndexWriter(dir, config);
|
|
}
|
|
|
|
void addDocuments(JSONArray jsonObjects) throws IOException {
|
|
for (JSONObject object : (List<JSONObject>) jsonObjects) {
|
|
Document doc = new Document();
|
|
index.addDocument(doc);
|
|
}
|
|
}
|
|
|
|
void commitChanges() throws IOException {
|
|
index.commit();
|
|
index.close();
|
|
}
|
|
|
|
void populateIndex() throws IOException, ParseException {
|
|
createIndex();
|
|
for (File file : files) {
|
|
JSONArray jsonObjects = parseJSONFile(file);
|
|
addDocument(jsonObjects);
|
|
}
|
|
commitChanges();
|
|
}
|
|
|
|
private static void usage() {
|
|
System.out.println("Usage: Indexer <directory>");
|
|
System.exit(1);
|
|
}
|
|
|
|
public static void main(String[] args) throws ParseException, IOException {
|
|
if (args.length != 1) {
|
|
usage();
|
|
}
|
|
String dataDirectory = args[0];
|
|
Indexer indexer = new Indexer(dataDirectory);
|
|
indexer.populateIndex();
|
|
}
|
|
}
|