RI-P2/src/main/java/org/RI/P2/Indexer.java

111 lines
3.6 KiB
Java

package org.RI.P2;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.ParseException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.JSONValue;
/**
 * Builds a Lucene index from the JSON files found under a directory.
 *
 * <p>Typical use is {@code new Indexer(dir).populateIndex()}: the constructor
 * collects every regular file under {@code dir}, and {@link #populateIndex()}
 * opens the index, parses each file, adds one document per JSON object and
 * commits.
 *
 * <p>NOTE(review): the index is opened in the same directory that holds the
 * input files (see {@link #openIndex()}), so a later run will also walk over
 * the index's own files. Files whose parse result is {@code null} are skipped
 * by {@link #parseJSONFile(File)}, but a dedicated index directory would be
 * cleaner.
 */
public class Indexer {
    /** Writer for the index; {@code null} until {@link #openIndex()} is called. */
    IndexWriter index;
    /** Directory that is scanned for input files and also used as index location. */
    String folderPath;
    /** Regular files found under {@link #folderPath} at construction time. */
    List<File> files;
    /** Analyzer handed to the {@link IndexWriterConfig} in {@link #openIndex()}. */
    PerFieldAnalyzerWrapper customAnalyzer;

    /**
     * Collects the input files under {@code folderPath} and prepares the analyzer.
     *
     * @param folderPath directory to scan recursively for input files
     * @throws IOException if the directory cannot be walked
     * @throws ParseException never thrown by this code; kept so existing callers
     *         that declare or catch it continue to compile
     */
    Indexer(String folderPath) throws IOException, ParseException {
        this.folderPath = folderPath;
        files = readFiles();
        customAnalyzer = createAnalyzer();
    }

    /**
     * Creates the per-field analyzer: {@link EnglishAnalyzer} for the
     * {@code "title"} and {@code "abstract"} fields, {@link WhitespaceAnalyzer}
     * for every other field.
     *
     * @return the configured analyzer wrapper
     */
    PerFieldAnalyzerWrapper createAnalyzer() {
        Map<String, Analyzer> analyzerPerField = new HashMap<>();
        analyzerPerField.put("title", new EnglishAnalyzer());
        analyzerPerField.put("abstract", new EnglishAnalyzer());
        return new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), analyzerPerField);
    }

    /**
     * Lists every regular file below {@link #folderPath}, recursively.
     *
     * @return the regular files found, as {@link File} objects
     * @throws IOException if the directory cannot be walked
     */
    List<File> readFiles() throws IOException {
        // Files.walk keeps directory handles open until the stream is closed.
        try (Stream<Path> paths = Files.walk(Paths.get(folderPath))) {
            return paths.filter(Files::isRegularFile)
                    .map(Path::toFile)
                    .collect(Collectors.toList());
        }
    }

    /**
     * Parses one file as JSON (decoded as UTF-8) and returns its content as an array.
     *
     * <p>If the top-level value is already an array it is returned as is; any
     * other non-null value is wrapped in a single-element array. If the parser
     * yields {@code null}, an empty array is returned so the file contributes
     * no documents.
     *
     * @param file the file to parse
     * @return the parsed content as an array, never {@code null}
     * @throws IOException if the file cannot be opened, read or closed
     */
    @SuppressWarnings("unchecked") // json-simple's JSONArray is a raw java.util.ArrayList
    JSONArray parseJSONFile(File file) throws IOException {
        Object parsed;
        try (InputStream jsonFile = new FileInputStream(file);
                Reader readerJson = new InputStreamReader(jsonFile, StandardCharsets.UTF_8)) {
            parsed = JSONValue.parse(readerJson);
        }
        if (parsed instanceof JSONArray) {
            // Do not nest an array inside another array: addDocuments expects
            // the elements themselves to be JSON objects.
            return (JSONArray) parsed;
        }
        JSONArray arrayObject = new JSONArray();
        if (parsed != null) {
            arrayObject.add(parsed);
        }
        return arrayObject;
    }

    /**
     * Opens (creating it if necessary) the index located at {@link #folderPath}
     * and stores the writer in {@link #index}.
     *
     * @throws IOException if the directory or the writer cannot be opened
     */
    void openIndex() throws IOException {
        Directory dir = FSDirectory.open(Paths.get(folderPath));
        IndexWriterConfig config = new IndexWriterConfig(customAnalyzer);
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);
        index = new IndexWriter(dir, config);
    }

    /**
     * Adds one document to the index for every JSON object in {@code jsonObjects}.
     * Elements that are not JSON objects (including {@code null}) are skipped.
     *
     * <p>TODO(review): the documents are added without any fields, so nothing
     * from the JSON object is searchable yet. The analyzer is configured for
     * {@code "title"} and {@code "abstract"}; presumably those keys should be
     * copied into the document here -- confirm against the data schema.
     *
     * @param jsonObjects parsed content of one input file
     * @throws IOException if the writer fails to add a document
     */
    void addDocuments(JSONArray jsonObjects) throws IOException {
        for (Object element : jsonObjects) {
            if (!(element instanceof JSONObject)) {
                continue;
            }
            Document doc = new Document();
            index.addDocument(doc);
        }
    }

    /**
     * Commits pending changes and closes the index writer. The writer is closed
     * even when the commit fails.
     *
     * @throws IOException if committing or closing fails
     */
    void commitChanges() throws IOException {
        try (IndexWriter writer = index) {
            writer.commit();
        }
    }

    /**
     * Opens the index, indexes every file collected at construction time, then
     * commits and closes the index. If parsing or indexing fails, the writer is
     * rolled back (which also closes it) before the failure is rethrown.
     *
     * @throws IOException if reading a file or writing the index fails
     * @throws ParseException never thrown by this code; kept so existing callers
     *         that declare or catch it continue to compile
     */
    void populateIndex() throws IOException, ParseException {
        openIndex();
        try {
            for (File file : files) {
                JSONArray jsonObjects = parseJSONFile(file);
                addDocuments(jsonObjects);
            }
        } catch (IOException | RuntimeException e) {
            try {
                // rollback() discards uncommitted documents and closes the writer.
                index.rollback();
            } catch (IOException rollbackFailure) {
                e.addSuppressed(rollbackFailure);
            }
            throw e;
        }
        commitChanges();
    }

    /** Prints the command-line usage and terminates the JVM with exit status 1. */
    private static void usage() {
        System.out.println("Usage: Indexer <directory>");
        System.exit(1);
    }

    /**
     * Command-line entry point: expects exactly one argument, the directory to index.
     *
     * @param args command-line arguments
     * @throws ParseException declared for compatibility with the constructor and
     *         {@link #populateIndex()}
     * @throws IOException if reading the input or writing the index fails
     */
    public static void main(String[] args) throws ParseException, IOException {
        if (args.length != 1) {
            usage();
        }
        String dataDirectory = args[0];
        Indexer indexer = new Indexer(dataDirectory);
        indexer.populateIndex();
    }
}