package org.RI.P2;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.ParseException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.JSONValue;

/**
 * Builds a Lucene index from the JSON files found under a folder.
 *
 * <p>Typical use: construct with the data folder, then call {@link #populateIndex()}, which
 * opens the index, adds one document per JSON object and commits.
 */
public class Indexer {

    /** Writer for the index; assigned by {@link #openIndex()}, closed by {@link #commitChanges()}. */
    IndexWriter index;

    /** Folder that is scanned for input files and that also holds the index (see openIndex). */
    String folderPath;

    /** Regular files found under {@link #folderPath} when this instance was constructed. */
    List<File> files;

    /** Analyzer handed to the index writer configuration. */
    PerFieldAnalyzerWrapper customAnalyzer;

    /**
     * Collects the input files under {@code folderPath} and builds the analyzer.
     *
     * @param folderPath folder to scan recursively for input files
     * @throws IOException if the folder cannot be walked
     * @throws ParseException declared for callers' compatibility; nothing in this class throws it
     */
    Indexer(String folderPath) throws IOException, ParseException {
        this.folderPath = folderPath;
        files = readFiles();
        customAnalyzer = createAnalyzer();
    }

    /**
     * Creates the analyzer: {@link EnglishAnalyzer} for the {@code "title"} and
     * {@code "abstract"} fields, {@link WhitespaceAnalyzer} as the default for all others.
     *
     * @return the per-field analyzer wrapper
     */
    PerFieldAnalyzerWrapper createAnalyzer() {
        Map<String, Analyzer> analyzerPerField = new HashMap<>();
        analyzerPerField.put("title", new EnglishAnalyzer());
        analyzerPerField.put("abstract", new EnglishAnalyzer());
        return new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), analyzerPerField);
    }

    /**
     * Lists every regular file under {@link #folderPath}, recursively.
     *
     * @return the regular files found
     * @throws IOException if the folder cannot be walked
     */
    List<File> readFiles() throws IOException {
        // Files.walk holds open directory handles until the stream is closed.
        try (Stream<Path> paths = Files.walk(Paths.get(folderPath))) {
            return paths.filter(Files::isRegularFile)
                    .map(Path::toFile)
                    .collect(Collectors.toList());
        }
    }

    /**
     * Parses one file as JSON and returns its content as an array.
     *
     * <p>A top-level JSON array is returned as-is; any other parsed value is wrapped in a
     * one-element array. If {@code JSONValue.parse} returns {@code null} (its result for
     * content it cannot parse), an empty array is returned.
     *
     * @param file file to read, decoded as UTF-8
     * @return the parsed entries, never {@code null}
     * @throws IOException if the file cannot be opened or closed
     */
    @SuppressWarnings("unchecked") // JSONArray is a raw ArrayList in json-simple; add(Object) is safe.
    JSONArray parseJSONFile(File file) throws IOException {
        Object fileObject;
        try (InputStream jsonFile = new FileInputStream(file);
                Reader readerJson = new InputStreamReader(jsonFile, StandardCharsets.UTF_8)) {
            fileObject = JSONValue.parse(readerJson);
        }

        if (fileObject instanceof JSONArray) {
            // Wrapping an array in another array would hide its elements from addDocuments.
            return (JSONArray) fileObject;
        }

        JSONArray arrayObject = new JSONArray();
        if (fileObject != null) {
            arrayObject.add(fileObject);
        }
        return arrayObject;
    }

    /**
     * Opens (creating if necessary) the index stored at {@link #folderPath} and assigns the
     * writer to {@link #index}.
     *
     * <p>NOTE(review): the index lives in the same folder that {@link #readFiles()} scans, so on
     * a later run the index's own files are listed as input files. Presumably a separate index
     * directory was intended; confirm before changing, since readers of the index depend on
     * its location.
     *
     * @throws IOException if the directory or the writer cannot be opened
     */
    void openIndex() throws IOException {
        Directory dir = FSDirectory.open(Paths.get(folderPath));
        IndexWriterConfig config = new IndexWriterConfig(customAnalyzer);
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);
        index = new IndexWriter(dir, config);
    }

    /**
     * Adds one document to the index for every JSON object in {@code jsonObjects}. Entries that
     * are not JSON objects (including {@code null}) are skipped.
     *
     * <p>NOTE(review): the document is added without any fields. Presumably the
     * {@code "title"} and {@code "abstract"} values of each object were meant to be stored;
     * the JSON schema is not visible here, so this is left as-is. TODO confirm.
     *
     * @param jsonObjects entries parsed from one input file
     * @throws IOException if the writer fails to add a document
     */
    void addDocuments(JSONArray jsonObjects) throws IOException {
        for (Object element : jsonObjects) {
            if (!(element instanceof JSONObject)) {
                continue;
            }
            Document doc = new Document();
            index.addDocument(doc);
        }
    }

    /**
     * Commits pending changes and closes the writer. The writer is closed even when the commit
     * fails.
     *
     * @throws IOException if committing or closing fails
     */
    void commitChanges() throws IOException {
        try {
            index.commit();
        } finally {
            index.close();
        }
    }

    /**
     * Opens the index, indexes every file collected at construction time, then commits and
     * closes the index.
     *
     * @throws IOException if reading an input file or writing the index fails
     * @throws ParseException declared for callers' compatibility; nothing in this class throws it
     */
    void populateIndex() throws IOException, ParseException {
        openIndex();
        for (File file : files) {
            JSONArray jsonObjects = parseJSONFile(file);
            addDocuments(jsonObjects);
        }
        commitChanges();
    }

    /** Prints the command-line usage and terminates the JVM with exit status 1. */
    private static void usage() {
        System.out.println("Usage: Indexer <dataDirectory>");
        System.exit(1);
    }

    /**
     * Command-line entry point.
     *
     * @param args exactly one element: the data directory to index
     * @throws ParseException declared by the {@code Indexer} API
     * @throws IOException if reading the data or writing the index fails
     */
    public static void main(String[] args) throws ParseException, IOException {
        if (args.length != 1) {
            usage();
        }
        String dataDirectory = args[0];
        Indexer indexer = new Indexer(dataDirectory);
        indexer.populateIndex();
    }
}