Support of Apache Lucene for Java
Supported Libraries
Library | Version | Supported |
---|---|---|
Apache Lucene | up to: 9.6.0 |
Supported Operations
Operation | Methods Supported |
---|---|
Insert | org.apache.lucene.index.IndexWriter.addDocument org.apache.lucene.index.IndexWriter.addDocuments org.apache.lucene.index.IndexWriter.addIndexes org.apache.lucene.search.IndexSearcher.setQueryCache org.apache.lucene.index.IndexWriter.commit |
Select | org.apache.lucene.index.IndexReader.numDocs org.apache.lucene.index.IndexReader.maxDoc org.apache.lucene.index.IndexReader.document org.apache.lucene.index.IndexReader.docFreq org.apache.lucene.index.IndexReader.getRefCount org.apache.lucene.index.IndexReader.numDeletedDocs org.apache.lucene.index.IndexReader.hasDeletions org.apache.lucene.index.IndexReader.getSumDocFreq org.apache.lucene.index.IndexReader.getDocCount org.apache.lucene.index.IndexReader.getSumTotalTermFreq org.apache.lucene.search.IndexSearcher.search org.apache.lucene.search.IndexSearcher.explain org.apache.lucene.search.IndexSearcher.count org.apache.lucene.search.IndexSearcher.totalTermFreq org.apache.lucene.search.IndexSearcher.getQueryCache org.apache.lucene.search.IndexSearcher.getIndexReader org.apache.lucene.search.IndexSearcher.doc org.apache.lucene.search.IndexSearcher.storedFields org.apache.lucene.search.IndexSearcher.getSlices org.apache.lucene.search.IndexSearcher.searchAfter |
Delete | org.apache.lucene.index.IndexWriter.deleteDocuments org.apache.lucene.index.IndexWriter.deleteAll |
Update | org.apache.lucene.index.IndexWriter.updateDocument |
Objects
Icon | Description |
---|---|
Java ApacheLucene Index | |
Java Unknown ApacheLucene Index |
Links
All links are created between the caller Java method object and the ApacheLucene Index object:
Link type | Methods Supported |
---|---|
useSelectLink | numDocs maxDoc document docFreq getRefCount numDeletedDocs hasDeletions getSumDocFreq getDocCount getSumTotalTermFreq search explain count totalTermFreq getQueryCache getIndexReader doc storedFields getSlices searchAfter |
useInsertLink | addDocument addDocuments addIndexes setQueryCache commit |
useDeleteLink | deleteDocuments deleteAll |
useUpdateLink | updateDocument |
What results can you expect?
Some example scenarios are shown below:
ApacheLucene Index
public class LuceneReadIndexExample
{
// File-system location of the Lucene index; createSearcher() opens it via FSDirectory.open(Paths.get(INDEX_DIR)).
private static final String INDEX_DIR = "c:/temp/lucene6index";
/**
 * Demo entry point: opens the index, runs one search by ID and one search by first name,
 * and prints the hit count followed by one stored field value per hit.
 *
 * @param args unused
 * @throws Exception if the index cannot be opened or a search fails
 */
public static void main(String[] args) throws Exception
{
    IndexSearcher searcher = createSearcher();
    try
    {
        //Search by ID
        TopDocs foundDocs = searchById(1, searcher);
        System.out.println("Total Results :: " + foundDocs.totalHits);
        for (ScoreDoc sd : foundDocs.scoreDocs)
        {
            Document d = searcher.doc(sd.doc);
            // Print the stored value as-is. Passing it to String.format would treat index
            // data as a format string and fail on '%' or on a missing (null) field.
            System.out.println(d.get("firstName"));
        }
        //Search by firstName
        TopDocs foundDocs2 = searchByFirstName("Brian", searcher);
        System.out.println("Total Results :: " + foundDocs2.totalHits);
        for (ScoreDoc sd : foundDocs2.scoreDocs)
        {
            Document d = searcher.doc(sd.doc);
            System.out.println(d.get("id"));
        }
    }
    finally
    {
        // The reader opened by createSearcher() is only reachable through the searcher,
        // so release it here once both searches are done.
        searcher.getIndexReader().close();
    }
}
/**
 * Opens the file-system index at {@code INDEX_DIR} and wraps a reader on it in an
 * {@link IndexSearcher}.
 *
 * @return a searcher backed by a newly opened reader
 * @throws IOException if the directory or the index reader cannot be opened
 */
private static IndexSearcher createSearcher() throws IOException
{
    final IndexReader indexReader = DirectoryReader.open(FSDirectory.open(Paths.get(INDEX_DIR)));
    return new IndexSearcher(indexReader);
}
/**
 * Returns the {@code writer} field, creating the {@link IndexWriter} on first use.
 * <p>
 * When {@code indexPath} equals "RAM" (case-insensitive) the writer is built on the
 * {@code ramDir} field, which is created if it is still null; otherwise it is built on a
 * file-system directory opened at {@code indexPath}.
 *
 * @return the existing or newly created writer
 * @throws LuceneAlertException if {@code indexPath} is null, or wrapping any
 *         CorruptIndexException / IOException raised while creating the writer
 */
public synchronized IndexWriter retrieveIndexWriter() throws LuceneAlertException
{
    // Nothing to do once the writer exists.
    if (writer != null)
    {
        return writer;
    }
    if (indexPath == null)
    {
        logger.fatal("No Lucene Index Path has been defined.");
        throw new LuceneAlertException(
            "No Lucene Index Path has been defined.Please define that in the configuration file.");
    }
    logger.debug("inside of indexPath !=null");
    try
    {
        if ("RAM".equalsIgnoreCase(indexPath))
        {
            // using memory index
            if (ramDir == null)
            {
                ramDir = new RAMDirectory();
            }
            writer = new IndexWriter(ramDir, new StandardAnalyzer(Version.LUCENE_29),
                IndexWriter.MaxFieldLength.LIMITED);
        }
        else
        {
            writer = new IndexWriter(FSDirectory.open(new File(indexPath)),
                new StandardAnalyzer(Version.LUCENE_29), IndexWriter.MaxFieldLength.LIMITED);
        }
        // Same lock timeout for both directory kinds.
        writer.setWriteLockTimeout(WRITER_LOCK_TIMEDOUT);
    }
    catch (CorruptIndexException ce)
    {
        logger.error("CorruptIndexException thrown when retrieving writer." + ce.getMessage());
        throw new LuceneAlertException(ce);
    }
    catch (IOException ioe)
    {
        logger.error(" IOException thrown when retrieving writer." + ioe.getMessage());
        throw new LuceneAlertException(ioe);
    }
    return writer;
}
Select Operation
/**
 * Runs {@code query} against the index and either updates the matching alert documents
 * or returns one page of them.
 *
 * @param query        the Lucene query to execute
 * @param updateRecord when true, the hits are passed to {@code updateAlertDoc} and the
 *                     index is flagged as updated; when false, a page of hits is returned
 * @param writer       writer handed to {@code updateAlertDoc}; only used when
 *                     {@code updateRecord} is true
 * @param index        position of the first hit to return (values below 0 are treated as 0)
 * @param batch        maximum number of hits to return, counted from {@code index}
 * @return a result whose document list is populated only when {@code updateRecord} is false
 * @throws IOException          if the search or a document load fails
 * @throws LuceneAlertException declared for callers; see {@code retrieveIndexSearcher} and
 *                              {@code updateAlertDoc}, which are defined outside this block
 */
public synchronized LuceneAlertSearchResult search(Query query, boolean updateRecord, IndexWriter writer,
    int index, int batch)
    throws IOException, LuceneAlertException
{
    if (logger.isDebugEnabled())
    {
        logger.debug("Entering LuceneAlertSearchResult(), query=" + query + ", updateRecord=" + updateRecord
            + ", index=" + index + ", batch=" + batch);
    }
    LuceneAlertSearchResult result = new LuceneAlertSearchResult();
    IndexSearcher searcher = retrieveIndexSearcher();
    if (logger.isDebugEnabled())
    {
        logger.debug("Hashcode for this=" + this + " ,for indexSearcher=" + searcher);
    }
    // maxDoc() is 0 for an empty index and Lucene rejects a non-positive hit limit,
    // so ask for at least one hit; an empty index then simply yields zero results.
    int hitLimit = Math.max(1, searcher.maxDoc());
    TopDocs resultDocs = searcher.search(query, hitLimit);
    // for clearAlert
    if (updateRecord)
    {
        updateAlertDoc(writer, resultDocs);
        setIndexUpdated(true);
        if (logger.isDebugEnabled())
        {
            logger.debug("setIndexUpdated to true");
        }
    }
    // for alert search
    else
    {
        ScoreDoc[] resultScoreDocs = resultDocs.scoreDocs;
        int totalHits = resultDocs.totalHits;
        int start = Math.max(index, 0);
        // Never read past the hits that were actually collected.
        int available = Math.min(totalHits, resultScoreDocs.length);
        // Widen before adding so a large index + batch cannot wrap around to a negative end.
        long requestedEnd = (long) index + batch;
        int end = (int) Math.max(0L, Math.min(requestedEnd, (long) available));
        if (logger.isDebugEnabled())
        {
            logger.debug("Total hits=" + totalHits + ", start=" + start + ", end=" + end);
        }
        List<Document> docList = new ArrayList<Document>(Math.max(0, end - start));
        for (int i = start; i < end; i++)
        {
            docList.add(searcher.doc(resultScoreDocs[i].doc));
        }
        result.setDocs(docList);
    }
    if (logger.isDebugEnabled())
    {
        logger.debug("LuceneAlertSearchResult() DONE");
    }
    return result;
}
Insert Operation
/**
 * Converts the alert to a Lucene document, adds it through the helper's index writer,
 * commits, and flags the index as updated.
 *
 * @param alertDTO the alert to index
 * @throws LuceneAlertException if no writer is available, or wrapping any I/O failure
 *         (including index corruption) raised while adding or committing
 */
@Override
public void addDocument(AlertDTO alertDTO) throws LuceneAlertException
{
    final Document luceneDoc = convertToDocument(alertDTO);
    if (logger.isDebugEnabled())
    {
        logger.debug("Document object ready to be added into index: " + luceneDoc);
    }

    final IndexWriter indexWriter = luceneAlertHelper.retrieveIndexWriter();
    if (indexWriter == null)
    {
        logger.error("Failed to obtain index writer when trying to add doc to index");
        throw new LuceneAlertException("Failed to obtain index writer");
    }

    try
    {
        indexWriter.addDocument(luceneDoc);
        luceneAlertHelper.commit();
        luceneAlertHelper.setIndexUpdated(true);
    }
    catch (IOException failure)
    {
        // CorruptIndexException is an IOException subtype and was wrapped the same way,
        // so a single handler covers both.
        throw new LuceneAlertException(failure);
    }

    if (logger.isInfoEnabled())
    {
        logger.info("Document added for alert: " + alertDTO.getId());
    }
}
Update Operation
/**
 * Demo entry point: indexes every file under the "inputFiles" folder into the index at
 * "C:/index/indexedFiles", creating the index or appending to an existing one.
 * I/O failures are reported on standard error.
 *
 * @param args unused
 */
public static void main(String[] args)
{
    //Input folder
    String docsPath = "inputFiles";
    //Output folder
    String indexPath = "C:/index/indexedFiles";
    //Input Path Variable
    final Path docDir = Paths.get(docsPath);

    //analyzer with the default stop words
    Analyzer analyzer = new StandardAnalyzer();
    //IndexWriter Configuration
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);

    // try-with-resources closes the writer (and then the directory) even when indexing
    // fails part-way, so the write lock is never left behind.
    try (Directory dir = FSDirectory.open(Paths.get(indexPath));
         //IndexWriter writes new index files to the directory
         IndexWriter writer = new IndexWriter(dir, iwc))
    {
        //Recursive method that iterates over all files and directories
        indexDocs(writer, docDir);
    }
    catch (IOException e)
    {
        e.printStackTrace();
    }
}
/**
 * Indexes {@code path}: a regular file is indexed directly, a directory is walked and
 * every file found in it is indexed.
 * <p>
 * While walking a directory, an {@link IOException} from a single file is printed and the
 * walk continues with the next file.
 *
 * @param writer the writer receiving the documents
 * @param path   a file or directory to index
 * @throws IOException if a single file cannot be indexed, or if the directory walk itself fails
 */
static void indexDocs(final IndexWriter writer, Path path) throws IOException
{
    // Single file: index it and stop.
    if (!Files.isDirectory(path))
    {
        long modifiedMillis = Files.getLastModifiedTime(path).toMillis();
        indexDoc(writer, path, modifiedMillis);
        return;
    }

    // Directory: visit every file below it.
    Files.walkFileTree(path, new SimpleFileVisitor<Path>()
    {
        @Override
        public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException
        {
            try
            {
                long modifiedMillis = attrs.lastModifiedTime().toMillis();
                indexDoc(writer, file, modifiedMillis);
            }
            catch (IOException ioe)
            {
                // Best effort: report the failing file and keep walking.
                ioe.printStackTrace();
            }
            return FileVisitResult.CONTINUE;
        }
    });
}
/**
 * Builds a Lucene document for one file (stored path, modification time, stored contents)
 * and writes it with {@code updateDocument}, keyed on the file path.
 *
 * @param writer       the writer receiving the document
 * @param file         the file to index
 * @param lastModified the file's last-modified time in milliseconds
 * @throws IOException if the file cannot be read or the index cannot be updated
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException
{
    // Read the file once and decode it with an explicit charset, so the indexed text does
    // not depend on the platform default encoding.
    // NOTE(review): assumes the input files are UTF-8 encoded; confirm for your corpus.
    String contents = new String(Files.readAllBytes(file), java.nio.charset.StandardCharsets.UTF_8);

    //Create lucene Document
    Document doc = new Document();
    doc.add(new StringField("path", file.toString(), Field.Store.YES));
    doc.add(new LongPoint("modified", lastModified));
    doc.add(new TextField("contents", contents, Field.Store.YES));

    //Updates a document by first deleting the document(s)
    //containing <code>term</code> and then adding the new
    //document. The delete and then add are atomic as seen
    //by a reader on the same index
    writer.updateDocument(new Term("path", file.toString()), doc);
}
}
Delete Operation
/**
 * Deletes the index document(s) whose {@code LuceneAlertFields.ID} term matches the
 * alert's ID, commits, and flags the index as updated.
 *
 * @param alertDTO the alert whose document should be removed
 * @throws LuceneAlertException if no writer is available, or wrapping any
 *         CorruptIndexException / IOException raised while deleting or committing
 */
@Override
public void deleteDocument(AlertDTO alertDTO) throws LuceneAlertException
{
    if (logger.isInfoEnabled())
    {
        logger.info("delete document for alert:" + alertDTO.getId());
    }
    IndexWriter writer = luceneAlertHelper.retrieveIndexWriter();
    if (writer == null)
    {
        logger.error("Failed to obtain the index writer." + " Deleting document for alert: " + alertDTO.getId()
            + " failed.");
        throw new LuceneAlertException("Failed to obtain index writer");
    }
    // The former "new Long(id) == null" guard could never be true (a freshly constructed
    // object is never null), so it has been removed. The ID is converted once here;
    // Long.valueOf replaces the deprecated Long constructor.
    String idText = Long.valueOf(alertDTO.getId()).toString();
    try
    {
        writer.deleteDocuments(new Term(LuceneAlertFields.ID, idText));
        luceneAlertHelper.commit();
        luceneAlertHelper.setIndexUpdated(true);
    }
    catch (CorruptIndexException e)
    {
        logger.error("A CorrupIndexException is thrown when Document is deleted from Lucene Interface for " +
            alertDTO.getBan() + ". " + e.getMessage());
        throw new LuceneAlertException(e);
    }
    catch (IOException e)
    {
        logger.error("A IOException is thrown when Document is deleted from Lucene Interface for " +
            alertDTO.getBan() + ". " + e.getMessage());
        throw new LuceneAlertException(e);
    }
    if (logger.isInfoEnabled())
    {
        logger.info(alertDTO.getId() + " deleted.");
    }
}
Known Limitations
- If the index location or path cannot be resolved from the application source code, a Java Unknown ApacheLucene Index object is created instead.