Support of Apache Lucene for Java

Supported Libraries

Library Version Supported
Apache Lucene up to: 9.6.0

✔

Supported Operations

Operation Methods Supported
Insert

org.apache.lucene.index.IndexWriter.addDocument

org.apache.lucene.index.IndexWriter.addDocuments

org.apache.lucene.index.IndexWriter.addIndexes

org.apache.lucene.search.IndexSearcher.setQueryCache

org.apache.lucene.index.IndexWriter.commit

Select

org.apache.lucene.index.IndexReader.numDocs

org.apache.lucene.index.IndexReader.maxDoc

org.apache.lucene.index.IndexReader.document

org.apache.lucene.index.IndexReader.docFreq

org.apache.lucene.index.IndexReader.getRefCount

org.apache.lucene.index.IndexReader.numDeletedDocs

org.apache.lucene.index.IndexReader.hasDeletions

org.apache.lucene.index.IndexReader.getSumDocFreq

org.apache.lucene.index.IndexReader.getDocCount

org.apache.lucene.index.IndexReader.getSumTotalTermFreq

org.apache.lucene.search.IndexSearcher.search

org.apache.lucene.search.IndexSearcher.explain

org.apache.lucene.search.IndexSearcher.count

org.apache.lucene.search.IndexSearcher.totalTermFreq

org.apache.lucene.search.IndexSearcher.getQueryCache

org.apache.lucene.search.IndexSearcher.getIndexReader

org.apache.lucene.search.IndexSearcher.doc

org.apache.lucene.search.IndexSearcher.storedFields

org.apache.lucene.search.IndexSearcher.getSlices

org.apache.lucene.search.IndexSearcher.searchAfter

Delete

org.apache.lucene.index.IndexWriter.deleteDocuments

org.apache.lucene.index.IndexWriter.deleteAll

Update

org.apache.lucene.index.IndexWriter.updateDocument

Objects

Icon Description

Java ApacheLucene Index

Java Unknown ApacheLucene Index

All links are created between the caller Java method object and the ApacheLucene Index object:

Link type Methods Supported
useSelectLink

numDocs

maxDoc

document

docFreq

getRefCount

numDeletedDocs

hasDeletions

getSumDocFreq

getDocCount

getSumTotalTermFreq

search

explain

count

totalTermFreq

getQueryCache

getIndexReader

doc

storedFields

getSlices

searchAfter

useInsertLink

addDocument

addIndexes

setQueryCache

commit

useDeleteLink

deleteDocuments

deleteAll

useUpdateLink

updateDocument

What results can you expect?

Some example scenarios are shown below:

ApacheLucene Index

public class LuceneReadIndexExample 
{
    private static final String INDEX_DIR = "c:/temp/lucene6index";

    /**
     * Opens the index, runs one search by ID and one by first name, and prints the
     * hit count plus one stored field of every hit.
     *
     * @param args command-line arguments; not used
     * @throws Exception if the index cannot be opened, searched or read
     */
    public static void main(String[] args) throws Exception
    {
        IndexSearcher searcher = createSearcher();

        try
        {
            //Search by ID
            TopDocs foundDocs = searchById(1, searcher);
            printHits(searcher, foundDocs, "firstName");

            //Search by firstName
            TopDocs foundDocs2 = searchByFirstName("Brian", searcher);
            printHits(searcher, foundDocs2, "id");
        }
        finally
        {
            // Nothing visible here closes the reader that createSearcher() opens,
            // so release it once both searches are done (or have failed).
            searcher.getIndexReader().close();
        }
    }

    /**
     * Prints the total hit count followed by the stored {@code fieldName} value of each hit.
     *
     * @param searcher  searcher used to load the stored documents
     * @param hits      search result to print
     * @param fieldName name of the stored field to print for every hit
     * @throws IOException if a document cannot be loaded from the index
     */
    private static void printHits(IndexSearcher searcher, TopDocs hits, String fieldName) throws IOException
    {
        System.out.println("Total Results :: " + hits.totalHits);

        for (ScoreDoc sd : hits.scoreDocs)
        {
            Document d = searcher.doc(sd.doc);
            // Print the value directly: String.format would interpret any '%' in the
            // stored text as a format specifier and would throw on a missing field.
            System.out.println(d.get(fieldName));
        }
    }
/**
 * Builds a searcher over the file-system index located at {@code INDEX_DIR}.
 *
 * @return a searcher backed by a reader opened on that directory
 * @throws IOException declared for the directory and reader open calls
 */
private static IndexSearcher createSearcher() throws IOException {
    final Directory indexDirectory = FSDirectory.open(Paths.get(INDEX_DIR));
    return new IndexSearcher(DirectoryReader.open(indexDirectory));
}
    

/**
 * Returns the shared index writer, creating it on first use.
 *
 * <p>When {@code indexPath} equals "RAM" (ignoring case) the writer is opened on the
 * {@code ramDir} field, which is created if it is still null; otherwise it is opened
 * on the file-system directory named by {@code indexPath}.
 *
 * @return the existing writer, or the one created by this call
 * @throws LuceneAlertException if no index path is configured, or wrapping the
 *         CorruptIndexException / IOException raised while opening the writer
 */
public synchronized IndexWriter retrieveIndexWriter() throws LuceneAlertException
   {
      // A writer created by an earlier call is reused as-is.
      if (writer != null)
      {
         return writer;
      }

      if (indexPath == null)
      {
         logger.fatal("No Lucene Index Path has been defined.");
         throw new LuceneAlertException(
            "No Lucene Index Path has been defined.Please define that in the configuration file.");
      }

      logger.debug("inside of indexPath !=null");
      try
      {
         final boolean inMemoryIndex = indexPath.equalsIgnoreCase("RAM");
         if (inMemoryIndex)
         {
            if (ramDir == null)
            {
               ramDir = new RAMDirectory();
            }
            writer = new IndexWriter(ramDir, new StandardAnalyzer(Version.LUCENE_29),
               IndexWriter.MaxFieldLength.LIMITED);
         }
         else
         {
            FSDirectory onDiskDir = FSDirectory.open(new File(indexPath));
            writer = new IndexWriter(onDiskDir, new StandardAnalyzer(Version.LUCENE_29),
               IndexWriter.MaxFieldLength.LIMITED);
         }
         // Same timeout for both directory kinds.
         writer.setWriteLockTimeout(WRITER_LOCK_TIMEDOUT);
      }
      catch (CorruptIndexException corruptIndex)
      {
         logger.error("CorruptIndexException  thrown when retrieving writer." + corruptIndex.getMessage());
         throw new LuceneAlertException(corruptIndex);
      }
      catch (IOException ioFailure)
      {
         logger.error(" IOException  thrown when retrieving writer." + ioFailure.getMessage());
         throw new LuceneAlertException(ioFailure);
      }

      return writer;
   }

Select Operation

 /**
  * Runs {@code query} against the index and either updates the matching alert
  * documents or returns one page of them.
  *
  * @param query        the query to execute
  * @param updateRecord {@code true} to hand the hits to {@code updateAlertDoc} and flag the
  *                     index as updated; {@code false} to collect a page of matching documents
  * @param writer       writer passed to {@code updateAlertDoc}; only used when
  *                     {@code updateRecord} is {@code true}
  * @param index        offset of the first hit of the page; the page window is
  *                     {@code [index, index + batch)} clipped to the available hits
  * @param batch        size of the page window
  * @return a result whose document list is set only when {@code updateRecord} is {@code false}
  * @throws IOException if searching or loading a stored document fails
  * @throws LuceneAlertException declared for the helper calls made by this method
  */
 public synchronized LuceneAlertSearchResult search(Query query, boolean updateRecord, IndexWriter writer,
      int index, int batch)
      throws IOException, LuceneAlertException
   {

      if (logger.isDebugEnabled())
         logger.debug("Entering LuceneAlertSearchResult(), query=" + query + ", updateRecord=" + updateRecord
            + ", index=" + index + ", batch=" + batch);

      LuceneAlertSearchResult result = new LuceneAlertSearchResult();

      IndexSearcher searcher = retrieveIndexSearcher();

      if (logger.isDebugEnabled())
         logger.debug("Hashcode for this=" + this + " ,for indexSearcher=" + searcher);

      // maxDoc() is 0 for an empty index, but search() rejects a hit limit that is
      // not positive; ask for at least one hit so an empty index yields an empty result.
      int hitLimit = Math.max(1, searcher.maxDoc());
      TopDocs resultDocs = searcher.search(query, hitLimit);

      // for clearAlert
      if (updateRecord)
      {
         updateAlertDoc(writer, resultDocs);
         setIndexUpdated(true);

         if (logger.isDebugEnabled())
            logger.debug("setIndexUpdated to true");

      }
      // for alert search
      else
      {

         List<Document> docList = new ArrayList<Document>();

         ScoreDoc[] resultScoreDocs = resultDocs.scoreDocs;

         int totalHits = resultDocs.totalHits;
         int start = Math.max(index, 0);

         // Never read past the hits that were actually returned.
         int available = Math.min(totalHits, resultScoreDocs.length);
         // Widen to long so that index + batch cannot overflow int.
         long requestedEnd = (long) index + batch;
         int end = (int) Math.max(0L, Math.min(requestedEnd, (long) available));

         if (logger.isDebugEnabled())
            logger.debug("Total hits=" + totalHits + ", start=" + start + ", end=" + end);

         for (int i = start; i < end; i++)
            docList.add(searcher.doc(resultScoreDocs[i].doc));

         result.setDocs(docList);
      }

      if (logger.isDebugEnabled())
         logger.debug("LuceneAlertSearchResult() DONE");

      return result;

   }

Insert Operation

    
   /**
    * Converts the alert to a Lucene document, adds it through the shared index
    * writer, commits, and flags the index as updated.
    *
    * @param alertDTO the alert to index
    * @throws LuceneAlertException if no index writer is available, or wrapping the
    *         CorruptIndexException / IOException raised while adding or committing
    */
   @Override
   public void addDocument(AlertDTO alertDTO) throws LuceneAlertException
   {
      final Document luceneDoc = convertToDocument(alertDTO);
      if (logger.isDebugEnabled())
      {
         logger.debug("Document object ready to be added into index: " + luceneDoc);
      }

      final IndexWriter indexWriter = luceneAlertHelper.retrieveIndexWriter();
      if (indexWriter == null)
      {
         logger.error("Failed to obtain index writer when trying to add doc to index");
         throw new LuceneAlertException("Failed to obtain index writer");
      }

      try
      {
         indexWriter.addDocument(luceneDoc);
         luceneAlertHelper.commit();
         luceneAlertHelper.setIndexUpdated(true);
      }
      catch (CorruptIndexException corruptIndex)
      {
         throw new LuceneAlertException(corruptIndex);
      }
      catch (IOException ioFailure)
      {
         throw new LuceneAlertException(ioFailure);
      }

      if (logger.isInfoEnabled())
      {
         logger.info("Document added for alert: " + alertDTO.getId());
      }
   }

Update Operation

    /**
     * Indexes everything under the {@code inputFiles} folder into the index stored at
     * {@code C:/index/indexedFiles}, creating the index or appending to an existing one.
     *
     * <p>The directory and the writer are opened with try-with-resources so that they
     * are closed even when indexing fails part-way through.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args)
    {
        //Input folder
        String docsPath = "inputFiles";

        //Output folder
        String indexPath = "C:/index/indexedFiles";

        //Input Path Variable
        final Path docDir = Paths.get(docsPath);

        //org.apache.lucene.store.Directory instance
        try (Directory dir = FSDirectory.open(Paths.get(indexPath)))
        {
            //analyzer with the default stop words
            Analyzer analyzer = new StandardAnalyzer();

            //IndexWriter Configuration
            IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);

            //IndexWriter writes new index files to the directory
            // NOTE(review): IndexWriter.close() commits pending changes by default, so
            // documents indexed before a failure are kept - confirm this is acceptable.
            try (IndexWriter writer = new IndexWriter(dir, iwc))
            {
                //Its recursive method to iterate all files and directories
                indexDocs(writer, docDir);
            }
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
    }
     
    /**
     * Indexes {@code path}: a single file is indexed directly, a directory is walked
     * and every visited file is indexed.
     *
     * <p>While walking a directory, an IOException raised for one file is printed and
     * the walk continues with the next file.
     *
     * @param writer writer that receives the documents
     * @param path   file or directory to index
     * @throws IOException if the single file cannot be indexed or the walk itself fails
     */
    static void indexDocs(final IndexWriter writer, Path path) throws IOException
    {
        // Plain file: index it and stop, there is nothing to walk.
        if (!Files.isDirectory(path))
        {
            indexDoc(writer, path, Files.getLastModifiedTime(path).toMillis());
            return;
        }

        final SimpleFileVisitor<Path> indexingVisitor = new SimpleFileVisitor<Path>()
        {
            @Override
            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException
            {
                try
                {
                    indexDoc(writer, file, attrs.lastModifiedTime().toMillis());
                }
                catch (IOException perFileFailure)
                {
                    // Best effort: report the file that failed and keep walking.
                    perFileFailure.printStackTrace();
                }
                return FileVisitResult.CONTINUE;
            }
        };

        Files.walkFileTree(path, indexingVisitor);
    }
 
    /**
     * Builds a Lucene document for {@code file} (path, last-modified time, contents)
     * and writes it with {@code updateDocument}, keyed on the "path" term.
     *
     * @param writer       writer that receives the document
     * @param file         file to index
     * @param lastModified last-modified time of the file, in milliseconds
     * @throws IOException if the file cannot be read or the index cannot be updated
     */
    static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException
    {
        // Read the file once and decode it explicitly as UTF-8 instead of relying on
        // the platform default charset.
        // NOTE(review): assumes the input files are UTF-8 text - confirm.
        String contents = new String(Files.readAllBytes(file), java.nio.charset.StandardCharsets.UTF_8);

        //Create lucene Document
        Document doc = new Document();

        doc.add(new StringField("path", file.toString(), Field.Store.YES));
        doc.add(new LongPoint("modified", lastModified));
        doc.add(new TextField("contents", contents, Field.Store.YES));

        //Updates a document by first deleting the document(s)
        //containing <code>term</code> and then adding the new
        //document.  The delete and then add are atomic as seen
        //by a reader on the same index
        writer.updateDocument(new Term("path", file.toString()), doc);
    }
}

Delete Operation

    /**
     * Deletes the index documents whose ID term matches the alert's ID, commits, and
     * flags the index as updated.
     *
     * @param alertDTO the alert whose documents are removed from the index
     * @throws LuceneAlertException if no index writer is available, or wrapping the
     *         CorruptIndexException / IOException raised while deleting or committing
     */
    @Override
   public void deleteDocument(AlertDTO alertDTO) throws LuceneAlertException
   {

      if (logger.isInfoEnabled())
         logger.info("delete document for alert:" + alertDTO.getId());

      IndexWriter writer = luceneAlertHelper.retrieveIndexWriter();

      if (writer == null)
      {
         logger.error("Failed to obtain the index writer." + " Deleting document for  alert: " + alertDTO.getId()
            + " failed.");
         throw new LuceneAlertException("Failed to obtain index writer");
      }

      // A "new Long(...) == null" guard can never be true, so no such check is made here.
      // Long.valueOf accepts the same argument types as the deprecated Long constructor
      // and yields the same text form of the ID.
      String idText = Long.valueOf(alertDTO.getId()).toString();

      try
      {
         writer.deleteDocuments(new Term(LuceneAlertFields.ID, idText));
         luceneAlertHelper.commit();
         luceneAlertHelper.setIndexUpdated(true);
      }
      catch (CorruptIndexException e)
      {
         logger.error("A CorruptIndexException is thrown when Document is deleted from  Lucene Interface for " +
            alertDTO.getBan() + ". " + e.getMessage());
         throw new LuceneAlertException(e);
      }
      catch (IOException e)
      {
         logger.error("A IOException is thrown when Document is deleted from  Lucene Interface for " +
            alertDTO.getBan() + ". " + e.getMessage());
         throw new LuceneAlertException(e);
      }


      if (logger.isInfoEnabled())
         logger.info(alertDTO.getId() + " deleted.");


   }

Known Limitations

  • If the index location or path cannot be resolved from the application source code, a Java Unknown ApacheLucene Index object is created instead.