Support of Apache Lucene for Java

Supported Libraries

Library Version Supported
Apache Lucene up to: 9.6.0

Supported Operations

Operation Methods Supported
Insert
  • org.apache.lucene.index.IndexWriter.addDocument
  • org.apache.lucene.index.IndexWriter.addDocuments
  • org.apache.lucene.index.IndexWriter.addIndexes
  • org.apache.lucene.search.IndexSearcher.setQueryCache
  • org.apache.lucene.index.IndexWriter.commit
Select
org.apache.lucene.index.IndexReader
  • org.apache.lucene.index.IndexReader.numDocs
  • org.apache.lucene.index.IndexReader.maxDoc
  • org.apache.lucene.index.IndexReader.document
  • org.apache.lucene.index.IndexReader.docFreq
  • org.apache.lucene.index.IndexReader.getRefCount
  • org.apache.lucene.index.IndexReader.numDeletedDocs
  • org.apache.lucene.index.IndexReader.hasDeletions
  • org.apache.lucene.index.IndexReader.getSumDocFreq
  • org.apache.lucene.index.IndexReader.getDocCount
  • org.apache.lucene.index.IndexReader.getSumTotalTermFreq
org.apache.lucene.search.IndexSearcher
  • org.apache.lucene.search.IndexSearcher.search
  • org.apache.lucene.search.IndexSearcher.explain
  • org.apache.lucene.search.IndexSearcher.count
  • org.apache.lucene.search.IndexSearcher.totalTermFreq
  • org.apache.lucene.search.IndexSearcher.getQueryCache
  • org.apache.lucene.search.IndexSearcher.getIndexReader
  • org.apache.lucene.search.IndexSearcher.doc
  • org.apache.lucene.search.IndexSearcher.storedFields
  • org.apache.lucene.search.IndexSearcher.getSlices
  • org.apache.lucene.search.IndexSearcher.searchAfter
Delete
  • org.apache.lucene.index.IndexWriter.deleteDocuments
  • org.apache.lucene.index.IndexWriter.deleteAll
Update
  • org.apache.lucene.index.IndexWriter.updateDocument

Objects

Icon Description
  • Java ApacheLucene Index
  • Java Unknown ApacheLucene Index

    All links are created between the caller Java method object and the ApacheLucene Index object:

    Link type Methods Supported
    useSelectLink
  • numDocs
  • maxDoc
  • document
  • docFreq
  • getRefCount
  • numDeletedDocs
  • hasDeletions
  • getSumDocFreq
  • getDocCount
  • getSumTotalTermFreq
  • search
  • explain
  • count
  • totalTermFreq
  • getQueryCache
  • getIndexReader
  • doc
  • storedFields
  • getSlices
  • searchAfter
    useInsertLink
  • addDocument
  • addIndexes
  • setQueryCache
  • commit
    useDeleteLink
  • deleteDocuments
  • deleteAll
    useUpdateLink
  • updateDocument

    What results can you expect?

    Some example scenarios are shown below:

    ApacheLucene Index

    public class LuceneReadIndexExample 
    {
        private static final String INDEX_DIR = "c:/temp/lucene6index";
    
        /**
         * Runs two sample searches against the index (by ID, then by first name)
         * and prints the hit count followed by one stored field per hit.
         *
         * @param args command-line arguments (unused)
         * @throws Exception if creating the searcher or running a search fails
         */
        public static void main(String[] args) throws Exception 
        {
            IndexSearcher searcher = createSearcher();
            
            //Search by ID
            TopDocs foundDocs = searchById(1, searcher);
            
            System.out.println("Total Results :: " + foundDocs.totalHits);
            
            for (ScoreDoc sd : foundDocs.scoreDocs) 
            {
                Document d = searcher.doc(sd.doc);
                //Print the stored value directly: passing index data to String.format
                //as the format string fails on any '%' in the value and on a missing field
                System.out.println(d.get("firstName"));
            }
            
            //Search by firstName
            TopDocs foundDocs2 = searchByFirstName("Brian", searcher);
            
            System.out.println("Total Results :: " + foundDocs2.totalHits);
            
            for (ScoreDoc sd : foundDocs2.scoreDocs) 
            {
                Document d = searcher.doc(sd.doc);
                System.out.println(d.get("id"));
            }
        }
    /**
     * Builds an {@link IndexSearcher} over the index stored under {@code INDEX_DIR}.
     *
     * @return a searcher wrapping a newly opened reader on that directory
     * @throws IOException if the directory or the index reader cannot be opened
     */
    private static IndexSearcher createSearcher() throws IOException {
            //Open the directory, open a reader on it, then wrap the reader in a searcher
            return new IndexSearcher(DirectoryReader.open(FSDirectory.open(Paths.get(INDEX_DIR))));
        }
        
    

    /**
     * Returns the index writer held by this object, creating it on the first call.
     *
     * The writer is built over an in-memory directory when the index path equals
     * "RAM" (ignoring case), and over the file-system directory at that path otherwise.
     *
     * @return the index writer
     * @throws LuceneAlertException if no index path is defined, or if creating the writer
     *                              raises a CorruptIndexException or an IOException
     */
    public synchronized IndexWriter retrieveIndexWriter() throws LuceneAlertException
       {
          // A writer already exists: hand it back untouched
          if (writer != null)
          {
             return writer;
          }

          // Without a configured path there is nothing to open
          if (indexPath == null)
          {
             logger.fatal("No Lucene Index Path has been defined.");
             throw new LuceneAlertException(
                "No Lucene Index Path has been defined.Please define that in the configuration file.");
          }

          logger.debug("inside of indexPath !=null");
          try
          {
             final boolean inMemory = indexPath.equalsIgnoreCase("RAM");
             if (inMemory)
             {
                // The RAM directory is created only if it does not exist yet
                if (ramDir == null)
                {
                   ramDir = new RAMDirectory();
                }
                writer = new IndexWriter(ramDir, new StandardAnalyzer(Version.LUCENE_29),
                   IndexWriter.MaxFieldLength.LIMITED);
             }
             else
             {
                writer = new IndexWriter(FSDirectory.open(new File(indexPath)),
                   new StandardAnalyzer(Version.LUCENE_29), IndexWriter.MaxFieldLength.LIMITED);
             }
             // Same lock timeout for both kinds of directory
             writer.setWriteLockTimeout(WRITER_LOCK_TIMEDOUT);
          }
          catch (CorruptIndexException ce)
          {
             logger.error("CorruptIndexException  thrown when retrieving writer." + ce.getMessage());
             throw new LuceneAlertException(ce);
          }
          catch (IOException ioe)
          {
             logger.error(" IOException  thrown when retrieving writer." + ioe.getMessage());
             throw new LuceneAlertException(ioe);
          }

          return writer;
       }
    

    Select Operation

     /**
      * Runs {@code query} against the index and either updates the matching alert
      * documents or returns one page of them.
      *
      * @param query        the query to run
      * @param updateRecord when {@code true}, the hits are passed to {@code updateAlertDoc} and the
      *                     index is flagged as updated; when {@code false}, a page of hits is loaded
      * @param writer       writer handed to {@code updateAlertDoc}; only used when
      *                     {@code updateRecord} is {@code true}
      * @param index        position of the first hit of the page; the page starts at
      *                     {@code max(index, 0)}
      * @param batch        page size; the page ends (exclusive) at {@code index + batch},
      *                     capped at the total number of hits
      * @return the result holder; its document list is only set when {@code updateRecord} is {@code false}
      * @throws IOException          if searching or loading a document fails
      * @throws LuceneAlertException declared for the helper calls made by this method
      */
     public synchronized LuceneAlertSearchResult search(Query query, boolean updateRecord, IndexWriter writer,
          int index, int batch)
          throws IOException, LuceneAlertException
       {
    
          if (logger.isDebugEnabled())
             logger.debug("Entering LuceneAlertSearchResult(), query=" + query + ", updateRecord=" + updateRecord
                + ", index=" + index + ", batch=" + batch);
    
          LuceneAlertSearchResult result = new LuceneAlertSearchResult();
    
          IndexSearcher searcher = retrieveIndexSearcher();
    
          if (logger.isDebugEnabled())
             logger.debug("Hashcode for this=" + this + " ,for indexSearcher=" + searcher);
    
          // maxDoc() is 0 on an empty index, and Lucene rejects a hit limit that is not
          // strictly positive, so ask for at least one hit
          int maxHits = Math.max(1, searcher.maxDoc());
          TopDocs resultDocs = searcher.search(query, maxHits);
    
          // for clearAlert
          if (updateRecord)
          {
             updateAlertDoc(writer, resultDocs);
             setIndexUpdated(true);
    
             if (logger.isDebugEnabled())
                logger.debug("setIndexUpdated to true");
    
          }
          // for alert search
          else
          {
    
             List<Document> docList = new ArrayList<Document>();
    
             ScoreDoc[] resultScoreDocs = resultDocs.scoreDocs;
    
             // Clamp the requested window [index, index + batch) to the available hits
             int totalHits = resultDocs.totalHits;
             int end = index + batch;
             int start = (index > 0) ? index : 0;
             end = (totalHits > end) ? end : totalHits;
    
             if (logger.isDebugEnabled())
                logger.debug("Total hits=" + totalHits + ", start=" + start + ", end=" + end);
    
             for (int i = start; i < end; i++)
                docList.add(searcher.doc(resultScoreDocs[i].doc));
    
             result.setDocs(docList);
          }
    
          if (logger.isDebugEnabled())
             logger.debug("LuceneAlertSearchResult() DONE");
    
          return result;
    
       }
    

    Insert Operation

        
       /**
        * Converts the alert into a Lucene document, adds it to the index and commits.
        *
        * @param alertDTO the alert to index
        * @throws LuceneAlertException if no index writer can be obtained, or if adding or
        *                              committing raises a CorruptIndexException or an IOException
        */
       @Override
       public void addDocument(AlertDTO alertDTO) throws LuceneAlertException
       {
          final Document doc = convertToDocument(alertDTO);

          if (logger.isDebugEnabled())
          {
             logger.debug("Document object ready to be added into index: " + doc);
          }

          final IndexWriter indexWriter = luceneAlertHelper.retrieveIndexWriter();
          if (indexWriter == null)
          {
             logger.error("Failed to obtain index writer when trying to add doc to index");
             throw new LuceneAlertException("Failed to obtain index writer");
          }

          try
          {
             // Add, commit, then flag the index as updated
             indexWriter.addDocument(doc);
             luceneAlertHelper.commit();
             luceneAlertHelper.setIndexUpdated(true);
          }
          catch (CorruptIndexException corrupt)
          {
             throw new LuceneAlertException(corrupt);
          }
          catch (IOException io)
          {
             throw new LuceneAlertException(io);
          }

          if (logger.isInfoEnabled())
          {
             logger.info("Document added for alert: " + alertDTO.getId());
          }
       }
    

    Update Operation

        /**
         * Indexes the files found under the {@code inputFiles} folder into the
         * index stored at {@code C:/index/indexedFiles}.
         *
         * @param args command-line arguments (unused)
         */
        public static void main(String[] args)
        {
            //Input folder
            String docsPath = "inputFiles";
             
            //Output folder
            String indexPath = "C:/index/indexedFiles";
     
            //Input Path Variable
            final Path docDir = Paths.get(docsPath);
     
            //try-with-resources releases the directory even when indexing fails part-way
            try (Directory dir = FSDirectory.open( Paths.get(indexPath) ))
            {
                //analyzer with the default stop words
                Analyzer analyzer = new StandardAnalyzer();
                 
                //IndexWriter Configuration
                IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
                iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
                 
                //IndexWriter writes new index files to the directory.
                //It is closed (releasing its write lock) on every exit path, including when indexDocs throws
                try (IndexWriter writer = new IndexWriter(dir, iwc))
                {
                    //Its recursive method to iterate all files and directories
                    indexDocs(writer, docDir);
                }
            } 
            catch (IOException e) 
            {
                e.printStackTrace();
            }
        }
         
        /**
         * Indexes {@code path}: a single file is indexed directly, a directory is
         * walked and every file found in it is indexed.
         *
         * @param writer the index writer passed on to {@code indexDoc}
         * @param path   a file or a directory
         * @throws IOException if indexing a single file or walking the directory tree fails;
         *                     an IOException raised for one file during the walk is printed
         *                     and the walk continues
         */
        static void indexDocs(final IndexWriter writer, Path path) throws IOException 
        {
            //Plain file: index it and stop here
            if (!Files.isDirectory(path)) 
            {
                indexDoc(writer, path, Files.getLastModifiedTime(path).toMillis());
                return;
            }

            //Directory: visit every file below it
            Files.walkFileTree(path, new SimpleFileVisitor<Path>() 
            {
                @Override
                public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException 
                {
                    try
                    {
                        indexDoc(writer, file, attrs.lastModifiedTime().toMillis());
                    } 
                    catch (IOException ioe) 
                    {
                        //One unreadable file must not abort the whole walk
                        ioe.printStackTrace();
                    }
                    return FileVisitResult.CONTINUE;
                }
            });
        }
     
        /**
         * Builds a Lucene document for one file (path, last-modified time, contents)
         * and writes it to the index, replacing any document with the same path.
         *
         * @param writer       the index writer to update
         * @param file         the file to index
         * @param lastModified the file's last-modified time, stored in the "modified" field
         * @throws IOException if the file cannot be read or the index update fails
         */
        static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException 
        {
            //Read the file once and decode it as UTF-8 rather than with the platform default charset.
            //No separate InputStream is opened: readAllBytes opens and closes the file itself
            String contents = new String(Files.readAllBytes(file), java.nio.charset.StandardCharsets.UTF_8);

            //Create lucene Document
            Document doc = new Document();
             
            doc.add(new StringField("path", file.toString(), Field.Store.YES));
            doc.add(new LongPoint("modified", lastModified));
            doc.add(new TextField("contents", contents, Field.Store.YES));
             
            //Updates a document by first deleting the document(s) 
            //containing <code>term</code> and then adding the new
            //document.  The delete and then add are atomic as seen
            //by a reader on the same index
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
    

    Delete Operation

        /**
         * Deletes the index document whose ID field matches the given alert, then commits.
         *
         * @param alertDTO the alert whose document should be removed
         * @throws LuceneAlertException if no index writer can be obtained, or if deleting or
         *                              committing raises a CorruptIndexException or an IOException
         */
        @Override
       public void deleteDocument(AlertDTO alertDTO) throws LuceneAlertException
       {
    
          if (logger.isInfoEnabled())
             logger.info("delete document for alert:" + alertDTO.getId());
    
          IndexWriter writer = luceneAlertHelper.retrieveIndexWriter();
    
          if (writer == null)
          {
             logger.error("Failed to obtain the index writer." + " Deleting document for  alert: " + alertDTO.getId()
                + " failed.");
             throw new LuceneAlertException("Failed to obtain index writer");
          }
    
          // The former guard "new Long(id) == null" could never be true (a freshly created
          // object is never null), so the unreachable "Invalid alert ID" branch was removed.
          // Long.valueOf replaces the deprecated Long constructor.
          String alertId = Long.valueOf(alertDTO.getId()).toString();
    
          try
          {
             writer.deleteDocuments(new Term(LuceneAlertFields.ID, alertId));
             luceneAlertHelper.commit();
             luceneAlertHelper.setIndexUpdated(true);
          }
          catch (CorruptIndexException e)
          {
             logger.error("A CorrupIndexException is thrown when Document is deleted from  Lucene Interface for " +
                alertDTO.getBan() + ". " + e.getMessage());
             throw new LuceneAlertException(e);
          }
          catch (IOException e)
          {
             logger.error("A IOException is thrown when Document is deleted from  Lucene Interface for " +
                alertDTO.getBan() + ". " + e.getMessage());
             throw new LuceneAlertException(e);
          }
    
    
          if (logger.isInfoEnabled())
             logger.info(alertDTO.getId() + " deleted.");
    
    
       }
    

    Known Limitations

    • If the index location or path cannot be found in the application source code, a Java Unknown ApacheLucene Index object is created instead.