Wednesday, April 10, 2013

Real time search in lucene aka NRTManager.

Here I am posting a simple program that demonstrates real-time searching using NRTManager. For more information on Lucene's near-real-time search, see: http://blog.mikemccandless.com/2011/11/near-real-time-readers-with-lucenes.html
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
package com.immunesecurity.NRTManager;

import org.apache.lucene.document.Field;

import org.apache.lucene.document.StringField;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.NRTManager.TrackingIndexWriter;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.store.Directory;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.NRTManager;
import org.apache.lucene.search.NRTManagerReopenThread;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Minimal demonstration of Lucene near-real-time (NRT) search with
 * {@link NRTManager}: one document is indexed through a
 * {@link TrackingIndexWriter} and then searched without calling
 * {@code IndexWriter.commit()}.
 *
 * <p>The indexing generation returned by the tracking writer is remembered
 * and handed to {@link NRTManager#waitForGeneration(long)} so that the search
 * only runs once the newly added document is visible.
 */
public class NRTManagerTest {

 /** Single Lucene compatibility version shared by the analyzer and writer config. */
 private static final Version LUCENE_VERSION = Version.LUCENE_40;

 public static void main(String[] args) throws Exception {
  NRTManagerTest nrtManagerTest = new NRTManagerTest();
  try {
   nrtManagerTest.init();
   nrtManagerTest.index();
   // No sleep needed: search() blocks on the generation recorded by index().
   nrtManagerTest.search();
  } finally {
   nrtManagerTest.shutdown();
  }
 }

 private final Analyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
 private final Directory dir = new RAMDirectory();
 /** Generation of the most recent successful index change; set by {@link #index()}. */
 private long latestGen;
 private final Logger logger = LoggerFactory.getLogger(getClass());
 private NRTManager nrtManager;
 private NRTManagerReopenThread reopenThread;
 private TrackingIndexWriter tiw;
 private IndexWriter writer;

 /**
  * Adds a single document with a stored {@code id} field and records the
  * generation returned by the tracking writer in {@link #latestGen}.
  * An {@link IOException} is logged (with its stack trace) and not rethrown.
  */
 private void index() {
  Document newDoc = new Document();
  Field idField = new Field("id", "6", StringField.TYPE_STORED);
  newDoc.add(idField);
  try {
   // Keep the generation so search() can wait until this document is searchable.
   latestGen = tiw.addDocument(newDoc, analyzer);
  } catch (IOException ex) {
   // Pass the exception itself so the stack trace is not lost.
   logger.error("Failed to add document to the index", ex);
  }
 }

 /**
  * Creates the index writer, wraps it in a tracking writer, and starts the
  * daemon thread that periodically reopens the NRT searcher.
  *
  * @throws Exception if the writer or the NRT manager cannot be created
  */
 private void init() throws Exception {
  IndexWriterConfig cfg = new IndexWriterConfig(LUCENE_VERSION, analyzer);
  cfg.setRAMBufferSizeMB(128);
  LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
  mp.setUseCompoundFile(false);
  cfg.setMergePolicy(mp);
  cfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

  writer = new IndexWriter(dir, cfg);

  tiw = new TrackingIndexWriter(writer);
  nrtManager = new NRTManager(tiw, new SearcherFactory());
  int priority = Math.min(Thread.currentThread().getPriority() + 2,
    Thread.MAX_PRIORITY);
  // Arguments are the target max and min staleness, in seconds.
  reopenThread = new NRTManagerReopenThread(nrtManager, 2, 0.03);
  reopenThread.setName("NRT Reopen Thread");
  reopenThread.setPriority(priority);
  reopenThread.setDaemon(true);
  reopenThread.start();
 }

 /**
  * Waits until the generation recorded by {@link #index()} is searchable,
  * then runs a match-all query and logs the number of hits.
  *
  * @throws Exception if acquiring the searcher or searching fails
  */
 private void search() throws Exception {
  nrtManager.waitForGeneration(latestGen);
  IndexSearcher searcher = nrtManager.acquire();
  try {
   TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 100);
   logger.info(String.format("no of results found=%d",
     topDocs.scoreDocs.length));
  } finally {
   // Always hand the searcher back so the manager can release its reader.
   nrtManager.release(searcher);
  }
 }

 /**
  * Releases everything {@link #init()} created: the reopen thread first,
  * then the NRT manager, then the writer and directory. Null checks make
  * this safe to call after a partially failed {@code init()}.
  *
  * @throws IOException if closing the manager, writer or directory fails
  */
 private void shutdown() throws IOException {
  if (reopenThread != null) {
   reopenThread.close();
  }
  if (nrtManager != null) {
   nrtManager.close();
  }
  if (writer != null) {
   writer.close();
  }
  dir.close();
 }
}

2 comments:

  1. Punit, could you explain the code as well? The effects of using NRT were not explained.

    Regards,
    Ronald

    ReplyDelete
  2. Hi Ronald,
    Are you familiar with earlier versions of Lucene, in which we needed to call index.commit() to make the latest added document visible to searches? With NRTManager we do not need to call index.commit(): NRT refreshes the searcher automatically and periodically, so we get the latest documents without having to call index.commit().

    ReplyDelete