Thursday, April 11, 2013

Simple Auto Suggester with Lucene 4.1's AnalyzingSuggester

An autosuggestion and autocomplete system is a must for any software that has a search engine in it. Google has set a new bar with its excellent autosuggestion and autocomplete system. So it would be great if we could build an autosuggester ourselves with little coding, while understanding what goes on behind such a system. This is where Lucene comes in. Here, I will show you a simple autosuggester program that first builds the data set to be used for autosuggestion and then looks up that data set to produce suggestions. The data set is a simple list of words and their counts. The three classes shown below demonstrate a simple autosuggester.

LuceneAutoSuggesterTest.java

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
package com.lucene.autosuggester;

import java.io.IOException;
import java.util.List;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester;
import org.apache.lucene.util.Version;
import org.junit.Test;

/**
 * Demo test that feeds a small weighted word list into an
 * {@link AnalyzingSuggester} and prints the suggestions for a prefix.
 */
public class LuceneAutoSuggesterTest {

 /**
  * Builds a suggester from four hard-coded entries, looks up the input
  * "ba" and prints each returned key together with its value.
  *
  * @throws IOException declared because the suggester calls used here
  *         are invoked without a surrounding try/catch
  */
 @Test
 public void luceneAutoSuggester() throws IOException {
  // Each entry pairs a term with the count used as its weight.
  WordFreq[] dictionary = {
    new WordFreq("ball", 50),
    new WordFreq("bar", 10),
    new WordFreq("ba", 12),
    new WordFreq("ballon", 6) };

  StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_41);
  AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer);

  // Load the whole dictionary into the suggester before querying it.
  suggester.build(new WordFreqArrayIterator(dictionary));

  // NOTE(review): the arguments are presumably (key, onlyMorePopular,
  // maxResults) per the Lucene Lookup API - confirm against the Javadoc.
  List<LookupResult> matches = suggester.lookup("ba", false, 100);

  System.out.println("Suggested words for input \"ba\"");
  for (LookupResult match : matches) {
   System.out.println(match.key + ":" + match.value);
  }
 }
}


WordFreq.java

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
package com.lucene.autosuggester;

import org.apache.lucene.util.BytesRef;

/**
 * Simple holder pairing a term with its count.
 */
public class WordFreq {
 /** The term, held as a {@link BytesRef}. */
 public BytesRef term;
 /** The count associated with {@link #term}. */
 public long count;

 /**
  * Creates an entry from a plain string by wrapping it in a new
  * {@link BytesRef}.
  *
  * @param term  the term text
  * @param count the count associated with the term
  */
 public WordFreq(String term, long count) {
  this.term = new BytesRef(term);
  this.count = count;
 }

 /**
  * Creates an entry from an existing {@link BytesRef}; the reference is
  * stored as given, without copying.
  *
  * @param term  the term bytes
  * @param count the count associated with the term
  */
 public WordFreq(BytesRef term, long count) {
  this.count = count;
  this.term = term;
 }
}


WordFreqArrayIterator.java

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
package com.lucene.autosuggester;

import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;

import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.util.BytesRef;

/**
 * Adapts a sequence of {@link WordFreq} entries to the
 * {@link TermFreqIterator} interface: {@link #next()} yields each entry's
 * term and {@link #weight()} yields the count of the entry most recently
 * returned.
 */
public final class WordFreqArrayIterator implements TermFreqIterator {
 /** Underlying source of entries. */
 private final Iterator<WordFreq> source;
 /** Entry handed out by the latest successful {@link #next()} call. */
 private WordFreq lastReturned;

 /**
  * Wraps an existing iterator over entries.
  *
  * @param wordFreqIterator the entries to expose, in iteration order
  */
 public WordFreqArrayIterator(Iterator<WordFreq> wordFreqIterator) {
  this.source = wordFreqIterator;
 }

 /**
  * Wraps an array of entries, iterating them in array order.
  *
  * @param list the entries to expose
  */
 public WordFreqArrayIterator(WordFreq[] list) {
  this(Arrays.asList(list).iterator());
 }

 /**
  * Always returns {@code null}; no comparator is supplied.
  * NOTE(review): presumably signals "no particular order" to Lucene -
  * confirm against the BytesRefIterator contract.
  */
 @Override
 public Comparator<BytesRef> getComparator() {
  return null;
 }

 /**
  * Advances to the next entry.
  *
  * @return the term of the next entry, or {@code null} once the
  *         underlying iterator has no more elements
  */
 @Override
 public BytesRef next() {
  if (!source.hasNext()) {
   // Exhausted: lastReturned deliberately keeps its previous value.
   return null;
  }
  lastReturned = source.next();
  return lastReturned.term;
 }

 /**
  * Returns the count of the entry most recently returned by
  * {@link #next()}. Calling this before the first successful
  * {@code next()} dereferences a {@code null} entry and therefore
  * throws {@link NullPointerException}.
  */
 @Override
 public long weight() {
  return lastReturned.count;
 }
}

For more reference on AnalyzingSuggester, BytesRef and TermFreqIterator, see their documentation.

For more info on AnalyzingSuggester visit:
http://blog.mikemccandless.com/2012/09/lucenes-new-analyzing-suggester.html

No comments:

Post a Comment