TestSegmentTermEnum


1   package org.apache.lucene.index;
2   
3   import java.io.IOException  ;
4   
5   import junit.framework.TestCase;
6   
7   import org.apache.lucene.analysis.WhitespaceAnalyzer;
8   import org.apache.lucene.document.Document;
9   import org.apache.lucene.document.Field;
10  import org.apache.lucene.index.IndexReader;
11  import org.apache.lucene.index.IndexWriter;
12  import org.apache.lucene.index.Term;
13  import org.apache.lucene.index.TermEnum;
14  import org.apache.lucene.store.Directory;
15  import org.apache.lucene.store.RAMDirectory;
16  
17  /**
18   * @author goller
19   */
20  public class TestSegmentTermEnum extends TestCase
21  {
22    Directory dir = new RAMDirectory();
23  
24    public void testTermEnum()
25    {
26      IndexWriter writer = null;
27  
28      try {
29        writer  = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
30  
31        // add 100 documents with term : aaa
32        // add 100 documents with terms: aaa bbb
33        // Therefore, term 'aaa' has document frequency of 200 and term 'bbb' 100
34        for (int i = 0; i < 100; i++) {
35          addDoc(writer, "aaa");
36          addDoc(writer, "aaa bbb");
37        }
38  
39        writer.close();
40      }
41      catch (IOException   e) {
42        e.printStackTrace();
43      }
44  
45      try {
46        // verify document frequency of terms in an unoptimized index
47        verifyDocFreq();
48  
49        // merge segments by optimizing the index
50        writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
51        writer.optimize();
52        writer.close();
53  
54        // verify document frequency of terms in an optimized index
55        verifyDocFreq();
56      }
57      catch (IOException   e2) {
58        e2.printStackTrace();
59      }
60    }
61  
62    private void verifyDocFreq()
63        throws IOException  
64    {
65        IndexReader reader = IndexReader.open(dir);
66        TermEnum termEnum = null;
67  
68      // create enumeration of all terms
69      termEnum = reader.terms();
70      // go to the first term (aaa)
71      termEnum.next();
72      // assert that term is 'aaa'
73      assertEquals("aaa", termEnum.term().text());
74      assertEquals(200, termEnum.docFreq());
75      // go to the second term (bbb)
76      termEnum.next();
77      // assert that term is 'bbb'
78      assertEquals("bbb", termEnum.term().text());
79      assertEquals(100, termEnum.docFreq());
80  
81      termEnum.close();
82  
83  
84      // create enumeration of terms after term 'aaa', including 'aaa'
85      termEnum = reader.terms(new Term("content", "aaa"));
86      // assert that term is 'aaa'
87      assertEquals("aaa", termEnum.term().text());
88      assertEquals(200, termEnum.docFreq());
89      // go to term 'bbb'
90      termEnum.next();
91      // assert that term is 'bbb'
92      assertEquals("bbb", termEnum.term().text());
93      assertEquals(100, termEnum.docFreq());
94  
95      termEnum.close();
96    }
97  
98    private void addDoc(IndexWriter writer, String   value)
99    {
100     Document doc = new Document();
101     doc.add(Field.UnStored("content", value));
102 
103     try {
104       writer.addDocument(doc);
105     }
106     catch (IOException   e) {
107       e.printStackTrace();
108     }
109   }
110 }
111
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags