KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > search > TestTermVectors


1 package org.apache.lucene.search;
2
3 /**
4  * Copyright 2004 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
19 import junit.framework.TestCase;
20 import org.apache.lucene.analysis.SimpleAnalyzer;
21 import org.apache.lucene.document.Document;
22 import org.apache.lucene.document.Field;
23 import org.apache.lucene.index.*;
24 import org.apache.lucene.store.Directory;
25 import org.apache.lucene.store.RAMDirectory;
26 import org.apache.lucene.util.English;
27
28 import java.io.IOException JavaDoc;
29 import java.util.HashMap JavaDoc;
30 import java.util.Map JavaDoc;
31
32 public class TestTermVectors extends TestCase {
33   private IndexSearcher searcher;
34   private RAMDirectory directory = new RAMDirectory();
35   public TestTermVectors(String JavaDoc s) {
36     super(s);
37   }
38
39   public void setUp() throws Exception JavaDoc {
40     IndexWriter writer
41             = new IndexWriter(directory, new SimpleAnalyzer(), true);
42     //writer.setUseCompoundFile(true);
43
//writer.infoStream = System.out;
44
StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
45     for (int i = 0; i < 1000; i++) {
46       Document doc = new Document();
47       doc.add(Field.Text("field", English.intToEnglish(i), true));
48       writer.addDocument(doc);
49     }
50     writer.close();
51     searcher = new IndexSearcher(directory);
52   }
53
54   protected void tearDown() {
55
56   }
57
58   public void test() {
59     assertTrue(searcher != null);
60   }
61
62   public void testTermVectors() {
63     Query query = new TermQuery(new Term("field", "seventy"));
64     try {
65       Hits hits = searcher.search(query);
66       assertEquals(100, hits.length());
67       
68       for (int i = 0; i < hits.length(); i++)
69       {
70         TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(i));
71         assertTrue(vector != null);
72         assertTrue(vector.length == 1);
73         //assertTrue();
74
}
75       TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(50));
76       //System.out.println("Explain: " + searcher.explain(query, hits.id(50)));
77
//System.out.println("Vector: " + vector[0].toString());
78
} catch (IOException JavaDoc e) {
79       assertTrue(false);
80     }
81   }
82   
83   public void testTermPositionVectors() {
84     Query query = new TermQuery(new Term("field", "fifty"));
85     try {
86       Hits hits = searcher.search(query);
87       assertEquals(100, hits.length());
88       
89       for (int i = 0; i < hits.length(); i++)
90       {
91         TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(i));
92         assertTrue(vector != null);
93         assertTrue(vector.length == 1);
94         //assertTrue();
95
}
96     } catch (IOException JavaDoc e) {
97       assertTrue(false);
98     }
99   }
100   
101   public void testKnownSetOfDocuments() {
102     String JavaDoc [] termArray = {"eating", "chocolate", "in", "a", "computer", "lab", "grows", "old", "colored",
103                       "with", "an"};
104     String JavaDoc test1 = "eating chocolate in a computer lab"; //6 terms
105
String JavaDoc test2 = "computer in a computer lab"; //5 terms
106
String JavaDoc test3 = "a chocolate lab grows old"; //5 terms
107
String JavaDoc test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms
108
Map JavaDoc test4Map = new HashMap JavaDoc();
109     test4Map.put("chocolate", new Integer JavaDoc(3));
110     test4Map.put("lab", new Integer JavaDoc(2));
111     test4Map.put("eating", new Integer JavaDoc(1));
112     test4Map.put("computer", new Integer JavaDoc(1));
113     test4Map.put("with", new Integer JavaDoc(1));
114     test4Map.put("a", new Integer JavaDoc(1));
115     test4Map.put("colored", new Integer JavaDoc(1));
116     test4Map.put("in", new Integer JavaDoc(1));
117     test4Map.put("an", new Integer JavaDoc(1));
118     test4Map.put("computer", new Integer JavaDoc(1));
119     test4Map.put("old", new Integer JavaDoc(1));
120     
121     Document testDoc1 = new Document();
122     setupDoc(testDoc1, test1);
123     Document testDoc2 = new Document();
124     setupDoc(testDoc2, test2);
125     Document testDoc3 = new Document();
126     setupDoc(testDoc3, test3);
127     Document testDoc4 = new Document();
128     setupDoc(testDoc4, test4);
129         
130     Directory dir = new RAMDirectory();
131     
132     try {
133       IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
134       assertTrue(writer != null);
135       writer.addDocument(testDoc1);
136       writer.addDocument(testDoc2);
137       writer.addDocument(testDoc3);
138       writer.addDocument(testDoc4);
139       writer.close();
140       IndexSearcher knownSearcher = new IndexSearcher(dir);
141       TermEnum termEnum = knownSearcher.reader.terms();
142       TermDocs termDocs = knownSearcher.reader.termDocs();
143       //System.out.println("Terms: " + termEnum.size() + " Orig Len: " + termArray.length);
144

145       Similarity sim = knownSearcher.getSimilarity();
146       while (termEnum.next() == true)
147       {
148         Term term = termEnum.term();
149         //System.out.println("Term: " + term);
150
termDocs.seek(term);
151         while (termDocs.next())
152         {
153           int docId = termDocs.doc();
154           int freq = termDocs.freq();
155           //System.out.println("Doc Id: " + docId + " freq " + freq);
156
TermFreqVector vector = knownSearcher.reader.getTermFreqVector(docId, "field");
157           float tf = sim.tf(freq);
158           float idf = sim.idf(term, knownSearcher);
159           //float qNorm = sim.queryNorm()
160
//This is fine since we don't have stop words
161
float lNorm = sim.lengthNorm("field", vector.getTerms().length);
162           //float coord = sim.coord()
163
//System.out.println("TF: " + tf + " IDF: " + idf + " LenNorm: " + lNorm);
164
assertTrue(vector != null);
165           String JavaDoc[] vTerms = vector.getTerms();
166           int [] freqs = vector.getTermFrequencies();
167           for (int i = 0; i < vTerms.length; i++)
168           {
169             if (term.text().equals(vTerms[i]) == true)
170             {
171               assertTrue(freqs[i] == freq);
172             }
173           }
174           
175         }
176         //System.out.println("--------");
177
}
178       Query query = new TermQuery(new Term("field", "chocolate"));
179       Hits hits = knownSearcher.search(query);
180       //doc 3 should be the first hit b/c it is the shortest match
181
assertTrue(hits.length() == 3);
182       float score = hits.score(0);
183       /*System.out.println("Hit 0: " + hits.id(0) + " Score: " + hits.score(0) + " String: " + hits.doc(0).toString());
184       System.out.println("Explain: " + knownSearcher.explain(query, hits.id(0)));
185       System.out.println("Hit 1: " + hits.id(1) + " Score: " + hits.score(1) + " String: " + hits.doc(1).toString());
186       System.out.println("Explain: " + knownSearcher.explain(query, hits.id(1)));
187       System.out.println("Hit 2: " + hits.id(2) + " Score: " + hits.score(2) + " String: " + hits.doc(2).toString());
188       System.out.println("Explain: " + knownSearcher.explain(query, hits.id(2)));*/

189       assertTrue(testDoc3.toString().equals(hits.doc(0).toString()));
190       assertTrue(testDoc4.toString().equals(hits.doc(1).toString()));
191       assertTrue(testDoc1.toString().equals(hits.doc(2).toString()));
192       TermFreqVector vector = knownSearcher.reader.getTermFreqVector(hits.id(1), "field");
193       assertTrue(vector != null);
194       //System.out.println("Vector: " + vector);
195
String JavaDoc[] terms = vector.getTerms();
196       int [] freqs = vector.getTermFrequencies();
197       assertTrue(terms != null && terms.length == 10);
198       for (int i = 0; i < terms.length; i++) {
199         String JavaDoc term = terms[i];
200         //System.out.println("Term: " + term);
201
int freq = freqs[i];
202         assertTrue(test4.indexOf(term) != -1);
203         Integer JavaDoc freqInt = (Integer JavaDoc)test4Map.get(term);
204         assertTrue(freqInt != null);
205         assertTrue(freqInt.intValue() == freq);
206       }
207       knownSearcher.close();
208     } catch (IOException JavaDoc e) {
209       e.printStackTrace();
210       assertTrue(false);
211     }
212
213
214   }
215   
216   private void setupDoc(Document doc, String JavaDoc text)
217   {
218     doc.add(Field.Text("field", text, true));
219     //System.out.println("Document: " + doc);
220
}
221   
222   
223 }
224
Popular Tags