package org.apache.lucene.search;

import junit.framework.TestCase;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.English;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

/**
 * Tests retrieval of term frequency vectors through an {@link IndexSearcher}'s reader.
 *
 * <p>{@link #setUp()} builds an in-memory index of 1000 documents; each document has one
 * field named {@code "field"} whose text is {@code English.intToEnglish(i)}. The third
 * argument passed to {@code Field.Text} is {@code true} (presumably the store-term-vector
 * flag -- confirm against the Lucene version in use).
 */
public class TestTermVectors extends TestCase {

  /** Name of the only field indexed by these tests. */
  private static final String FIELD = "field";

  /** Number of documents added to the shared index in {@link #setUp()}. */
  private static final int DOC_COUNT = 1000;

  private IndexSearcher searcher;
  private RAMDirectory directory = new RAMDirectory();

  public TestTermVectors(String s) {
    super(s);
  }

  /**
   * Creates the shared index in {@link #directory} and opens {@link #searcher} on it.
   *
   * @throws Exception if the index cannot be written or opened
   */
  public void setUp() throws Exception {
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true);
    try {
      for (int i = 0; i < DOC_COUNT; i++) {
        Document doc = new Document();
        doc.add(Field.Text(FIELD, English.intToEnglish(i), true));
        writer.addDocument(doc);
      }
    } finally {
      // Close the writer even if adding a document fails.
      writer.close();
    }
    searcher = new IndexSearcher(directory);
  }

  /**
   * Closes the searcher opened by {@link #setUp()} so it does not outlive the test.
   *
   * @throws Exception if closing the searcher fails
   */
  protected void tearDown() throws Exception {
    if (searcher != null) {
      searcher.close();
      searcher = null;
    }
  }

  /** Sanity check that {@link #setUp()} produced a searcher. */
  public void test() {
    assertNotNull("searcher should have been created by setUp", searcher);
  }

  /**
   * Every document matching "seventy" must expose exactly one term vector.
   *
   * @throws IOException if the search or the vector lookup fails
   */
  public void testTermVectors() throws IOException {
    assertEveryHitHasOneVector("seventy", 100);
  }

  /**
   * Every document matching "fifty" must expose exactly one term vector.
   *
   * @throws IOException if the search or the vector lookup fails
   */
  public void testTermPositionVectors() throws IOException {
    assertEveryHitHasOneVector("fifty", 100);
  }

  /**
   * Indexes four known sentences and verifies that (a) the frequency reported by
   * {@code TermDocs} for each term/document pair equals the frequency stored in that
   * document's term vector, and (b) the vector of the second-ranked hit for "chocolate"
   * matches the hand-counted term frequencies of the fourth sentence.
   *
   * @throws IOException if the index cannot be written, read or searched
   */
  public void testKnownSetOfDocuments() throws IOException {
    String test1 = "eating chocolate in a computer lab";
    String test2 = "computer in a computer lab";
    String test3 = "a chocolate lab grows old";
    String test4 =
        "eating chocolate with a chocolate lab in an old chocolate colored computer lab";

    // Hand-counted term frequencies for test4 (10 distinct terms).
    Map test4Map = new HashMap();
    test4Map.put("chocolate", new Integer(3));
    test4Map.put("lab", new Integer(2));
    test4Map.put("eating", new Integer(1));
    test4Map.put("computer", new Integer(1));
    test4Map.put("with", new Integer(1));
    test4Map.put("a", new Integer(1));
    test4Map.put("colored", new Integer(1));
    test4Map.put("in", new Integer(1));
    test4Map.put("an", new Integer(1));
    test4Map.put("old", new Integer(1));

    Document testDoc1 = new Document();
    setupDoc(testDoc1, test1);
    Document testDoc2 = new Document();
    setupDoc(testDoc2, test2);
    Document testDoc3 = new Document();
    setupDoc(testDoc3, test3);
    Document testDoc4 = new Document();
    setupDoc(testDoc4, test4);

    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
    try {
      writer.addDocument(testDoc1);
      writer.addDocument(testDoc2);
      writer.addDocument(testDoc3);
      writer.addDocument(testDoc4);
    } finally {
      writer.close();
    }

    IndexSearcher knownSearcher = new IndexSearcher(dir);
    try {
      assertTermDocsAgreeWithVectors(knownSearcher);

      Query query = new TermQuery(new Term(FIELD, "chocolate"));
      Hits hits = knownSearcher.search(query);
      assertEquals("documents containing 'chocolate'", 3, hits.length());

      // Expected ranking of the three matching documents.
      assertEquals(testDoc3.toString(), hits.doc(0).toString());
      assertEquals(testDoc4.toString(), hits.doc(1).toString());
      assertEquals(testDoc1.toString(), hits.doc(2).toString());

      TermFreqVector vector = knownSearcher.reader.getTermFreqVector(hits.id(1), FIELD);
      assertNotNull("term vector for the second hit", vector);
      String[] terms = vector.getTerms();
      int[] freqs = vector.getTermFrequencies();
      assertNotNull("terms of the second hit's vector", terms);
      assertEquals("distinct terms in test4", 10, terms.length);
      for (int i = 0; i < terms.length; i++) {
        String term = terms[i];
        assertTrue("test4 should contain '" + term + "'", test4.indexOf(term) != -1);
        Integer expectedFreq = (Integer) test4Map.get(term);
        assertNotNull("no expected frequency for '" + term + "'", expectedFreq);
        assertEquals("frequency of '" + term + "'", expectedFreq.intValue(), freqs[i]);
      }
    } finally {
      // Release the reader even when an assertion above fails.
      knownSearcher.close();
    }
  }

  /**
   * Searches the shared index for {@code termText} and asserts the hit count and that each
   * hit has exactly one term vector.
   */
  private void assertEveryHitHasOneVector(String termText, int expectedHits)
      throws IOException {
    Query query = new TermQuery(new Term(FIELD, termText));
    Hits hits = searcher.search(query);
    assertEquals("hits for '" + termText + "'", expectedHits, hits.length());

    for (int i = 0; i < hits.length(); i++) {
      TermFreqVector[] vectors = searcher.reader.getTermFreqVectors(hits.id(i));
      assertNotNull("term vectors for hit " + i, vectors);
      assertEquals("number of term vectors for hit " + i, 1, vectors.length);
    }
  }

  /**
   * Walks every term and posting in the searcher's reader and asserts that the posting's
   * frequency equals the frequency recorded for that term in the document's term vector.
   */
  private void assertTermDocsAgreeWithVectors(IndexSearcher knownSearcher)
      throws IOException {
    TermEnum termEnum = knownSearcher.reader.terms();
    try {
      TermDocs termDocs = knownSearcher.reader.termDocs();
      try {
        while (termEnum.next()) {
          Term term = termEnum.term();
          termDocs.seek(term);
          while (termDocs.next()) {
            int docId = termDocs.doc();
            int freq = termDocs.freq();
            TermFreqVector vector = knownSearcher.reader.getTermFreqVector(docId, FIELD);
            // Check for null before touching the vector so a missing vector is reported
            // as an assertion failure rather than a NullPointerException.
            assertNotNull("term vector for doc " + docId, vector);
            String[] vTerms = vector.getTerms();
            int[] freqs = vector.getTermFrequencies();
            for (int i = 0; i < vTerms.length; i++) {
              if (term.text().equals(vTerms[i])) {
                assertEquals(
                    "frequency of '" + vTerms[i] + "' in doc " + docId, freq, freqs[i]);
              }
            }
          }
        }
      } finally {
        termDocs.close();
      }
    } finally {
      termEnum.close();
    }
  }

  /** Adds {@code text} to {@code doc} under {@link #FIELD}, passing {@code true} as the flag. */
  private void setupDoc(Document doc, String text) {
    doc.add(Field.Text(FIELD, text, true));
  }
}