KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > search > TestFuzzyQuery


1 package org.apache.lucene.search;
2
3 /**
4  * Copyright 2004 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
19 import java.io.IOException JavaDoc;
20
21 import junit.framework.TestCase;
22 import org.apache.lucene.analysis.WhitespaceAnalyzer;
23 import org.apache.lucene.document.Document;
24 import org.apache.lucene.document.Field;
25 import org.apache.lucene.index.IndexWriter;
26 import org.apache.lucene.index.Term;
27 import org.apache.lucene.store.RAMDirectory;
28
29 /**
30  * Tests {@link FuzzyQuery}.
31  *
32  * @author Daniel Naber
33  */

34 public class TestFuzzyQuery extends TestCase {
35
36   public void testDefaultFuzziness() throws Exception JavaDoc {
37     RAMDirectory directory = new RAMDirectory();
38     IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
39     addDoc("aaaaa", writer);
40     addDoc("aaaab", writer);
41     addDoc("aaabb", writer);
42     addDoc("aabbb", writer);
43     addDoc("abbbb", writer);
44     addDoc("bbbbb", writer);
45     addDoc("ddddd", writer);
46     writer.optimize();
47     writer.close();
48     IndexSearcher searcher = new IndexSearcher(directory);
49
50     FuzzyQuery query = new FuzzyQuery(new Term("field", "aaaaa"));
51     Hits hits = searcher.search(query);
52     assertEquals(3, hits.length());
53
54     // not similar enough:
55
query = new FuzzyQuery(new Term("field", "xxxxx"));
56     hits = searcher.search(query);
57     assertEquals(0, hits.length());
58     query = new FuzzyQuery(new Term("field", "aaccc")); // edit distance to "aaaaa" = 3
59
hits = searcher.search(query);
60     assertEquals(0, hits.length());
61
62     // query identical to a word in the index:
63
query = new FuzzyQuery(new Term("field", "aaaaa"));
64     hits = searcher.search(query);
65     assertEquals(3, hits.length());
66     assertEquals(hits.doc(0).get("field"), ("aaaaa"));
67     // default allows for up to two edits:
68
assertEquals(hits.doc(1).get("field"), ("aaaab"));
69     assertEquals(hits.doc(2).get("field"), ("aaabb"));
70
71     // query similar to a word in the index:
72
query = new FuzzyQuery(new Term("field", "aaaac"));
73     hits = searcher.search(query);
74     assertEquals(3, hits.length());
75     assertEquals(hits.doc(0).get("field"), ("aaaaa"));
76     assertEquals(hits.doc(1).get("field"), ("aaaab"));
77     assertEquals(hits.doc(2).get("field"), ("aaabb"));
78
79     query = new FuzzyQuery(new Term("field", "ddddX"));
80     hits = searcher.search(query);
81     assertEquals(1, hits.length());
82     assertEquals(hits.doc(0).get("field"), ("ddddd"));
83
84     // different field = no match:
85
query = new FuzzyQuery(new Term("anotherfield", "ddddX"));
86     hits = searcher.search(query);
87     assertEquals(0, hits.length());
88
89     searcher.close();
90     directory.close();
91   }
92
93   public void testDefaultFuzzinessLong() throws Exception JavaDoc {
94     RAMDirectory directory = new RAMDirectory();
95     IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
96     addDoc("aaaaaaa", writer);
97     addDoc("segment", writer);
98     writer.optimize();
99     writer.close();
100     IndexSearcher searcher = new IndexSearcher(directory);
101
102     FuzzyQuery query;
103     // not similar enough:
104
query = new FuzzyQuery(new Term("field", "xxxxx"));
105     Hits hits = searcher.search(query);
106     assertEquals(0, hits.length());
107     // edit distance to "aaaaaaa" = 3, this matches because the string is longer than
108
// in testDefaultFuzziness so a bigger difference is allowed:
109
query = new FuzzyQuery(new Term("field", "aaaaccc"));
110     hits = searcher.search(query);
111     assertEquals(1, hits.length());
112     assertEquals(hits.doc(0).get("field"), ("aaaaaaa"));
113
114     // no match, more than half of the characters is wrong:
115
query = new FuzzyQuery(new Term("field", "aaacccc"));
116     hits = searcher.search(query);
117     assertEquals(0, hits.length());
118
119     // "student" and "stellent" are indeed similar to "segment" by default:
120
query = new FuzzyQuery(new Term("field", "student"));
121     hits = searcher.search(query);
122     assertEquals(1, hits.length());
123     query = new FuzzyQuery(new Term("field", "stellent"));
124     hits = searcher.search(query);
125     assertEquals(1, hits.length());
126
127     searcher.close();
128     directory.close();
129   }
130   
131   private void addDoc(String JavaDoc text, IndexWriter writer) throws IOException JavaDoc {
132     Document doc = new Document();
133     doc.add(Field.Text("field", text));
134     writer.addDocument(doc);
135   }
136
137 }
138
Popular Tags