KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > queryParser > TestQueryParser


1 package org.apache.lucene.queryParser;
2
3 /**
4  * Copyright 2002-2004 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
19 import junit.framework.TestCase;
20 import org.apache.lucene.analysis.Analyzer;
21 import org.apache.lucene.analysis.LowerCaseTokenizer;
22 import org.apache.lucene.analysis.SimpleAnalyzer;
23 import org.apache.lucene.analysis.Token;
24 import org.apache.lucene.analysis.TokenFilter;
25 import org.apache.lucene.analysis.TokenStream;
26 import org.apache.lucene.analysis.WhitespaceAnalyzer;
27 import org.apache.lucene.analysis.standard.StandardAnalyzer;
28 import org.apache.lucene.document.DateField;
29 import org.apache.lucene.search.BooleanQuery;
30 import org.apache.lucene.search.FuzzyQuery;
31 import org.apache.lucene.search.PhraseQuery;
32 import org.apache.lucene.search.PrefixQuery;
33 import org.apache.lucene.search.Query;
34 import org.apache.lucene.search.RangeQuery;
35 import org.apache.lucene.search.TermQuery;
36 import org.apache.lucene.search.WildcardQuery;
37 import java.io.IOException;
38 import java.io.Reader;
39 import java.text.DateFormat;
40 import java.util.Calendar;
41
42 /**
43  * Tests QueryParser.
44  */

45 public class TestQueryParser extends TestCase {
46
47   public static Analyzer qpAnalyzer = new QPTestAnalyzer();
48
49   public static class QPTestFilter extends TokenFilter {
50     /**
51      * Filter which discards the token 'stop' and which expands the
52      * token 'phrase' into 'phrase1 phrase2'
53      */

54     public QPTestFilter(TokenStream in) {
55       super(in);
56     }
57
58     boolean inPhrase = false;
59     int savedStart = 0, savedEnd = 0;
60
61     public Token next() throws IOException JavaDoc {
62       if (inPhrase) {
63         inPhrase = false;
64         return new Token("phrase2", savedStart, savedEnd);
65       } else
66         for (Token token = input.next(); token != null; token = input.next()) {
67           if (token.termText().equals("phrase")) {
68             inPhrase = true;
69             savedStart = token.startOffset();
70             savedEnd = token.endOffset();
71             return new Token("phrase1", savedStart, savedEnd);
72           } else if (!token.termText().equals("stop"))
73             return token;
74         }
75       return null;
76     }
77   }
78
79   public static class QPTestAnalyzer extends Analyzer {
80
81     /** Filters LowerCaseTokenizer with StopFilter. */
82     public final TokenStream tokenStream(String JavaDoc fieldName, Reader JavaDoc reader) {
83       return new QPTestFilter(new LowerCaseTokenizer(reader));
84     }
85   }
86
87   public static class QPTestParser extends QueryParser {
88     public QPTestParser(String JavaDoc f, Analyzer a) {
89       super(f, a);
90     }
91
92     protected Query getFuzzyQuery(String JavaDoc field, String JavaDoc termStr, float minSimilarity) throws ParseException {
93       throw new ParseException("Fuzzy queries not allowed");
94     }
95
96     protected Query getWildcardQuery(String JavaDoc field, String JavaDoc termStr) throws ParseException {
97       throw new ParseException("Wildcard queries not allowed");
98     }
99   }
100
101   private int originalMaxClauses;
102
103   public void setUp() {
104     originalMaxClauses = BooleanQuery.getMaxClauseCount();
105   }
106
107   public QueryParser getParser(Analyzer a) throws Exception JavaDoc {
108     if (a == null)
109       a = new SimpleAnalyzer();
110     QueryParser qp = new QueryParser("field", a);
111     qp.setOperator(QueryParser.DEFAULT_OPERATOR_OR);
112     return qp;
113   }
114
115   public Query getQuery(String JavaDoc query, Analyzer a) throws Exception JavaDoc {
116     return getParser(a).parse(query);
117   }
118
119   public void assertQueryEquals(String JavaDoc query, Analyzer a, String JavaDoc result)
120     throws Exception JavaDoc {
121     Query q = getQuery(query, a);
122     String JavaDoc s = q.toString("field");
123     if (!s.equals(result)) {
124       fail("Query /" + query + "/ yielded /" + s
125            + "/, expecting /" + result + "/");
126     }
127   }
128
129   public void assertWildcardQueryEquals(String JavaDoc query, boolean lowercase, String JavaDoc result)
130     throws Exception JavaDoc {
131     QueryParser qp = getParser(null);
132     qp.setLowercaseWildcardTerms(lowercase);
133     Query q = qp.parse(query);
134     String JavaDoc s = q.toString("field");
135     if (!s.equals(result)) {
136       fail("WildcardQuery /" + query + "/ yielded /" + s
137            + "/, expecting /" + result + "/");
138     }
139   }
140
141   public Query getQueryDOA(String JavaDoc query, Analyzer a)
142     throws Exception JavaDoc {
143     if (a == null)
144       a = new SimpleAnalyzer();
145     QueryParser qp = new QueryParser("field", a);
146     qp.setOperator(QueryParser.DEFAULT_OPERATOR_AND);
147     return qp.parse(query);
148   }
149
150   public void assertQueryEqualsDOA(String JavaDoc query, Analyzer a, String JavaDoc result)
151     throws Exception JavaDoc {
152     Query q = getQueryDOA(query, a);
153     String JavaDoc s = q.toString("field");
154     if (!s.equals(result)) {
155       fail("Query /" + query + "/ yielded /" + s
156            + "/, expecting /" + result + "/");
157     }
158   }
159
160   public void testSimple() throws Exception JavaDoc {
161     assertQueryEquals("term term term", null, "term term term");
162     assertQueryEquals("türm term term", null, "türm term term");
163     assertQueryEquals("ümlaut", null, "ümlaut");
164
165     assertQueryEquals("a AND b", null, "+a +b");
166     assertQueryEquals("(a AND b)", null, "+a +b");
167     assertQueryEquals("c OR (a AND b)", null, "c (+a +b)");
168     assertQueryEquals("a AND NOT b", null, "+a -b");
169     assertQueryEquals("a AND -b", null, "+a -b");
170     assertQueryEquals("a AND !b", null, "+a -b");
171     assertQueryEquals("a && b", null, "+a +b");
172     assertQueryEquals("a && ! b", null, "+a -b");
173
174     assertQueryEquals("a OR b", null, "a b");
175     assertQueryEquals("a || b", null, "a b");
176     assertQueryEquals("a OR !b", null, "a -b");
177     assertQueryEquals("a OR ! b", null, "a -b");
178     assertQueryEquals("a OR -b", null, "a -b");
179
180     assertQueryEquals("+term -term term", null, "+term -term term");
181     assertQueryEquals("foo:term AND field:anotherTerm", null,
182                       "+foo:term +anotherterm");
183     assertQueryEquals("term AND \"phrase phrase\"", null,
184                       "+term +\"phrase phrase\"");
185     assertQueryEquals("\"hello there\"", null, "\"hello there\"");
186     assertTrue(getQuery("a AND b", null) instanceof BooleanQuery);
187     assertTrue(getQuery("hello", null) instanceof TermQuery);
188     assertTrue(getQuery("\"hello there\"", null) instanceof PhraseQuery);
189
190     assertQueryEquals("germ term^2.0", null, "germ term^2.0");
191     assertQueryEquals("(term)^2.0", null, "term^2.0");
192     assertQueryEquals("(germ term)^2.0", null, "(germ term)^2.0");
193     assertQueryEquals("term^2.0", null, "term^2.0");
194     assertQueryEquals("term^2", null, "term^2.0");
195     assertQueryEquals("\"germ term\"^2.0", null, "\"germ term\"^2.0");
196     assertQueryEquals("\"term germ\"^2", null, "\"term germ\"^2.0");
197
198     assertQueryEquals("(foo OR bar) AND (baz OR boo)", null,
199                       "+(foo bar) +(baz boo)");
200     assertQueryEquals("((a OR b) AND NOT c) OR d", null,
201                       "(+(a b) -c) d");
202     assertQueryEquals("+(apple \"steve jobs\") -(foo bar baz)", null,
203                       "+(apple \"steve jobs\") -(foo bar baz)");
204     assertQueryEquals("+title:(dog OR cat) -author:\"bob dole\"", null,
205                       "+(title:dog title:cat) -author:\"bob dole\"");
206   }
207
208   public void testPunct() throws Exception JavaDoc {
209     Analyzer a = new WhitespaceAnalyzer();
210     assertQueryEquals("a&b", a, "a&b");
211     assertQueryEquals("a&&b", a, "a&&b");
212     assertQueryEquals(".NET", a, ".NET");
213   }
214
215   public void testSlop() throws Exception JavaDoc {
216     assertQueryEquals("\"term germ\"~2", null, "\"term germ\"~2");
217     assertQueryEquals("\"term germ\"~2 flork", null, "\"term germ\"~2 flork");
218     assertQueryEquals("\"term\"~2", null, "term");
219     assertQueryEquals("\" \"~2 germ", null, "germ");
220     assertQueryEquals("\"term germ\"~2^2", null, "\"term germ\"~2^2.0");
221   }
222
223   public void testNumber() throws Exception JavaDoc {
224 // The numbers go away because SimpleAnalzyer ignores them
225
assertQueryEquals("3", null, "");
226     assertQueryEquals("term 1.0 1 2", null, "term");
227     assertQueryEquals("term term1 term2", null, "term term term");
228
229     Analyzer a = new StandardAnalyzer();
230     assertQueryEquals("3", a, "3");
231     assertQueryEquals("term 1.0 1 2", a, "term 1.0 1 2");
232     assertQueryEquals("term term1 term2", a, "term term1 term2");
233   }
234
235   public void testWildcard() throws Exception JavaDoc {
236     assertQueryEquals("term*", null, "term*");
237     assertQueryEquals("term*^2", null, "term*^2.0");
238     assertQueryEquals("term~", null, "term~0.5");
239     assertQueryEquals("term~0.7", null, "term~0.7");
240     assertQueryEquals("term~^2", null, "term^2.0~0.5");
241     assertQueryEquals("term^2~", null, "term^2.0~0.5");
242     assertQueryEquals("term*germ", null, "term*germ");
243     assertQueryEquals("term*germ^3", null, "term*germ^3.0");
244
245     assertTrue(getQuery("term*", null) instanceof PrefixQuery);
246     assertTrue(getQuery("term*^2", null) instanceof PrefixQuery);
247     assertTrue(getQuery("term~", null) instanceof FuzzyQuery);
248     assertTrue(getQuery("term~0.7", null) instanceof FuzzyQuery);
249     FuzzyQuery fq = (FuzzyQuery)getQuery("term~0.7", null);
250     assertEquals(0.7f, fq.getMinSimilarity(), 0.1f);
251     assertEquals(0, fq.getPrefixLength());
252     fq = (FuzzyQuery)getQuery("term~", null);
253     assertEquals(0.5f, fq.getMinSimilarity(), 0.1f);
254     assertEquals(0, fq.getPrefixLength());
255     try {
256       getQuery("term~1.1", null); // value > 1, throws exception
257
fail();
258     } catch(ParseException pe) {
259       // expected exception
260
}
261     assertTrue(getQuery("term*germ", null) instanceof WildcardQuery);
262
263 /* Tests to see that wild card terms are (or are not) properly
264      * lower-cased with propery parser configuration
265      */

266 // First prefix queries:
267
assertWildcardQueryEquals("term*", true, "term*");
268     assertWildcardQueryEquals("Term*", true, "term*");
269     assertWildcardQueryEquals("TERM*", true, "term*");
270     assertWildcardQueryEquals("term*", false, "term*");
271     assertWildcardQueryEquals("Term*", false, "Term*");
272     assertWildcardQueryEquals("TERM*", false, "TERM*");
273 // Then 'full' wildcard queries:
274
assertWildcardQueryEquals("te?m", true, "te?m");
275     assertWildcardQueryEquals("Te?m", true, "te?m");
276     assertWildcardQueryEquals("TE?M", true, "te?m");
277     assertWildcardQueryEquals("Te?m*gerM", true, "te?m*germ");
278     assertWildcardQueryEquals("te?m", false, "te?m");
279     assertWildcardQueryEquals("Te?m", false, "Te?m");
280     assertWildcardQueryEquals("TE?M", false, "TE?M");
281     assertWildcardQueryEquals("Te?m*gerM", false, "Te?m*gerM");
282   }
283
284   public void testQPA() throws Exception JavaDoc {
285     assertQueryEquals("term term term", qpAnalyzer, "term term term");
286     assertQueryEquals("term +stop term", qpAnalyzer, "term term");
287     assertQueryEquals("term -stop term", qpAnalyzer, "term term");
288     assertQueryEquals("drop AND stop AND roll", qpAnalyzer, "+drop +roll");
289     assertQueryEquals("term phrase term", qpAnalyzer,
290                       "term \"phrase1 phrase2\" term");
291     assertQueryEquals("term AND NOT phrase term", qpAnalyzer,
292                       "+term -\"phrase1 phrase2\" term");
293     assertQueryEquals("stop", qpAnalyzer, "");
294     assertTrue(getQuery("term term term", qpAnalyzer) instanceof BooleanQuery);
295     assertTrue(getQuery("term +stop", qpAnalyzer) instanceof TermQuery);
296   }
297
298   public void testRange() throws Exception JavaDoc {
299     assertQueryEquals("[ a TO z]", null, "[a TO z]");
300     assertTrue(getQuery("[ a TO z]", null) instanceof RangeQuery);
301     assertQueryEquals("[ a TO z ]", null, "[a TO z]");
302     assertQueryEquals("{ a TO z}", null, "{a TO z}");
303     assertQueryEquals("{ a TO z }", null, "{a TO z}");
304     assertQueryEquals("{ a TO z }^2.0", null, "{a TO z}^2.0");
305     assertQueryEquals("[ a TO z] OR bar", null, "[a TO z] bar");
306     assertQueryEquals("[ a TO z] AND bar", null, "+[a TO z] +bar");
307     assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar {a TO z}");
308     assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a TO z})");
309   }
310
311   public String JavaDoc getDate(String JavaDoc s) throws Exception JavaDoc {
312     DateFormat JavaDoc df = DateFormat.getDateInstance(DateFormat.SHORT);
313     return DateField.dateToString(df.parse(s));
314   }
315
316   public String JavaDoc getLocalizedDate(int year, int month, int day) {
317     DateFormat JavaDoc df = DateFormat.getDateInstance(DateFormat.SHORT);
318     Calendar JavaDoc calendar = Calendar.getInstance();
319     calendar.set(year, month, day);
320     return df.format(calendar.getTime());
321   }
322
323   public void testDateRange() throws Exception JavaDoc {
324     String JavaDoc startDate = getLocalizedDate(2002, 1, 1);
325     String JavaDoc endDate = getLocalizedDate(2002, 1, 4);
326     assertQueryEquals("[ " + startDate + " TO " + endDate + "]", null,
327                       "[" + getDate(startDate) + " TO " + getDate(endDate) + "]");
328     assertQueryEquals("{ " + startDate + " " + endDate + " }", null,
329                       "{" + getDate(startDate) + " TO " + getDate(endDate) + "}");
330   }
331
332   public void testEscaped() throws Exception JavaDoc {
333     Analyzer a = new WhitespaceAnalyzer();
334     
335     /*assertQueryEquals("\\[brackets", a, "\\[brackets");
336     assertQueryEquals("\\[brackets", null, "brackets");
337     assertQueryEquals("\\\\", a, "\\\\");
338     assertQueryEquals("\\+blah", a, "\\+blah");
339     assertQueryEquals("\\(blah", a, "\\(blah");
340
341     assertQueryEquals("\\-blah", a, "\\-blah");
342     assertQueryEquals("\\!blah", a, "\\!blah");
343     assertQueryEquals("\\{blah", a, "\\{blah");
344     assertQueryEquals("\\}blah", a, "\\}blah");
345     assertQueryEquals("\\:blah", a, "\\:blah");
346     assertQueryEquals("\\^blah", a, "\\^blah");
347     assertQueryEquals("\\[blah", a, "\\[blah");
348     assertQueryEquals("\\]blah", a, "\\]blah");
349     assertQueryEquals("\\\"blah", a, "\\\"blah");
350     assertQueryEquals("\\(blah", a, "\\(blah");
351     assertQueryEquals("\\)blah", a, "\\)blah");
352     assertQueryEquals("\\~blah", a, "\\~blah");
353     assertQueryEquals("\\*blah", a, "\\*blah");
354     assertQueryEquals("\\?blah", a, "\\?blah");
355     //assertQueryEquals("foo \\&\\& bar", a, "foo \\&\\& bar");
356     //assertQueryEquals("foo \\|| bar", a, "foo \\|| bar");
357     //assertQueryEquals("foo \\AND bar", a, "foo \\AND bar");*/

358
359     assertQueryEquals("a\\-b:c", a, "a-b:c");
360     assertQueryEquals("a\\+b:c", a, "a+b:c");
361     assertQueryEquals("a\\:b:c", a, "a:b:c");
362     assertQueryEquals("a\\\\b:c", a, "a\\b:c");
363
364     assertQueryEquals("a:b\\-c", a, "a:b-c");
365     assertQueryEquals("a:b\\+c", a, "a:b+c");
366     assertQueryEquals("a:b\\:c", a, "a:b:c");
367     assertQueryEquals("a:b\\\\c", a, "a:b\\c");
368
369     assertQueryEquals("a:b\\-c*", a, "a:b-c*");
370     assertQueryEquals("a:b\\+c*", a, "a:b+c*");
371     assertQueryEquals("a:b\\:c*", a, "a:b:c*");
372
373     assertQueryEquals("a:b\\\\c*", a, "a:b\\c*");
374
375     assertQueryEquals("a:b\\-?c", a, "a:b-?c");
376     assertQueryEquals("a:b\\+?c", a, "a:b+?c");
377     assertQueryEquals("a:b\\:?c", a, "a:b:?c");
378
379     assertQueryEquals("a:b\\\\?c", a, "a:b\\?c");
380
381     assertQueryEquals("a:b\\-c~", a, "a:b-c~0.5");
382     assertQueryEquals("a:b\\+c~", a, "a:b+c~0.5");
383     assertQueryEquals("a:b\\:c~", a, "a:b:c~0.5");
384     assertQueryEquals("a:b\\\\c~", a, "a:b\\c~0.5");
385
386     assertQueryEquals("[ a\\- TO a\\+ ]", null, "[a- TO a+]");
387     assertQueryEquals("[ a\\: TO a\\~ ]", null, "[a: TO a~]");
388     assertQueryEquals("[ a\\\\ TO a\\* ]", null, "[a\\ TO a*]");
389   }
390
391   public void testTabNewlineCarriageReturn()
392     throws Exception JavaDoc {
393     assertQueryEqualsDOA("+weltbank +worlbank", null,
394       "+weltbank +worlbank");
395
396     assertQueryEqualsDOA("+weltbank\n+worlbank", null,
397       "+weltbank +worlbank");
398     assertQueryEqualsDOA("weltbank \n+worlbank", null,
399       "+weltbank +worlbank");
400     assertQueryEqualsDOA("weltbank \n +worlbank", null,
401       "+weltbank +worlbank");
402
403     assertQueryEqualsDOA("+weltbank\r+worlbank", null,
404       "+weltbank +worlbank");
405     assertQueryEqualsDOA("weltbank \r+worlbank", null,
406       "+weltbank +worlbank");
407     assertQueryEqualsDOA("weltbank \r +worlbank", null,
408       "+weltbank +worlbank");
409
410     assertQueryEqualsDOA("+weltbank\r\n+worlbank", null,
411       "+weltbank +worlbank");
412     assertQueryEqualsDOA("weltbank \r\n+worlbank", null,
413       "+weltbank +worlbank");
414     assertQueryEqualsDOA("weltbank \r\n +worlbank", null,
415       "+weltbank +worlbank");
416     assertQueryEqualsDOA("weltbank \r \n +worlbank", null,
417       "+weltbank +worlbank");
418
419     assertQueryEqualsDOA("+weltbank\t+worlbank", null,
420       "+weltbank +worlbank");
421     assertQueryEqualsDOA("weltbank \t+worlbank", null,
422       "+weltbank +worlbank");
423     assertQueryEqualsDOA("weltbank \t +worlbank", null,
424       "+weltbank +worlbank");
425   }
426
427   public void testSimpleDAO()
428     throws Exception JavaDoc {
429     assertQueryEqualsDOA("term term term", null, "+term +term +term");
430     assertQueryEqualsDOA("term +term term", null, "+term +term +term");
431     assertQueryEqualsDOA("term term +term", null, "+term +term +term");
432     assertQueryEqualsDOA("term +term +term", null, "+term +term +term");
433     assertQueryEqualsDOA("-term term term", null, "-term +term +term");
434   }
435
436   public void testBoost()
437     throws Exception JavaDoc {
438     StandardAnalyzer oneStopAnalyzer = new StandardAnalyzer(new String JavaDoc[]{"on"});
439     QueryParser qp = new QueryParser("field", oneStopAnalyzer);
440     Query q = qp.parse("on^1.0");
441     assertNotNull(q);
442     q = qp.parse("\"hello\"^2.0");
443     assertNotNull(q);
444     assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
445     q = qp.parse("hello^2.0");
446     assertNotNull(q);
447     assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
448     q = qp.parse("\"on\"^1.0");
449     assertNotNull(q);
450
451     q = QueryParser.parse("the^3", "field", new StandardAnalyzer());
452     assertNotNull(q);
453   }
454
455   public void testException() throws Exception JavaDoc {
456     try {
457       assertQueryEquals("\"some phrase", null, "abc");
458       fail("ParseException expected, not thrown");
459     } catch (ParseException expected) {
460     }
461   }
462
463   public void testCustomQueryParserWildcard() {
464     try {
465       new QPTestParser("contents", new WhitespaceAnalyzer()).parse("a?t");
466     } catch (ParseException expected) {
467       return;
468     }
469     fail("Wildcard queries should not be allowed");
470   }
471
472   public void testCustomQueryParserFuzzy() throws Exception JavaDoc {
473     try {
474       new QPTestParser("contents", new WhitespaceAnalyzer()).parse("xunit~");
475     } catch (ParseException expected) {
476       return;
477     }
478     fail("Fuzzy queries should not be allowed");
479   }
480
481   public void testBooleanQuery() throws Exception JavaDoc {
482     BooleanQuery.setMaxClauseCount(2);
483     try {
484       QueryParser.parse("one two three", "field", new WhitespaceAnalyzer());
485       fail("ParseException expected due to too many boolean clauses");
486     } catch (ParseException expected) {
487       // too many boolean clauses, so ParseException is expected
488
}
489   }
490
491   public void tearDown() {
492     BooleanQuery.setMaxClauseCount(originalMaxClauses);
493   }
494
495 }
496
Popular Tags