KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > search > BooleanQuery


1 package org.apache.lucene.search;
2
3 /**
4  * Copyright 2004-2005 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
19 import java.io.IOException JavaDoc;
20 import java.util.Iterator JavaDoc;
21 import java.util.Set JavaDoc;
22 import java.util.Vector JavaDoc;
23
24 import org.apache.lucene.index.IndexReader;
25 import org.apache.lucene.util.ToStringUtils;
26
27 /** A Query that matches documents matching boolean combinations of other
28   * queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other
29   * BooleanQuerys.
30   */

31 public class BooleanQuery extends Query {
32
33   /**
34    * @deprecated use {@link #setMaxClauseCount(int)} instead
35    */

36   public static int maxClauseCount = 1024;
37
38   /** Thrown when an attempt is made to add more than {@link
39    * #getMaxClauseCount()} clauses. This typically happens if
40    * a PrefixQuery, FuzzyQuery, WildcardQuery, or RangeQuery
41    * is expanded to many terms during search.
42    */

43   public static class TooManyClauses extends RuntimeException JavaDoc {}
44
45   /** Return the maximum number of clauses permitted, 1024 by default.
46    * Attempts to add more than the permitted number of clauses cause {@link
47    * TooManyClauses} to be thrown.
48    * @see #setMaxClauseCount(int)
49    */

50   public static int getMaxClauseCount() { return maxClauseCount; }
51
52   /** Set the maximum number of clauses permitted per BooleanQuery.
53    * Default value is 1024.
54    * <p>TermQuery clauses are generated from for example prefix queries and
55    * fuzzy queries. Each TermQuery needs some buffer space during search,
56    * so this parameter indirectly controls the maximum buffer requirements for
57    * query search.
58    * <p>When this parameter becomes a bottleneck for a Query one can use a
59    * Filter. For example instead of a {@link RangeQuery} one can use a
60    * {@link RangeFilter}.
61    * <p>Normally the buffers are allocated by the JVM. When using for example
62    * {@link org.apache.lucene.store.MMapDirectory} the buffering is left to
63    * the operating system.
64    */

65   public static void setMaxClauseCount(int maxClauseCount) {
66     if (maxClauseCount < 1)
67       throw new IllegalArgumentException JavaDoc("maxClauseCount must be >= 1");
68     BooleanQuery.maxClauseCount = maxClauseCount;
69   }
70
71   private Vector JavaDoc clauses = new Vector JavaDoc();
72   private boolean disableCoord;
73
74   /** Constructs an empty boolean query. */
75   public BooleanQuery() {}
76
77   /** Constructs an empty boolean query.
78    *
79    * {@link Similarity#coord(int,int)} may be disabled in scoring, as
80    * appropriate. For example, this score factor does not make sense for most
81    * automatically generated queries, like {@link WildcardQuery} and {@link
82    * FuzzyQuery}.
83    *
84    * @param disableCoord disables {@link Similarity#coord(int,int)} in scoring.
85    */

86   public BooleanQuery(boolean disableCoord) {
87     this.disableCoord = disableCoord;
88   }
89
90   /** Returns true iff {@link Similarity#coord(int,int)} is disabled in
91    * scoring for this query instance.
92    * @see #BooleanQuery(boolean)
93    */

94   public boolean isCoordDisabled() { return disableCoord; }
95
96   // Implement coord disabling.
97
// Inherit javadoc.
98
public Similarity getSimilarity(Searcher searcher) {
99     Similarity result = super.getSimilarity(searcher);
100     if (disableCoord) { // disable coord as requested
101
result = new SimilarityDelegator(result) {
102           public float coord(int overlap, int maxOverlap) {
103             return 1.0f;
104           }
105         };
106     }
107     return result;
108   }
109
110   /**
111    * Specifies a minimum number of the optional BooleanClauses
112    * which must be satisifed.
113    *
114    * <p>
115    * By default no optional clauses are neccessary for a match
116    * (unless there are no required clauses). If this method is used,
117    * then the specified numebr of clauses is required.
118    * </p>
119    * <p>
120    * Use of this method is totally independant of specifying that
121    * any specific clauses are required (or prohibited). This number will
122    * only be compared against the number of matching optional clauses.
123    * </p>
124    * <p>
125    * EXPERT NOTE: Using this method will force the use of BooleanWeight2,
126    * regardless of wether setUseScorer14(true) has been called.
127    * </p>
128    *
129    * @param min the number of optional clauses that must match
130    * @see #setUseScorer14
131    */

132   public void setMinimumNumberShouldMatch(int min) {
133     this.minNrShouldMatch = min;
134   }
135   protected int minNrShouldMatch = 0;
136
137   /**
138    * Gets the minimum number of the optional BooleanClauses
139    * which must be satisifed.
140    */

141   public int getMinimumNumberShouldMatch() {
142     return minNrShouldMatch;
143   }
144
145   /** Adds a clause to a boolean query. Clauses may be:
146    * <ul>
147    * <li><code>required</code> which means that documents which <i>do not</i>
148    * match this sub-query will <i>not</i> match the boolean query;
149    * <li><code>prohibited</code> which means that documents which <i>do</i>
150    * match this sub-query will <i>not</i> match the boolean query; or
151    * <li>neither, in which case matched documents are neither prohibited from
152    * nor required to match the sub-query. However, a document must match at
153    * least 1 sub-query to match the boolean query.
154    * </ul>
155    * It is an error to specify a clause as both <code>required</code> and
156    * <code>prohibited</code>.
157    *
158    * @deprecated use {@link #add(Query, BooleanClause.Occur)} instead:
159    * <ul>
160    * <li>For add(query, true, false) use add(query, BooleanClause.Occur.MUST)
161    * <li>For add(query, false, false) use add(query, BooleanClause.Occur.SHOULD)
162    * <li>For add(query, false, true) use add(query, BooleanClause.Occur.MUST_NOT)
163    * </ul>
164    */

165   public void add(Query query, boolean required, boolean prohibited) {
166     add(new BooleanClause(query, required, prohibited));
167   }
168
169   /** Adds a clause to a boolean query.
170    *
171    * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number
172    * @see #getMaxClauseCount()
173    */

174   public void add(Query query, BooleanClause.Occur occur) {
175     add(new BooleanClause(query, occur));
176   }
177
178   /** Adds a clause to a boolean query.
179    * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number
180    * @see #getMaxClauseCount()
181    */

182   public void add(BooleanClause clause) {
183     if (clauses.size() >= maxClauseCount)
184       throw new TooManyClauses();
185
186     clauses.addElement(clause);
187   }
188
189   /** Returns the set of clauses in this query. */
190   public BooleanClause[] getClauses() {
191     return (BooleanClause[])clauses.toArray(new BooleanClause[0]);
192   }
193
194   private class BooleanWeight implements Weight {
195     protected Similarity similarity;
196     protected Vector JavaDoc weights = new Vector JavaDoc();
197
198     public BooleanWeight(Searcher searcher)
199       throws IOException JavaDoc {
200       this.similarity = getSimilarity(searcher);
201       for (int i = 0 ; i < clauses.size(); i++) {
202         BooleanClause c = (BooleanClause)clauses.elementAt(i);
203         weights.add(c.getQuery().createWeight(searcher));
204       }
205     }
206
207     public Query getQuery() { return BooleanQuery.this; }
208     public float getValue() { return getBoost(); }
209
210     public float sumOfSquaredWeights() throws IOException JavaDoc {
211       float sum = 0.0f;
212       for (int i = 0 ; i < weights.size(); i++) {
213         BooleanClause c = (BooleanClause)clauses.elementAt(i);
214         Weight w = (Weight)weights.elementAt(i);
215         if (!c.isProhibited())
216           sum += w.sumOfSquaredWeights(); // sum sub weights
217
}
218
219       sum *= getBoost() * getBoost(); // boost each sub-weight
220

221       return sum ;
222     }
223
224
225     public void normalize(float norm) {
226       norm *= getBoost(); // incorporate boost
227
for (int i = 0 ; i < weights.size(); i++) {
228         BooleanClause c = (BooleanClause)clauses.elementAt(i);
229         Weight w = (Weight)weights.elementAt(i);
230         if (!c.isProhibited())
231           w.normalize(norm);
232       }
233     }
234
235     /** @return A good old 1.4 Scorer */
236     public Scorer scorer(IndexReader reader) throws IOException JavaDoc {
237       // First see if the (faster) ConjunctionScorer will work. This can be
238
// used when all clauses are required. Also, at this point a
239
// BooleanScorer cannot be embedded in a ConjunctionScorer, as the hits
240
// from a BooleanScorer are not always sorted by document number (sigh)
241
// and hence BooleanScorer cannot implement skipTo() correctly, which is
242
// required by ConjunctionScorer.
243
boolean allRequired = true;
244       boolean noneBoolean = true;
245       for (int i = 0 ; i < weights.size(); i++) {
246         BooleanClause c = (BooleanClause)clauses.elementAt(i);
247         if (!c.isRequired())
248           allRequired = false;
249         if (c.getQuery() instanceof BooleanQuery)
250           noneBoolean = false;
251       }
252
253       if (allRequired && noneBoolean) { // ConjunctionScorer is okay
254
ConjunctionScorer result =
255           new ConjunctionScorer(similarity);
256         for (int i = 0 ; i < weights.size(); i++) {
257           Weight w = (Weight)weights.elementAt(i);
258           Scorer subScorer = w.scorer(reader);
259           if (subScorer == null)
260             return null;
261           result.add(subScorer);
262         }
263         return result;
264       }
265
266       // Use good-old BooleanScorer instead.
267
BooleanScorer result = new BooleanScorer(similarity);
268
269       for (int i = 0 ; i < weights.size(); i++) {
270         BooleanClause c = (BooleanClause)clauses.elementAt(i);
271         Weight w = (Weight)weights.elementAt(i);
272         Scorer subScorer = w.scorer(reader);
273         if (subScorer != null)
274           result.add(subScorer, c.isRequired(), c.isProhibited());
275         else if (c.isRequired())
276           return null;
277       }
278
279       return result;
280     }
281
282     public Explanation explain(IndexReader reader, int doc)
283       throws IOException JavaDoc {
284       Explanation sumExpl = new Explanation();
285       sumExpl.setDescription("sum of:");
286       int coord = 0;
287       int maxCoord = 0;
288       float sum = 0.0f;
289       for (int i = 0 ; i < weights.size(); i++) {
290         BooleanClause c = (BooleanClause)clauses.elementAt(i);
291         Weight w = (Weight)weights.elementAt(i);
292         Explanation e = w.explain(reader, doc);
293         if (!c.isProhibited()) maxCoord++;
294         if (e.getValue() > 0) {
295           if (!c.isProhibited()) {
296             sumExpl.addDetail(e);
297             sum += e.getValue();
298             coord++;
299           } else {
300             return new Explanation(0.0f, "match prohibited");
301           }
302         } else if (c.isRequired()) {
303           return new Explanation(0.0f, "match required");
304         }
305       }
306       sumExpl.setValue(sum);
307
308       if (coord == 1) // only one clause matched
309
sumExpl = sumExpl.getDetails()[0]; // eliminate wrapper
310

311       float coordFactor = similarity.coord(coord, maxCoord);
312       if (coordFactor == 1.0f) // coord is no-op
313
return sumExpl; // eliminate wrapper
314
else {
315         Explanation result = new Explanation();
316         result.setDescription("product of:");
317         result.addDetail(sumExpl);
318         result.addDetail(new Explanation(coordFactor,
319                                          "coord("+coord+"/"+maxCoord+")"));
320         result.setValue(sum*coordFactor);
321         return result;
322       }
323     }
324   }
325
326   private class BooleanWeight2 extends BooleanWeight {
327     /* Merge into BooleanWeight in case the 1.4 BooleanScorer is dropped */
328     public BooleanWeight2(Searcher searcher)
329       throws IOException JavaDoc {
330         super(searcher);
331     }
332
333     /** @return An alternative Scorer that uses and provides skipTo(),
334      * and scores documents in document number order.
335      */

336     public Scorer scorer(IndexReader reader) throws IOException JavaDoc {
337       BooleanScorer2 result = new BooleanScorer2(similarity,
338                                                  minNrShouldMatch);
339
340       for (int i = 0 ; i < weights.size(); i++) {
341         BooleanClause c = (BooleanClause)clauses.elementAt(i);
342         Weight w = (Weight)weights.elementAt(i);
343         Scorer subScorer = w.scorer(reader);
344         if (subScorer != null)
345           result.add(subScorer, c.isRequired(), c.isProhibited());
346         else if (c.isRequired())
347           return null;
348       }
349
350       return result;
351     }
352   }
353
354   /** Indicates whether to use good old 1.4 BooleanScorer. */
355   private static boolean useScorer14 = false;
356
357   public static void setUseScorer14(boolean use14) {
358     useScorer14 = use14;
359   }
360
361   public static boolean getUseScorer14() {
362     return useScorer14;
363   }
364
365   protected Weight createWeight(Searcher searcher) throws IOException JavaDoc {
366
367     if (0 < minNrShouldMatch) {
368       // :TODO: should we throw an exception if getUseScorer14 ?
369
return new BooleanWeight2(searcher);
370     }
371
372     return getUseScorer14() ? (Weight) new BooleanWeight(searcher)
373                             : (Weight) new BooleanWeight2(searcher);
374   }
375
376   public Query rewrite(IndexReader reader) throws IOException JavaDoc {
377     if (clauses.size() == 1) { // optimize 1-clause queries
378
BooleanClause c = (BooleanClause)clauses.elementAt(0);
379       if (!c.isProhibited()) { // just return clause
380

381         Query query = c.getQuery().rewrite(reader); // rewrite first
382

383         if (getBoost() != 1.0f) { // incorporate boost
384
if (query == c.getQuery()) // if rewrite was no-op
385
query = (Query)query.clone(); // then clone before boost
386
query.setBoost(getBoost() * query.getBoost());
387         }
388
389         return query;
390       }
391     }
392
393     BooleanQuery clone = null; // recursively rewrite
394
for (int i = 0 ; i < clauses.size(); i++) {
395       BooleanClause c = (BooleanClause)clauses.elementAt(i);
396       Query query = c.getQuery().rewrite(reader);
397       if (query != c.getQuery()) { // clause rewrote: must clone
398
if (clone == null)
399           clone = (BooleanQuery)this.clone();
400         clone.clauses.setElementAt
401           (new BooleanClause(query, c.getOccur()), i);
402       }
403     }
404     if (clone != null) {
405       return clone; // some clauses rewrote
406
} else
407       return this; // no clauses rewrote
408
}
409
410   // inherit javadoc
411
public void extractTerms(Set JavaDoc terms) {
412       for (Iterator JavaDoc i = clauses.iterator(); i.hasNext();) {
413           BooleanClause clause = (BooleanClause) i.next();
414           clause.getQuery().extractTerms(terms);
415         }
416   }
417
418   public Object JavaDoc clone() {
419     BooleanQuery clone = (BooleanQuery)super.clone();
420     clone.clauses = (Vector JavaDoc)this.clauses.clone();
421     return clone;
422   }
423
424   /** Prints a user-readable version of this query. */
425   public String JavaDoc toString(String JavaDoc field) {
426     StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
427     boolean needParens=(getBoost() != 1.0) || (getMinimumNumberShouldMatch()>0) ;
428     if (needParens) {
429       buffer.append("(");
430     }
431
432     for (int i = 0 ; i < clauses.size(); i++) {
433       BooleanClause c = (BooleanClause)clauses.elementAt(i);
434       if (c.isProhibited())
435         buffer.append("-");
436       else if (c.isRequired())
437         buffer.append("+");
438
439       Query subQuery = c.getQuery();
440       if (subQuery instanceof BooleanQuery) { // wrap sub-bools in parens
441
buffer.append("(");
442         buffer.append(c.getQuery().toString(field));
443         buffer.append(")");
444       } else
445         buffer.append(c.getQuery().toString(field));
446
447       if (i != clauses.size()-1)
448         buffer.append(" ");
449     }
450
451     if (needParens) {
452       buffer.append(")");
453     }
454
455     if (getMinimumNumberShouldMatch()>0) {
456       buffer.append('~');
457       buffer.append(getMinimumNumberShouldMatch());
458     }
459
460     if (getBoost() != 1.0f)
461     {
462       buffer.append(ToStringUtils.boost(getBoost()));
463     }
464
465     return buffer.toString();
466   }
467
468   /** Returns true iff <code>o</code> is equal to this. */
469   public boolean equals(Object JavaDoc o) {
470     if (!(o instanceof BooleanQuery))
471       return false;
472     BooleanQuery other = (BooleanQuery)o;
473     return (this.getBoost() == other.getBoost())
474         && this.clauses.equals(other.clauses)
475         && this.getMinimumNumberShouldMatch() == other.getMinimumNumberShouldMatch();
476   }
477
478   /** Returns a hash code value for this object.*/
479   public int hashCode() {
480     return Float.floatToIntBits(getBoost()) ^ clauses.hashCode()
481            + getMinimumNumberShouldMatch();
482   }
483
484 }
485
Popular Tags