KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > search > DisjunctionMaxQuery


1 package org.apache.lucene.search;
2
3 /**
4  * Copyright 2004 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
19 import org.apache.lucene.index.IndexReader;
20
21 import java.io.IOException JavaDoc;
22 import java.util.ArrayList JavaDoc;
23 import java.util.Iterator JavaDoc;
24 import java.util.Collection JavaDoc;
25
26 /**
27  * A query that generates the union of the documents produced by its subqueries, and that scores each document as the maximum
28  * score for that document produced by any subquery plus a tie breaking increment for any additional matching subqueries.
29  * This is useful to search for a word in multiple fields with different boost factors (so that the fields cannot be
30  * combined equivalently into a single search field). We want the primary score to be the one associated with the highest boost,
31  * not the sum of the field scores (as BooleanQuery would give).
32  * If the query is "albino elephant" this ensures that "albino" matching one field and "elephant" matching
33  * another gets a higher score than "albino" matching both fields.
34  * To get this result, use both BooleanQuery and DisjunctionMaxQuery: for each term a DisjunctionMaxQuery searches for it in
35  * each field, while the set of these DisjunctionMaxQuery's is combined into a BooleanQuery.
36  * The tie breaker capability allows results that include the same term in multiple fields to be judged better than results that
37  * include this term in only the best of those multiple fields, without confusing this with the better case of two different terms
38  * in the multiple fields.
39  * @author Chuck Williams
40  */

41 public class DisjunctionMaxQuery extends Query {
42
43   /* The subqueries */
44   private ArrayList JavaDoc disjuncts = new ArrayList JavaDoc();
45
46   /* Multiple of the non-max disjunct scores added into our final score. Non-zero values support tie-breaking. */
47   private float tieBreakerMultiplier = 0.0f;
48
49   /** Creates a new empty DisjunctionMaxQuery. Use add() to add the subqueries.
50    * @param tieBreakerMultiplier this score of each non-maximum disjunct for a document is multiplied by this weight
51    * and added into the final score. If non-zero, the value should be small, on the order of 0.1, which says that
52    * 10 occurrences of word in a lower-scored field that is also in a higher scored field is just as good as a unique
53    * word in the lower scored field (i.e., one that is not in any higher scored field.
54    */

55   public DisjunctionMaxQuery(float tieBreakerMultiplier) {
56     this.tieBreakerMultiplier = tieBreakerMultiplier;
57   }
58
59   /**
60    * Creates a new DisjunctionMaxQuery
61    * @param disjuncts a Collection<Query> of all the disjuncts to add
62    * @param tieBreakerMultiplier the weight to give to each matching non-maximum disjunct
63    */

64   public DisjunctionMaxQuery(Collection JavaDoc disjuncts, float tieBreakerMultiplier) {
65     this.tieBreakerMultiplier = tieBreakerMultiplier;
66     add(disjuncts);
67   }
68
69   /** Add a subquery to this disjunction
70    * @param query the disjunct added
71    */

72   public void add(Query query) {
73     disjuncts.add(query);
74   }
75
76   /** Add a collection of disjuncts to this disjunction
77    * via Iterable<Query>
78    */

79   public void add(Collection JavaDoc disjuncts) {
80     this.disjuncts.addAll(disjuncts);
81   }
82
83   /** An Iterator<Query> over the disjuncts */
84   public Iterator JavaDoc iterator() {
85     return disjuncts.iterator();
86   }
87
88   /* The Weight for DisjunctionMaxQuery's, used to normalize, score and explain these queries */
89   private class DisjunctionMaxWeight implements Weight {
90
91     private Searcher searcher; // The searcher with which we are associated.
92
private ArrayList JavaDoc weights = new ArrayList JavaDoc(); // The Weight's for our subqueries, in 1-1 correspondence with disjuncts
93

94     /* Construct the Weight for this Query searched by searcher. Recursively construct subquery weights. */
95     public DisjunctionMaxWeight(Searcher searcher) throws IOException JavaDoc {
96       this.searcher = searcher;
97       for (int i = 0; i < disjuncts.size(); i++)
98         weights.add(((Query) disjuncts.get(i)).createWeight(searcher));
99     }
100
101     /* Return our associated DisjunctionMaxQuery */
102     public Query getQuery() { return DisjunctionMaxQuery.this; }
103
104     /* Return our boost */
105     public float getValue() { return getBoost(); }
106
107     /* Compute the sub of squared weights of us applied to our subqueries. Used for normalization. */
108     public float sumOfSquaredWeights() throws IOException JavaDoc {
109       float max = 0.0f, sum = 0.0f;
110       for (int i = 0; i < weights.size(); i++) {
111         float sub = ((Weight) weights.get(i)).sumOfSquaredWeights();
112         sum += sub;
113         max = Math.max(max, sub);
114       }
115       return (((sum - max) * tieBreakerMultiplier * tieBreakerMultiplier) + max) * getBoost() * getBoost();
116     }
117
118     /* Apply the computed normalization factor to our subqueries */
119     public void normalize(float norm) {
120       norm *= getBoost(); // Incorporate our boost
121
for (int i = 0 ; i < weights.size(); i++)
122         ((Weight) weights.get(i)).normalize(norm);
123     }
124
125     /* Create the scorer used to score our associated DisjunctionMaxQuery */
126     public Scorer scorer(IndexReader reader) throws IOException JavaDoc {
127       DisjunctionMaxScorer result = new DisjunctionMaxScorer(tieBreakerMultiplier, getSimilarity(searcher));
128       for (int i = 0 ; i < weights.size(); i++) {
129         Weight w = (Weight) weights.get(i);
130         Scorer subScorer = w.scorer(reader);
131         if (subScorer == null) return null;
132         result.add(subScorer);
133       }
134       return result;
135     }
136
137     /* Explain the score we computed for doc */
138     public Explanation explain(IndexReader reader, int doc) throws IOException JavaDoc {
139       if ( disjuncts.size() == 1) return ((Weight) weights.get(0)).explain(reader,doc);
140       Explanation result = new Explanation();
141       float max = 0.0f, sum = 0.0f;
142       result.setDescription(tieBreakerMultiplier == 0.0f ? "max of:" : "max plus " + tieBreakerMultiplier + " times others of:");
143       for (int i = 0 ; i < weights.size(); i++) {
144         Explanation e = ((Weight) weights.get(i)).explain(reader, doc);
145         if (e.getValue() > 0) {
146           result.addDetail(e);
147           sum += e.getValue();
148           max = Math.max(max, e.getValue());
149         }
150       }
151       result.setValue(max + (sum - max)*tieBreakerMultiplier);
152       return result;
153     }
154
155   } // end of DisjunctionMaxWeight inner class
156

157   /* Create the Weight used to score us */
158   protected Weight createWeight(Searcher searcher) throws IOException JavaDoc {
159     return new DisjunctionMaxWeight(searcher);
160   }
161
162   /** Optimize our representation and our subqueries representations
163    * @param reader the IndexReader we query
164    * @return an optimized copy of us (which may not be a copy if there is nothing to optimize) */

165   public Query rewrite(IndexReader reader) throws IOException JavaDoc {
166     if (disjuncts.size() == 1) {
167       Query singleton = (Query) disjuncts.get(0);
168       Query result = singleton.rewrite(reader);
169       if (getBoost() != 1.0f) {
170         if (result == singleton) result = (Query)result.clone();
171         result.setBoost(getBoost() * result.getBoost());
172       }
173       return result;
174     }
175     DisjunctionMaxQuery clone = null;
176     for (int i = 0 ; i < disjuncts.size(); i++) {
177       Query clause = (Query) disjuncts.get(i);
178       Query rewrite = clause.rewrite(reader);
179       if (rewrite != clause) {
180         if (clone == null) clone = (DisjunctionMaxQuery)this.clone();
181         clone.disjuncts.set(i, rewrite);
182       }
183     }
184     if (clone != null) return clone;
185     else return this;
186   }
187
188   /** Create a shallow copy of us -- used in rewriting if necessary
189    * @return a copy of us (but reuse, don't copy, our subqueries) */

190   public Object JavaDoc clone() {
191     DisjunctionMaxQuery clone = (DisjunctionMaxQuery)super.clone();
192     clone.disjuncts = (ArrayList JavaDoc)this.disjuncts.clone();
193     return clone;
194   }
195
196   /** Prettyprint us.
197    * @param field the field to which we are applied
198    * @return a string that shows what we do, of the form "(disjunct1 | disjunct2 | ... | disjunctn)^boost"
199    */

200   public String JavaDoc toString(String JavaDoc field) {
201     StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
202     buffer.append("(");
203     for (int i = 0 ; i < disjuncts.size(); i++) {
204       Query subquery = (Query) disjuncts.get(i);
205       if (subquery instanceof BooleanQuery) { // wrap sub-bools in parens
206
buffer.append("(");
207         buffer.append(subquery.toString(field));
208         buffer.append(")");
209       }
210       else buffer.append(subquery.toString(field));
211       if (i != disjuncts.size()-1) buffer.append(" | ");
212     }
213     buffer.append(")");
214     if (tieBreakerMultiplier != 0.0f) {
215       buffer.append("~");
216       buffer.append(tieBreakerMultiplier);
217     }
218     if (getBoost() != 1.0) {
219       buffer.append("^");
220       buffer.append(getBoost());
221     }
222     return buffer.toString();
223   }
224
225   /** Return true iff we represent the same query as o
226    * @param o another object
227    * @return true iff o is a DisjunctionMaxQuery with the same boost and the same subqueries, in the same order, as us
228    */

229   public boolean equals(Object JavaDoc o) {
230     if (! (o instanceof DisjunctionMaxQuery) ) return false;
231     DisjunctionMaxQuery other = (DisjunctionMaxQuery)o;
232     return this.getBoost() == other.getBoost()
233             && this.tieBreakerMultiplier == other.tieBreakerMultiplier
234             && this.disjuncts.equals(other.disjuncts);
235   }
236
237   /** Compute a hash code for hashing us
238    * @return the hash code
239    */

240   public int hashCode() {
241     return Float.floatToIntBits(getBoost())
242             + Float.floatToIntBits(tieBreakerMultiplier)
243             + disjuncts.hashCode();
244   }
245
246 }
247
Popular Tags