KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > search > PhraseQuery


1 package org.apache.lucene.search;
2
3 /**
4  * Copyright 2004 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
19 import java.io.IOException JavaDoc;
20 import java.util.Set JavaDoc;
21 import java.util.Vector JavaDoc;
22
23 import org.apache.lucene.index.Term;
24 import org.apache.lucene.index.TermPositions;
25 import org.apache.lucene.index.IndexReader;
26 import org.apache.lucene.util.ToStringUtils;
27
28 /** A Query that matches documents containing a particular sequence of terms.
29  * A PhraseQuery is built by QueryParser for input like <code>"new york"</code>.
30  *
31  * <p>This query may be combined with other terms or queries with a {@link BooleanQuery}.
32  */

33 public class PhraseQuery extends Query {
34   private String JavaDoc field;
35   private Vector JavaDoc terms = new Vector JavaDoc();
36   private Vector JavaDoc positions = new Vector JavaDoc();
37   private int slop = 0;
38
39   /** Constructs an empty phrase query. */
40   public PhraseQuery() {}
41
42   /** Sets the number of other words permitted between words in query phrase.
43     If zero, then this is an exact phrase search. For larger values this works
44     like a <code>WITHIN</code> or <code>NEAR</code> operator.
45
46     <p>The slop is in fact an edit-distance, where the units correspond to
47     moves of terms in the query phrase out of position. For example, to switch
48     the order of two words requires two moves (the first move places the words
49     atop one another), so to permit re-orderings of phrases, the slop must be
50     at least two.
51
52     <p>More exact matches are scored higher than sloppier matches, thus search
53     results are sorted by exactness.
54
55     <p>The slop is zero by default, requiring exact matches.*/

56   public void setSlop(int s) { slop = s; }
57   /** Returns the slop. See setSlop(). */
58   public int getSlop() { return slop; }
59
60   /**
61    * Adds a term to the end of the query phrase.
62    * The relative position of the term is the one immediately after the last term added.
63    */

64   public void add(Term term) {
65     int position = 0;
66     if(positions.size() > 0)
67         position = ((Integer JavaDoc) positions.lastElement()).intValue() + 1;
68
69     add(term, position);
70   }
71
72   /**
73    * Adds a term to the end of the query phrase.
74    * The relative position of the term within the phrase is specified explicitly.
75    * This allows e.g. phrases with more than one term at the same position
76    * or phrases with gaps (e.g. in connection with stopwords).
77    *
78    * @param term
79    * @param position
80    */

81   public void add(Term term, int position) {
82       if (terms.size() == 0)
83           field = term.field();
84       else if (term.field() != field)
85           throw new IllegalArgumentException JavaDoc("All phrase terms must be in the same field: " + term);
86
87       terms.addElement(term);
88       positions.addElement(new Integer JavaDoc(position));
89   }
90
91   /** Returns the set of terms in this phrase. */
92   public Term[] getTerms() {
93     return (Term[])terms.toArray(new Term[0]);
94   }
95
96   /**
97    * Returns the relative positions of terms in this phrase.
98    */

99   public int[] getPositions() {
100       int[] result = new int[positions.size()];
101       for(int i = 0; i < positions.size(); i++)
102           result[i] = ((Integer JavaDoc) positions.elementAt(i)).intValue();
103       return result;
104   }
105
106   private class PhraseWeight implements Weight {
107     private Similarity similarity;
108     private float value;
109     private float idf;
110     private float queryNorm;
111     private float queryWeight;
112
113     public PhraseWeight(Searcher searcher)
114       throws IOException JavaDoc {
115       this.similarity = getSimilarity(searcher);
116
117       idf = similarity.idf(terms, searcher);
118     }
119
120     public String JavaDoc toString() { return "weight(" + PhraseQuery.this + ")"; }
121
122     public Query getQuery() { return PhraseQuery.this; }
123     public float getValue() { return value; }
124
125     public float sumOfSquaredWeights() {
126       queryWeight = idf * getBoost(); // compute query weight
127
return queryWeight * queryWeight; // square it
128
}
129
130     public void normalize(float queryNorm) {
131       this.queryNorm = queryNorm;
132       queryWeight *= queryNorm; // normalize query weight
133
value = queryWeight * idf; // idf for document
134
}
135
136     public Scorer scorer(IndexReader reader) throws IOException JavaDoc {
137       if (terms.size() == 0) // optimize zero-term case
138
return null;
139
140       TermPositions[] tps = new TermPositions[terms.size()];
141       for (int i = 0; i < terms.size(); i++) {
142         TermPositions p = reader.termPositions((Term)terms.elementAt(i));
143         if (p == null)
144           return null;
145         tps[i] = p;
146       }
147
148       if (slop == 0) // optimize exact case
149
return new ExactPhraseScorer(this, tps, getPositions(), similarity,
150                                      reader.norms(field));
151       else
152         return
153           new SloppyPhraseScorer(this, tps, getPositions(), similarity, slop,
154                                  reader.norms(field));
155
156     }
157
158     public Explanation explain(IndexReader reader, int doc)
159       throws IOException JavaDoc {
160
161       Explanation result = new Explanation();
162       result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");
163
164       StringBuffer JavaDoc docFreqs = new StringBuffer JavaDoc();
165       StringBuffer JavaDoc query = new StringBuffer JavaDoc();
166       query.append('\"');
167       for (int i = 0; i < terms.size(); i++) {
168         if (i != 0) {
169           docFreqs.append(" ");
170           query.append(" ");
171         }
172
173         Term term = (Term)terms.elementAt(i);
174
175         docFreqs.append(term.text());
176         docFreqs.append("=");
177         docFreqs.append(reader.docFreq(term));
178
179         query.append(term.text());
180       }
181       query.append('\"');
182
183       Explanation idfExpl =
184         new Explanation(idf, "idf(" + field + ": " + docFreqs + ")");
185
186       // explain query weight
187
Explanation queryExpl = new Explanation();
188       queryExpl.setDescription("queryWeight(" + getQuery() + "), product of:");
189
190       Explanation boostExpl = new Explanation(getBoost(), "boost");
191       if (getBoost() != 1.0f)
192         queryExpl.addDetail(boostExpl);
193       queryExpl.addDetail(idfExpl);
194
195       Explanation queryNormExpl = new Explanation(queryNorm,"queryNorm");
196       queryExpl.addDetail(queryNormExpl);
197
198       queryExpl.setValue(boostExpl.getValue() *
199                          idfExpl.getValue() *
200                          queryNormExpl.getValue());
201
202       result.addDetail(queryExpl);
203
204       // explain field weight
205
Explanation fieldExpl = new Explanation();
206       fieldExpl.setDescription("fieldWeight("+field+":"+query+" in "+doc+
207                                "), product of:");
208
209       Explanation tfExpl = scorer(reader).explain(doc);
210       fieldExpl.addDetail(tfExpl);
211       fieldExpl.addDetail(idfExpl);
212
213       Explanation fieldNormExpl = new Explanation();
214       byte[] fieldNorms = reader.norms(field);
215       float fieldNorm =
216         fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
217       fieldNormExpl.setValue(fieldNorm);
218       fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
219       fieldExpl.addDetail(fieldNormExpl);
220
221       fieldExpl.setValue(tfExpl.getValue() *
222                          idfExpl.getValue() *
223                          fieldNormExpl.getValue());
224
225       result.addDetail(fieldExpl);
226
227       // combine them
228
result.setValue(queryExpl.getValue() * fieldExpl.getValue());
229
230       if (queryExpl.getValue() == 1.0f)
231         return fieldExpl;
232
233       return result;
234     }
235   }
236
237   protected Weight createWeight(Searcher searcher) throws IOException JavaDoc {
238     if (terms.size() == 1) { // optimize one-term case
239
Term term = (Term)terms.elementAt(0);
240       Query termQuery = new TermQuery(term);
241       termQuery.setBoost(getBoost());
242       return termQuery.createWeight(searcher);
243     }
244     return new PhraseWeight(searcher);
245   }
246
247   /**
248    * @see org.apache.lucene.search.Query#extractTerms(java.util.Set)
249    */

250   public void extractTerms(Set JavaDoc queryTerms) {
251     queryTerms.addAll(terms);
252   }
253
254   /** Prints a user-readable version of this query. */
255   public String JavaDoc toString(String JavaDoc f) {
256     StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
257     if (!field.equals(f)) {
258       buffer.append(field);
259       buffer.append(":");
260     }
261
262     buffer.append("\"");
263     for (int i = 0; i < terms.size(); i++) {
264       buffer.append(((Term)terms.elementAt(i)).text());
265       if (i != terms.size()-1)
266   buffer.append(" ");
267     }
268     buffer.append("\"");
269
270     if (slop != 0) {
271       buffer.append("~");
272       buffer.append(slop);
273     }
274
275     buffer.append(ToStringUtils.boost(getBoost()));
276
277     return buffer.toString();
278   }
279
280   /** Returns true iff <code>o</code> is equal to this. */
281   public boolean equals(Object JavaDoc o) {
282     if (!(o instanceof PhraseQuery))
283       return false;
284     PhraseQuery other = (PhraseQuery)o;
285     return (this.getBoost() == other.getBoost())
286       && (this.slop == other.slop)
287       && this.terms.equals(other.terms)
288       && this.positions.equals(other.positions);
289   }
290
291   /** Returns a hash code value for this object.*/
292   public int hashCode() {
293     return Float.floatToIntBits(getBoost())
294       ^ slop
295       ^ terms.hashCode()
296       ^ positions.hashCode();
297   }
298
299 }
300
Popular Tags