PhrasePrefixQuery


1   package org.apache.lucene.search;
2   
3   /**
4    * Copyright 2004 The Apache Software Foundation
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  import java.io.IOException  ;
20  import java.util.ArrayList  ;
21  import java.util.Iterator  ;
22  import java.util.Vector  ;
23  
24  import org.apache.lucene.index.IndexReader;
25  import org.apache.lucene.index.MultipleTermPositions;
26  import org.apache.lucene.index.Term;
27  import org.apache.lucene.index.TermPositions;
28  import org.apache.lucene.search.Query;
29  import org.apache.lucene.util.ToStringUtils;
30  
31  /**
32   * PhrasePrefixQuery is a generalized version of PhraseQuery, with an added
33   * method {@link #add(Term[])}.
34   * To use this class, to search for the phrase "Microsoft app*" first use
35   * add(Term) on the term "Microsoft", then find all terms that has "app" as
36   * prefix using IndexReader.terms(Term), and use PhrasePrefixQuery.add(Term[]
37   * terms) to add them to the query.
38   * 
39   * @deprecated use {@link org.apache.lucene.search.MultiPhraseQuery} instead
40   * @author Anders Nielsen
41   * @version 1.0
42   */
43  public class PhrasePrefixQuery extends Query {
44    private String   field;
45    private ArrayList   termArrays = new ArrayList  ();
46    private Vector   positions = new Vector  ();
47  
48    private int slop = 0;
49  
50    /** Sets the phrase slop for this query.
51     * @see PhraseQuery#setSlop(int)
52     */
53    public void setSlop(int s) { slop = s; }
54  
55    /** Sets the phrase slop for this query.
56     * @see PhraseQuery#getSlop()
57     */
58    public int getSlop() { return slop; }
59  
60    /** Add a single term at the next position in the phrase.
61     * @see PhraseQuery#add(Term)
62     */
63    public void add(Term term) { add(new Term[]{term}); }
64  
65    /** Add multiple terms at the next position in the phrase.  Any of the terms
66     * may match.
67     *
68     * @see PhraseQuery#add(Term)
69     */
70    public void add(Term[] terms) {
71      int position = 0;
72      if (positions.size() > 0)
73        position = ((Integer  ) positions.lastElement()).intValue() + 1;
74  
75      add(terms, position);
76    }
77  
78    /**
79     * Allows to specify the relative position of terms within the phrase.
80     * 
81     * @see PhraseQuery#add(Term, int)
82     * @param terms
83     * @param position
84     */
85    public void add(Term[] terms, int position) {
86      if (termArrays.size() == 0)
87        field = terms[0].field();
88  
89      for (int i = 0; i < terms.length; i++) {
90        if (terms[i].field() != field) {
91          throw new IllegalArgumentException  (
92              "All phrase terms must be in the same field (" + field + "): "
93                  + terms[i]);
94        }
95      }
96  
97      termArrays.add(terms);
98      positions.addElement(new Integer  (position));
99    }
100 
101   /**
102    * Returns the relative positions of terms in this phrase.
103    */
104   public int[] getPositions() {
105     int[] result = new int[positions.size()];
106     for (int i = 0; i < positions.size(); i++)
107       result[i] = ((Integer  ) positions.elementAt(i)).intValue();
108     return result;
109   }
110 
111   private class PhrasePrefixWeight implements Weight {
112     private Similarity similarity;
113     private float value;
114     private float idf;
115     private float queryNorm;
116     private float queryWeight;
117 
118     public PhrasePrefixWeight(Searcher searcher)
119       throws IOException   {
120       this.similarity = getSimilarity(searcher);
121 
122       // compute idf
123       Iterator   i = termArrays.iterator();
124       while (i.hasNext()) {
125         Term[] terms = (Term[])i.next();
126         for (int j=0; j<terms.length; j++) {
127           idf += getSimilarity(searcher).idf(terms[j], searcher);
128         }
129       }
130     }
131 
132     public Query getQuery() { return PhrasePrefixQuery.this; }
133     public float getValue() { return value; }
134 
135     public float sumOfSquaredWeights() {
136       queryWeight = idf * getBoost();             // compute query weight
137       return queryWeight * queryWeight;           // square it
138     }
139 
140     public void normalize(float queryNorm) {
141       this.queryNorm = queryNorm;
142       queryWeight *= queryNorm;                   // normalize query weight
143       value = queryWeight * idf;                  // idf for document 
144     }
145 
146     public Scorer scorer(IndexReader reader) throws IOException   {
147       if (termArrays.size() == 0)                  // optimize zero-term case
148         return null;
149 
150       TermPositions[] tps = new TermPositions[termArrays.size()];
151       for (int i=0; i<tps.length; i++) {
152         Term[] terms = (Term[])termArrays.get(i);
153 
154         TermPositions p;
155         if (terms.length > 1)
156           p = new MultipleTermPositions(reader, terms);
157         else
158           p = reader.termPositions(terms[0]);
159 
160         if (p == null)
161           return null;
162 
163         tps[i] = p;
164       }
165 
166       if (slop == 0)
167         return new ExactPhraseScorer(this, tps, getPositions(), similarity,
168                                      reader.norms(field));
169       else
170         return new SloppyPhraseScorer(this, tps, getPositions(), similarity,
171                                       slop, reader.norms(field));
172     }
173 
174     public Explanation explain(IndexReader reader, int doc)
175       throws IOException   {
176       Explanation result = new Explanation();
177       result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");
178 
179       Explanation idfExpl = new Explanation(idf, "idf("+getQuery()+")");
180 
181       // explain query weight
182       Explanation queryExpl = new Explanation();
183       queryExpl.setDescription("queryWeight(" + getQuery() + "), product of:");
184 
185       Explanation boostExpl = new Explanation(getBoost(), "boost");
186       if (getBoost() != 1.0f)
187         queryExpl.addDetail(boostExpl);
188 
189       queryExpl.addDetail(idfExpl);
190 
191       Explanation queryNormExpl = new Explanation(queryNorm,"queryNorm");
192       queryExpl.addDetail(queryNormExpl);
193 
194       queryExpl.setValue(boostExpl.getValue() *
195                          idfExpl.getValue() *
196                          queryNormExpl.getValue());
197 
198       result.addDetail(queryExpl);
199 
200       // explain field weight
201       Explanation fieldExpl = new Explanation();
202       fieldExpl.setDescription("fieldWeight("+getQuery()+" in "+doc+
203                                "), product of:");
204 
205       Explanation tfExpl = scorer(reader).explain(doc);
206       fieldExpl.addDetail(tfExpl);
207       fieldExpl.addDetail(idfExpl);
208 
209       Explanation fieldNormExpl = new Explanation();
210       byte[] fieldNorms = reader.norms(field);
211       float fieldNorm =
212         fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
213       fieldNormExpl.setValue(fieldNorm);
214       fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
215       fieldExpl.addDetail(fieldNormExpl);
216 
217       fieldExpl.setValue(tfExpl.getValue() *
218                          idfExpl.getValue() *
219                          fieldNormExpl.getValue());
220 
221       result.addDetail(fieldExpl);
222 
223       // combine them
224       result.setValue(queryExpl.getValue() * fieldExpl.getValue());
225 
226       if (queryExpl.getValue() == 1.0f)
227         return fieldExpl;
228 
229       return result;
230     }
231   }
232 
233   protected Weight createWeight(Searcher searcher) throws IOException   {
234     if (termArrays.size() == 1) {                 // optimize one-term case
235       Term[] terms = (Term[])termArrays.get(0);
236       BooleanQuery boq = new BooleanQuery(true);
237       for (int i=0; i<terms.length; i++) {
238         boq.add(new TermQuery(terms[i]), BooleanClause.Occur.SHOULD);
239       }
240       boq.setBoost(getBoost());
241       return boq.createWeight(searcher);
242     }
243     return new PhrasePrefixWeight(searcher);
244   }
245 
246   /** Prints a user-readable version of this query. */
247   public final String   toString(String   f) {
248     StringBuffer   buffer = new StringBuffer  ();
249     if (!field.equals(f)) {
250       buffer.append(field);
251       buffer.append(":");
252     }
253 
254     buffer.append("\"");
255     Iterator   i = termArrays.iterator();
256     while (i.hasNext()) {
257       Term[] terms = (Term[])i.next();
258       buffer.append(terms[0].text() + (terms.length > 1 ? "*" : ""));
259       if (i.hasNext())
260         buffer.append(" ");
261     }
262     buffer.append("\"");
263 
264     if (slop != 0) {
265       buffer.append("~");
266       buffer.append(slop);
267     }
268 
269     buffer.append(ToStringUtils.boost(getBoost()));
270 
271     return buffer.toString();
272   }
273 }
274
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags