KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > search > highlight > QueryTermExtractor


1 package org.apache.lucene.search.highlight;
2 /**
3  * Copyright 2002-2004 The Apache Software Foundation
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */

17
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
28
29 /**
30  * Utility class used to extract the terms used in a query, plus any weights.
31  * This class will not find terms for MultiTermQuery, RangeQuery and PrefixQuery classes
32  * so the caller must pass a rewritten query (see Query.rewrite) to obtain a list of
33  * expanded terms.
34  *
35  */

36 public final class QueryTermExtractor
37 {
38
39     /**
40      * Extracts all terms texts of a given Query into an array of WeightedTerms
41      *
42      * @param query Query to extract term texts from
43      * @return an array of the terms used in a query, plus their weights.
44      */

45     public static final WeightedTerm[] getTerms(Query query)
46     {
47         return getTerms(query,false);
48     }
49
50     /**
51      * Extracts all terms texts of a given Query into an array of WeightedTerms
52      *
53      * @param query Query to extract term texts from
54      * @param reader used to compute IDF which can be used to a) score selected fragments better
55      * b) use graded highlights eg chaning intensity of font color
56      * @param fieldName the field on which Inverse Document Frequency (IDF) calculations are based
57      * @return an array of the terms used in a query, plus their weights.
58      */

59     public static final WeightedTerm[] getIdfWeightedTerms(Query query, IndexReader reader, String JavaDoc fieldName)
60     {
61         WeightedTerm[] terms=getTerms(query,false);
62         int totalNumDocs=reader.numDocs();
63         for (int i = 0; i < terms.length; i++)
64         {
65             try
66             {
67                 int docFreq=reader.docFreq(new Term(fieldName,terms[i].term));
68                 //IDF algorithm taken from DefaultSimilarity class
69
float idf=(float)(Math.log((float)totalNumDocs/(double)(docFreq+1)) + 1.0);
70                 terms[i].weight*=idf;
71             }
72             catch (IOException JavaDoc e)
73             {
74                 //ignore
75
}
76         }
77         return terms;
78     }
79
80     /**
81      * Extracts all terms texts of a given Query into an array of WeightedTerms
82      *
83      * @param query Query to extract term texts from
84      * @param prohibited <code>true</code> to extract "prohibited" terms, too
85    * @return an array of the terms used in a query, plus their weights.
86    */

87     public static final WeightedTerm[] getTerms(Query query, boolean prohibited)
88     {
89         HashSet JavaDoc<WeightedTerm> terms=new HashSet JavaDoc<WeightedTerm>();
90         getTerms(query,terms,prohibited);
91         return (WeightedTerm[]) terms.toArray(new WeightedTerm[0]);
92     }
93
94     private static final void getTerms(Query query, HashSet JavaDoc<WeightedTerm> terms,boolean prohibited)
95     {
96         if (query instanceof BooleanQuery)
97             getTermsFromBooleanQuery((BooleanQuery) query, terms, prohibited);
98         else
99             if (query instanceof PhraseQuery)
100                 getTermsFromPhraseQuery((PhraseQuery) query, terms);
101             else
102                 if (query instanceof TermQuery)
103                     getTermsFromTermQuery((TermQuery) query, terms);
104 // else
105
// if ((query instanceof PrefixQuery)
106
// || (query instanceof RangeQuery)
107
// || (query instanceof MultiTermQuery))
108
// {
109
// //client should call rewrite BEFORE calling highlighter
110
// // Query expandedQuery = rewrite(reader, query);
111
// // getTerms(reader, expandedQuery, terms, prohibited);
112
// }
113
}
114
115     private static final void getTermsFromBooleanQuery(BooleanQuery query, HashSet JavaDoc<WeightedTerm> terms, boolean prohibited)
116     {
117         BooleanClause[] queryClauses = query.getClauses();
118         int i;
119
120         for (i = 0; i < queryClauses.length; i++)
121         {
122             if (prohibited || !queryClauses[i].prohibited)
123                 getTerms(queryClauses[i].query, terms, prohibited);
124         }
125     }
126
127     private static final void getTermsFromPhraseQuery(PhraseQuery query, HashSet JavaDoc<WeightedTerm> terms)
128     {
129         Term[] queryTerms = query.getTerms();
130         int i;
131
132         for (i = 0; i < queryTerms.length; i++)
133         {
134             terms.add(new WeightedTerm(query.getBoost(),queryTerms[i].text()));
135         }
136     }
137
138     private static final void getTermsFromTermQuery(TermQuery query, HashSet JavaDoc<WeightedTerm> terms)
139     {
140         terms.add(new WeightedTerm(query.getBoost(),query.getTerm().text()));
141     }
142
143
144 }
145
Popular Tags