KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > search > highlight > QueryScorer


1 package org.apache.lucene.search.highlight;
2 /**
3  * Copyright 2002-2004 The Apache Software Foundation
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */

17
18 import java.util.HashMap JavaDoc;
19 import java.util.HashSet JavaDoc;
20 import org.apache.lucene.analysis.Token;
21 import org.apache.lucene.index.IndexReader;
22 import org.apache.lucene.search.Query;
23
24 /**
25  * {@link Scorer} implementation which scores text fragments by the number of unique query terms found. This class uses the {@link QueryTermExtractor} class to process determine the query terms and their boosts to be used.
26  * @author mark@searcharea.co.uk
27  */

28 //TODO: provide option to boost score of fragments near beginning of document
29
// based on fragment.getFragNum()
30
public class QueryScorer implements Scorer
31 {
32     TextFragment currentTextFragment=null;
33     HashSet JavaDoc<String JavaDoc> uniqueTermsInFragment;
34     float totalScore=0;
35     float maxTermWeight=0;
36     private HashMap JavaDoc<String JavaDoc, WeightedTerm> termsToFind;
37     
38
39     /**
40      *
41      * @param query a Lucene query (ideally rewritten using query.rewrite
42      * before being passed to this class and the searcher)
43      */

44     public QueryScorer(Query query)
45     {
46         this(QueryTermExtractor.getTerms(query));
47     }
48
49     /**
50      *
51      * @param query a Lucene query (ideally rewritten using query.rewrite
52      * before being passed to this class and the searcher)
53      * @param reader used to compute IDF which can be used to a) score selected fragments better
54      * b) use graded highlights eg set font color intensity
55      * @param fieldName the field on which Inverse Document Frequency (IDF) calculations are based
56      */

57     public QueryScorer(Query query, IndexReader reader, String JavaDoc fieldName)
58     {
59         this(QueryTermExtractor.getIdfWeightedTerms(query, reader, fieldName));
60     }
61
62     public QueryScorer(WeightedTerm []weightedTerms )
63     {
64         termsToFind = new HashMap JavaDoc<String JavaDoc, WeightedTerm>();
65         for (int i = 0; i < weightedTerms.length; i++)
66         {
67             termsToFind.put(weightedTerms[i].term,weightedTerms[i]);
68             maxTermWeight=Math.max(maxTermWeight,weightedTerms[i].getWeight());
69         }
70     }
71     
72
73     /* (non-Javadoc)
74      * @see org.apache.lucene.search.highlight.FragmentScorer#startFragment(org.apache.lucene.search.highlight.TextFragment)
75      */

76     public void startFragment(TextFragment newFragment)
77     {
78         uniqueTermsInFragment = new HashSet JavaDoc<String JavaDoc>();
79         currentTextFragment=newFragment;
80         totalScore=0;
81         
82     }
83     
84     /* (non-Javadoc)
85      * @see org.apache.lucene.search.highlight.FragmentScorer#scoreToken(org.apache.lucene.analysis.Token)
86      */

87     public float getTokenScore(Token token)
88     {
89         String JavaDoc termText=token.termText();
90         
91         WeightedTerm queryTerm=(WeightedTerm) termsToFind.get(termText);
92         if(queryTerm==null)
93         {
94             //not a query term - return
95
return 0;
96         }
97         //found a query term - is it unique in this doc?
98
if(!uniqueTermsInFragment.contains(termText))
99         {
100             totalScore+=queryTerm.getWeight();
101             uniqueTermsInFragment.add(termText);
102         }
103         return queryTerm.getWeight();
104     }
105     
106     
107     /* (non-Javadoc)
108      * @see org.apache.lucene.search.highlight.FragmentScorer#endFragment(org.apache.lucene.search.highlight.TextFragment)
109      */

110     public float getFragmentScore()
111     {
112         return totalScore;
113     }
114
115
116     /* (non-Javadoc)
117      * @see org.apache.lucene.search.highlight.FragmentScorer#allFragmentsProcessed()
118      */

119     public void allFragmentsProcessed()
120     {
121         //this class has no special operations to perform at end of processing
122
}
123
124     /**
125      * @return The highest weighted term (useful for passing to GradientFormatter to set top end of coloring scale.
126      * @uml.property name="maxTermWeight"
127      */

128     public float getMaxTermWeight()
129     {
130         return maxTermWeight;
131     }
132 }
133
Popular Tags