KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > searcher > LuceneQueryOptimizer


1 /* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
2 /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3
4 package net.nutch.searcher;
5
6 import org.apache.lucene.search.Searcher;
7 import org.apache.lucene.search.BooleanQuery;
8 import org.apache.lucene.search.BooleanClause;
9 import org.apache.lucene.search.TermQuery;
10 import org.apache.lucene.search.QueryFilter;
11 import org.apache.lucene.search.Filter;
12 import org.apache.lucene.search.TopDocs;
13
14 import java.util.LinkedHashMap JavaDoc;
15 import java.util.Map JavaDoc;
16 import java.io.IOException JavaDoc;
17
18 /** Utility which converts certain query clauses into {@link QueryFilter}s and
19  * caches these. Only required {@link TermQuery}s whose boost is zero and
20  * whose term occurs in at least a certain fraction of documents are converted
21  * to cached filters. This accellerates query constraints like language,
22  * document format, etc., which do not affect ranking but might otherwise slow
23  * search considerably. */

24 class LuceneQueryOptimizer {
25   private LinkedHashMap JavaDoc cache; // an LRU cache of QueryFilter
26

27   private float threshold;
28
29   /** Construct an optimizer that caches and uses filters for required {@link
30    * TermQuery}s whose boost is zero.
31    * @param cacheSize the number of QueryFilters to cache
32    * @param threshold the fraction of documents which must contain term
33    */

34   public LuceneQueryOptimizer(final int cacheSize, float threshold) {
35     this.cache = new LinkedHashMap JavaDoc(cacheSize, 0.75f, true) {
36         protected boolean removeEldestEntry(Map.Entry JavaDoc eldest) {
37           return size() > cacheSize; // limit size of cache
38
}
39       };
40     this.threshold = threshold;
41   }
42
43   public TopDocs optimize(BooleanQuery original,
44                           Searcher searcher, int numHits)
45     throws IOException JavaDoc {
46
47     BooleanQuery query = new BooleanQuery();
48     BooleanQuery filterQuery = null;
49
50     BooleanClause[] clauses = original.getClauses();
51     for (int i = 0; i < clauses.length; i++) {
52       BooleanClause c = clauses[i];
53       if (c.required // required
54
&& c.query.getBoost() == 0.0f // boost is zero
55
&& c.query instanceof TermQuery // TermQuery
56
&& (searcher.docFreq(((TermQuery)c.query).getTerm())
57               / (float)searcher.maxDoc()) >= threshold) { // check threshold
58
if (filterQuery == null)
59           filterQuery = new BooleanQuery();
60         filterQuery.add(c.query, true, false); // filter it
61
} else {
62         query.add(c); // query it
63
}
64     }
65
66     Filter filter = null;
67     if (filterQuery != null) {
68       synchronized (cache) { // check cache
69
filter = (Filter)cache.get(filterQuery);
70       }
71       if (filter == null) { // miss
72
filter = new QueryFilter(filterQuery); // construct new entry
73
synchronized (cache) {
74           cache.put(filterQuery, filter); // cache it
75
}
76       }
77     }
78
79     return searcher.search(query, filter, numHits);
80   }
81 }
82
Popular Tags