KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > searcher > QueryFilters


1 /* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
2 /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3
4 package net.nutch.searcher;
5
6 import net.nutch.plugin.*;
7 import net.nutch.searcher.Query.Clause;
8 import net.nutch.util.LogFormatter;
9 import java.util.logging.Logger JavaDoc;
10 import java.util.*;
11
12 import org.apache.lucene.search.BooleanQuery;
13
14 /** Creates and caches {@link QueryFilter} implementing plugins. QueryFilter
15  * implementations should define either the "fields" or "raw-fields" attributes
16  * for any fields that they process, otherwise these will be ignored by the
17  * query parser. Raw fields are parsed as a single Query.Term, including
18  * internal punctuation, while non-raw fields are parsed containing punctuation
19  * are parsed as multi-token Query.Phrase's.
20  */

21 public class QueryFilters {
22   private static final Logger JavaDoc LOG =
23     LogFormatter.getLogger("net.nutch.searcher.QueryFilters");
24
25   private static final QueryFilter[] CACHE;
26   private static final HashSet FIELD_NAMES = new HashSet();
27   private static final HashSet RAW_FIELD_NAMES = new HashSet();
28
29   static {
30     try {
31       ExtensionPoint point = PluginRepository.getInstance()
32         .getExtensionPoint(QueryFilter.X_POINT_ID);
33       if (point == null)
34         throw new RuntimeException JavaDoc(QueryFilter.X_POINT_ID+" not found.");
35       Extension[] extensions = point.getExtentens();
36       CACHE = new QueryFilter[extensions.length];
37       for (int i = 0; i < extensions.length; i++) {
38         Extension extension = extensions[i];
39         ArrayList fieldNames = parseFieldNames(extension, "fields");
40         ArrayList rawFieldNames = parseFieldNames(extension, "raw-fields");
41         if (fieldNames.size() == 0 && rawFieldNames.size() == 0) {
42           LOG.warning("QueryFilter: "+extension.getId()+" names no fields.");
43           continue;
44         }
45         CACHE[i] = (QueryFilter)extension.getExtensionInstance();
46         FIELD_NAMES.addAll(fieldNames);
47         FIELD_NAMES.addAll(rawFieldNames);
48         RAW_FIELD_NAMES.addAll(rawFieldNames);
49       }
50     } catch (PluginRuntimeException e) {
51       throw new RuntimeException JavaDoc(e);
52     }
53   }
54
55   private static ArrayList parseFieldNames(Extension extension,
56                                            String JavaDoc attribute) {
57     String JavaDoc fields = extension.getAttribute(attribute);
58     if (fields == null) fields = "";
59     return Collections.list(new StringTokenizer(fields, " ,\t\n\r"));
60   }
61
62   private QueryFilters() {} // no public ctor
63

64   /** Run all defined filters. */
65   public static BooleanQuery filter(Query input) throws QueryException {
66     // first check that all field names are claimed by some plugin
67
Clause[] clauses = input.getClauses();
68     for (int i = 0; i < clauses.length; i++) {
69       Clause c = clauses[i];
70       if (!isField(c.getField()))
71         throw new QueryException("Not a known field name:"+c.getField());
72     }
73
74     // then run each plugin
75
BooleanQuery output = new BooleanQuery();
76     for (int i = 0 ; i < CACHE.length; i++) {
77       output = CACHE[i].filter(input, output);
78     }
79     return output;
80   }
81
82   public static boolean isField(String JavaDoc name) {
83     return FIELD_NAMES.contains(name);
84   }
85   public static boolean isRawField(String JavaDoc name) {
86     return RAW_FIELD_NAMES.contains(name);
87   }
88 }
89
Popular Tags