KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > searcher > Query


1 /* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
2 /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3
4 package net.nutch.searcher;
5
6 import java.io.DataInput JavaDoc;
7 import java.io.DataOutput JavaDoc;
8 import java.io.IOException JavaDoc;
9 import java.io.BufferedReader JavaDoc;
10 import java.io.InputStreamReader JavaDoc;
11 import java.util.Arrays JavaDoc;
12 import java.util.ArrayList JavaDoc;
13 import java.util.logging.Logger JavaDoc;
14
15 import net.nutch.util.LogFormatter;
16 import net.nutch.analysis.NutchAnalysis;
17
18 import net.nutch.io.Writable;
19
20 /** A Nutch query. */
21 public final class Query implements Writable, Cloneable JavaDoc {
22   public static final Logger JavaDoc LOG =
23     LogFormatter.getLogger("net.nutch.searcher.Query");
24
25   /** A query clause. */
26   public static class Clause implements Cloneable JavaDoc {
27     public static final String JavaDoc DEFAULT_FIELD = "DEFAULT";
28
29     private static final byte REQUIRED_BIT = 1;
30     private static final byte PROHIBITED_BIT = 2;
31     private static final byte PHRASE_BIT = 4;
32
33     private boolean isRequired;
34     private boolean isProhibited;
35     private String JavaDoc field = DEFAULT_FIELD;
36     private float weight = 1.0f;
37     private Object JavaDoc termOrPhrase;
38
39     public Clause(Term term, String JavaDoc field,
40                   boolean isRequired, boolean isProhibited) {
41       this(term, isRequired, isProhibited);
42       this.field = field;
43     }
44
45     public Clause(Term term, boolean isRequired, boolean isProhibited) {
46       this.isRequired = isRequired;
47       this.isProhibited = isProhibited;
48       this.termOrPhrase = term;
49     }
50
51     public Clause(Phrase phrase, String JavaDoc field,
52                   boolean isRequired, boolean isProhibited) {
53       this(phrase, isRequired, isProhibited);
54       this.field = field;
55     }
56
57     public Clause(Phrase phrase, boolean isRequired, boolean isProhibited) {
58       this.isRequired = isRequired;
59       this.isProhibited = isProhibited;
60       this.termOrPhrase = phrase;
61     }
62
63     public boolean isRequired() { return isRequired; }
64     public boolean isProhibited() { return isProhibited; }
65
66     public String JavaDoc getField() { return field; }
67
68     public float getWeight() { return weight; }
69     public void setWeight(float weight) { this.weight = weight; }
70
71     public boolean isPhrase() { return termOrPhrase instanceof Phrase; }
72
73     public Phrase getPhrase() { return (Phrase)termOrPhrase; }
74     public Term getTerm() { return (Term)termOrPhrase; }
75
76     public void write(DataOutput JavaDoc out) throws IOException JavaDoc {
77       byte bits = 0;
78       if (isPhrase())
79         bits |= PHRASE_BIT;
80       if (isRequired)
81         bits |= REQUIRED_BIT;
82       if (isProhibited)
83         bits |= PROHIBITED_BIT;
84       out.writeByte(bits);
85       out.writeUTF(field);
86       out.writeFloat(weight);
87       
88       if (isPhrase())
89         getPhrase().write(out);
90       else
91         getTerm().write(out);
92     }
93
94     public static Clause read(DataInput JavaDoc in) throws IOException JavaDoc {
95       byte bits = in.readByte();
96       boolean required = ((bits & REQUIRED_BIT) != 0);
97       boolean prohibited = ((bits & PROHIBITED_BIT) != 0);
98
99       String JavaDoc field = in.readUTF();
100       float weight = in.readFloat();
101
102       Clause clause;
103       if ((bits & PHRASE_BIT) == 0) {
104         clause = new Clause(Term.read(in), field, required, prohibited);
105       } else {
106         clause = new Clause(Phrase.read(in), field, required, prohibited);
107       }
108       clause.weight = weight;
109       return clause;
110     }
111
112     public String JavaDoc toString() {
113       StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
114 // if (isRequired)
115
// buffer.append("+");
116
// else
117
if (isProhibited)
118         buffer.append ("-");
119
120       if (!DEFAULT_FIELD.equals(field)) {
121         buffer.append(field);
122         buffer.append(":");
123       }
124
125       if (!isPhrase() && QueryFilters.isRawField(field)) {
126         buffer.append('"'); // quote raw terms
127
buffer.append(termOrPhrase.toString());
128         buffer.append('"');
129       } else {
130         buffer.append(termOrPhrase.toString());
131       }
132
133       return buffer.toString();
134     }
135
136     public boolean equals(Object JavaDoc o) {
137       if (!(o instanceof Clause)) return false;
138       Clause other = (Clause)o;
139       return
140         (this.isRequired == other.isRequired) &&
141         (this.isProhibited == other.isProhibited) &&
142         (this.weight == other.weight) &&
143         (this.termOrPhrase == null ? other.termOrPhrase == null :
144          this.termOrPhrase.equals(other.termOrPhrase));
145     }
146         
147     public int hashCode() {
148       return
149         (this.isRequired ? 0 : 1) ^
150         (this.isProhibited ? 2 : 4) ^
151         Float.floatToIntBits(this.weight) ^
152         (this.termOrPhrase != null ? termOrPhrase.hashCode() : 0);
153     }
154     
155     public Object JavaDoc clone() {
156       try {
157         return super.clone();
158       } catch (CloneNotSupportedException JavaDoc e) {
159         throw new RuntimeException JavaDoc(e);
160       }
161     }
162   }
163
164   /** A single-term query clause. */
165   public static class Term {
166     private String JavaDoc text;
167
168     public Term(String JavaDoc text) {
169       this.text = text;
170     }
171
172     public void write(DataOutput JavaDoc out) throws IOException JavaDoc {
173       out.writeUTF(text);
174     }
175
176     public static Term read(DataInput JavaDoc in) throws IOException JavaDoc {
177       String JavaDoc text = in.readUTF();
178       return new Term(text);
179     }
180
181     public String JavaDoc toString() {
182       return text;
183     }
184
185     public boolean equals(Object JavaDoc o) {
186       if (!(o instanceof Term)) return false;
187       Term other = (Term)o;
188       return text == null ? other.text == null : text.equals(other.text);
189     }
190
191     public int hashCode() {
192       return text != null ? text.hashCode() : 0;
193     }
194   }
195
196   /** A phrase query clause. */
197   public static class Phrase {
198     private Term[] terms;
199
200     public Phrase(Term[] terms) {
201       this.terms = terms;
202     }
203
204     public Phrase(String JavaDoc[] terms) {
205       this.terms = new Term[terms.length];
206       for (int i = 0; i < terms.length; i++) {
207         this.terms[i] = new Term(terms[i]);
208       }
209     }
210
211     public Term[] getTerms() { return terms; }
212
213     public void write(DataOutput JavaDoc out) throws IOException JavaDoc {
214       out.writeByte(terms.length);
215       for (int i = 0; i < terms.length; i++)
216         terms[i].write(out);
217     }
218
219     public static Phrase read(DataInput JavaDoc in) throws IOException JavaDoc {
220       int length = in.readByte();
221       Term[] terms = new Term[length];
222       for (int i = 0; i < length; i++)
223         terms[i] = Term.read(in);
224       return new Phrase(terms);
225     }
226
227     public String JavaDoc toString() {
228       StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
229       buffer.append("\"");
230       for (int i = 0; i < terms.length; i++) {
231         buffer.append(terms[i].toString());
232         if (i != terms.length-1)
233           buffer.append(" ");
234       }
235       buffer.append("\"");
236       return buffer.toString();
237     }
238
239     public boolean equals(Object JavaDoc o) {
240       if (!(o instanceof Phrase)) return false;
241       Phrase other = (Phrase)o;
242       if (!(this.terms.length == this.terms.length))
243         return false;
244       for (int i = 0; i < terms.length; i++) {
245         if (!this.terms[i].equals(other.terms[i]))
246           return false;
247       }
248       return true;
249     }
250
251     public int hashCode() {
252       int hashCode = terms.length;
253       for (int i = 0; i < terms.length; i++) {
254         hashCode ^= terms[i].hashCode();
255       }
256       return hashCode;
257     }
258
259   }
260
261
262   private ArrayList JavaDoc clauses = new ArrayList JavaDoc();
263
264   private static final Clause[] CLAUSES_PROTO = new Clause[0];
265
266   /** Return all clauses. */
267   public Clause[] getClauses() {
268     return (Clause[])clauses.toArray(CLAUSES_PROTO);
269   }
270
271   /** Add a required term in the default field. */
272   public void addRequiredTerm(String JavaDoc term) {
273     addRequiredTerm(term, Clause.DEFAULT_FIELD);
274   }
275
276   /** Add a required term in a specified field. */
277   public void addRequiredTerm(String JavaDoc term, String JavaDoc field) {
278     clauses.add(new Clause(new Term(term), field, true, false));
279   }
280
281   /** Add a prohibited term in the default field. */
282   public void addProhibitedTerm(String JavaDoc term) {
283     addProhibitedTerm(term, Clause.DEFAULT_FIELD);
284   }
285
286   /** Add a prohibited term in the specified field. */
287   public void addProhibitedTerm(String JavaDoc term, String JavaDoc field) {
288     clauses.add(new Clause(new Term(term), field, false, true));
289   }
290
291   /** Add a required phrase in the default field. */
292   public void addRequiredPhrase(String JavaDoc[] terms) {
293     addRequiredPhrase(terms, Clause.DEFAULT_FIELD);
294   }
295
296   /** Add a required phrase in the specified field. */
297   public void addRequiredPhrase(String JavaDoc[] terms, String JavaDoc field) {
298     if (terms.length == 0) { // ignore empty phrase
299
} else if (terms.length == 1) {
300       addRequiredTerm(terms[0], field); // optimize to term query
301
} else {
302       clauses.add(new Clause(new Phrase(terms), field, true, false));
303     }
304   }
305
306   /** Add a prohibited phrase in the default field. */
307   public void addProhibitedPhrase(String JavaDoc[] terms) {
308     addProhibitedPhrase(terms, Clause.DEFAULT_FIELD);
309   }
310
311   /** Add a prohibited phrase in the specified field. */
312   public void addProhibitedPhrase(String JavaDoc[] terms, String JavaDoc field) {
313     if (terms.length == 0) { // ignore empty phrase
314
} else if (terms.length == 1) {
315       addProhibitedTerm(terms[0], field); // optimize to term query
316
} else {
317       clauses.add(new Clause(new Phrase(terms), field, false, true));
318     }
319   }
320
321   public void write(DataOutput JavaDoc out) throws IOException JavaDoc {
322     out.writeByte(clauses.size());
323     for (int i = 0; i < clauses.size(); i++)
324       ((Clause)clauses.get(i)).write(out);
325   }
326   
327   public static Query read(DataInput JavaDoc in) throws IOException JavaDoc {
328     Query result = new Query();
329     result.readFields(in);
330     return result;
331   }
332
333   public void readFields(DataInput JavaDoc in) throws IOException JavaDoc {
334     clauses.clear();
335     int length = in.readByte();
336     for (int i = 0; i < length; i++)
337       clauses.add(Clause.read(in));
338   }
339
340   public String JavaDoc toString() {
341     StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
342     for (int i = 0; i < clauses.size(); i++) {
343       buffer.append(clauses.get(i).toString());
344       if (i != clauses.size()-1)
345         buffer.append(" ");
346     }
347     return buffer.toString();
348   }
349
350   public boolean equals(Object JavaDoc o) {
351     if (!(o instanceof Query)) return false;
352     Query other = (Query)o;
353     return this.clauses.equals(other.clauses);
354   }
355   
356   public int hashCode() {
357     return this.clauses.hashCode();
358   }
359
360   public Object JavaDoc clone() {
361     Query clone = null;
362     try {
363       clone = (Query)super.clone();
364     } catch (CloneNotSupportedException JavaDoc e) {
365       throw new RuntimeException JavaDoc(e);
366     }
367     clone.clauses = (ArrayList JavaDoc)clauses.clone();
368     return clone;
369   }
370
371
372   /** Flattens a query into the set of text terms that it contains. These are
373    * terms which should be higlighted in matching documents. */

374   public String JavaDoc[] getTerms() {
375     ArrayList JavaDoc result = new ArrayList JavaDoc();
376     for (int i = 0; i < clauses.size(); i++) {
377       Clause clause = (Clause)clauses.get(i);
378       if (!clause.isProhibited()) {
379         if (clause.isPhrase()) {
380           Term[] terms = clause.getPhrase().getTerms();
381           for (int j = 0; j < terms.length; j++) {
382             result.add(terms[j].toString());
383           }
384         } else {
385           result.add(clause.getTerm().toString());
386         }
387       }
388     }
389     return (String JavaDoc[])result.toArray(new String JavaDoc[result.size()]);
390   }
391
392
393   /** Parse a query from a string. */
394   public static Query parse(String JavaDoc queryString) throws IOException JavaDoc {
395     return fixup(NutchAnalysis.parseQuery(queryString));
396   }
397
398   /** Convert clauses in unknown fields to the default field. */
399   private static Query fixup(Query input) {
400     // walk the query
401
Query output = new Query();
402     Clause[] clauses = input.getClauses();
403     for (int i = 0; i < clauses.length; i++) {
404       Clause c = clauses[i];
405       if (!QueryFilters.isField(c.getField())) { // unknown field
406
ArrayList JavaDoc terms = new ArrayList JavaDoc(); // add name to query
407
if (c.isPhrase()) {
408           terms.addAll(Arrays.asList(c.getPhrase().getTerms()));
409         } else {
410           terms.add(c.getTerm());
411         }
412         terms.add(0, new Term(c.getField())); // add to front of phrase
413
c = (Clause)c.clone();
414         c.field = Clause.DEFAULT_FIELD; // use default field instead
415
c.termOrPhrase
416           = new Phrase((Term[])terms.toArray(new Term[terms.size()]));
417       }
418       output.clauses.add(c); // copy clause to output
419
}
420     return output;
421   }
422
423   /** For debugging. */
424   public static void main(String JavaDoc[] args) throws Exception JavaDoc {
425     BufferedReader JavaDoc in = new BufferedReader JavaDoc(new InputStreamReader JavaDoc(System.in));
426     while (true) {
427       System.out.print("Query: ");
428       String JavaDoc line = in.readLine();
429       Query query = parse(line);
430       System.out.println("Parsed: " + query);
431       System.out.println("Translated: " + QueryFilters.filter(query));
432     }
433   }
434 }
435
Popular Tags