package net.nutch.searcher;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.logging.Logger;

import net.nutch.util.LogFormatter;
import net.nutch.analysis.NutchAnalysis;

import net.nutch.io.Writable;

/**
 * A parsed query: an ordered list of {@link Clause}s, each of which is a
 * required or prohibited {@link Term} or {@link Phrase} in a named field.
 *
 * <p>Instances can be written to a {@link DataOutput} with {@link #write}
 * and read back with {@link #read} or {@link #readFields}.
 */
public final class Query implements Writable, Cloneable {
  public static final Logger LOG =
    LogFormatter.getLogger("net.nutch.searcher.Query");

  /**
   * Clause and term counts are serialized as a single byte, so this is the
   * largest count that can be written and read back unchanged.
   */
  private static final int MAX_COUNT = 255;

  /** A single query clause: a term or phrase, its field, flags and weight. */
  public static class Clause implements Cloneable {
    /** Field name used when a clause is constructed without an explicit field. */
    public static final String DEFAULT_FIELD = "DEFAULT";

    // Bit flags packed into the first serialized byte of a clause.
    private static final byte REQUIRED_BIT = 1;
    private static final byte PROHIBITED_BIT = 2;
    private static final byte PHRASE_BIT = 4;

    private boolean isRequired;
    private boolean isProhibited;
    private String field = DEFAULT_FIELD;
    private float weight = 1.0f;
    /** Either a {@link Term} or a {@link Phrase}; see {@link #isPhrase()}. */
    private Object termOrPhrase;

    /** Constructs a single-term clause in the given field. */
    public Clause(Term term, String field,
                  boolean isRequired, boolean isProhibited) {
      this(term, isRequired, isProhibited);
      this.field = field;
    }

    /** Constructs a single-term clause in {@link #DEFAULT_FIELD}. */
    public Clause(Term term, boolean isRequired, boolean isProhibited) {
      this.isRequired = isRequired;
      this.isProhibited = isProhibited;
      this.termOrPhrase = term;
    }

    /** Constructs a phrase clause in the given field. */
    public Clause(Phrase phrase, String field,
                  boolean isRequired, boolean isProhibited) {
      this(phrase, isRequired, isProhibited);
      this.field = field;
    }

    /** Constructs a phrase clause in {@link #DEFAULT_FIELD}. */
    public Clause(Phrase phrase, boolean isRequired, boolean isProhibited) {
      this.isRequired = isRequired;
      this.isProhibited = isProhibited;
      this.termOrPhrase = phrase;
    }

    public boolean isRequired() { return isRequired; }
    public boolean isProhibited() { return isProhibited; }

    public String getField() { return field; }

    public float getWeight() { return weight; }
    public void setWeight(float weight) { this.weight = weight; }

    /** Returns true if this clause holds a {@link Phrase} rather than a {@link Term}. */
    public boolean isPhrase() { return termOrPhrase instanceof Phrase; }

    /** Returns the phrase; throws ClassCastException if this clause holds a term. */
    public Phrase getPhrase() { return (Phrase)termOrPhrase; }

    /** Returns the term; throws ClassCastException if this clause holds a phrase. */
    public Term getTerm() { return (Term)termOrPhrase; }

    /**
     * Serializes this clause as: one flag byte, the field name (UTF), the
     * weight (float), then the term or phrase.
     */
    public void write(DataOutput out) throws IOException {
      byte bits = 0;
      if (isPhrase())
        bits |= PHRASE_BIT;
      if (isRequired)
        bits |= REQUIRED_BIT;
      if (isProhibited)
        bits |= PROHIBITED_BIT;
      out.writeByte(bits);
      out.writeUTF(field);
      out.writeFloat(weight);

      if (isPhrase())
        getPhrase().write(out);
      else
        getTerm().write(out);
    }

    /** Reads a clause in the format produced by {@link #write}. */
    public static Clause read(DataInput in) throws IOException {
      byte bits = in.readByte();
      boolean required = ((bits & REQUIRED_BIT) != 0);
      boolean prohibited = ((bits & PROHIBITED_BIT) != 0);

      String field = in.readUTF();
      float weight = in.readFloat();

      Clause clause;
      if ((bits & PHRASE_BIT) == 0) {
        clause = new Clause(Term.read(in), field, required, prohibited);
      } else {
        clause = new Clause(Phrase.read(in), field, required, prohibited);
      }
      clause.weight = weight;
      return clause;
    }

    /**
     * Renders the clause as query text: a leading "-" if prohibited, then
     * "field:" unless the field is {@link #DEFAULT_FIELD}, then the term or
     * phrase.
     */
    public String toString() {
      StringBuffer buffer = new StringBuffer();
      if (isProhibited)
        buffer.append("-");

      if (!DEFAULT_FIELD.equals(field)) {
        buffer.append(field);
        buffer.append(":");
      }

      if (!isPhrase() && QueryFilters.isRawField(field)) {
        // Single terms in raw fields are quoted; phrases already quote themselves.
        buffer.append('"');
        buffer.append(termOrPhrase.toString());
        buffer.append('"');
      } else {
        buffer.append(termOrPhrase.toString());
      }

      return buffer.toString();
    }

    /**
     * Compares the required/prohibited flags, weight and term or phrase.
     *
     * NOTE(review): the field name is not part of equality (nor of
     * {@link #hashCode()}); confirm whether that is intentional.
     */
    public boolean equals(Object o) {
      if (!(o instanceof Clause)) return false;
      Clause other = (Clause)o;
      return
        (this.isRequired == other.isRequired) &&
        (this.isProhibited == other.isProhibited) &&
        (this.weight == other.weight) &&
        (this.termOrPhrase == null ? other.termOrPhrase == null :
         this.termOrPhrase.equals(other.termOrPhrase));
    }

    public int hashCode() {
      return
        (this.isRequired ? 0 : 1) ^
        (this.isProhibited ? 2 : 4) ^
        Float.floatToIntBits(this.weight) ^
        (this.termOrPhrase != null ? termOrPhrase.hashCode() : 0);
    }

    /** Returns a shallow copy; the term or phrase object is shared with the original. */
    public Object clone() {
      try {
        return super.clone();
      } catch (CloneNotSupportedException e) {
        throw new RuntimeException(e);
      }
    }
  }

  /** A single query term, wrapping its text. */
  public static class Term {
    private String text;

    public Term(String text) {
      this.text = text;
    }

    /** Writes the term text as a UTF string. */
    public void write(DataOutput out) throws IOException {
      out.writeUTF(text);
    }

    /** Reads a term in the format produced by {@link #write}. */
    public static Term read(DataInput in) throws IOException {
      String text = in.readUTF();
      return new Term(text);
    }

    /** Returns the term text. */
    public String toString() {
      return text;
    }

    public boolean equals(Object o) {
      if (!(o instanceof Term)) return false;
      Term other = (Term)o;
      return text == null ? other.text == null : text.equals(other.text);
    }

    public int hashCode() {
      return text != null ? text.hashCode() : 0;
    }
  }

  /** An ordered sequence of {@link Term}s. */
  public static class Phrase {
    private Term[] terms;

    /** Constructs a phrase over the given array, which is stored without copying. */
    public Phrase(Term[] terms) {
      this.terms = terms;
    }

    /** Constructs a phrase with one {@link Term} per string. */
    public Phrase(String[] terms) {
      this.terms = new Term[terms.length];
      for (int i = 0; i < terms.length; i++) {
        this.terms[i] = new Term(terms[i]);
      }
    }

    /** Returns the internal term array (not a copy). */
    public Term[] getTerms() { return terms; }

    /**
     * Writes a one-byte term count followed by each term.
     *
     * @throws IOException if there are more than 255 terms, since the count
     *         could not be read back correctly
     */
    public void write(DataOutput out) throws IOException {
      if (terms.length > MAX_COUNT)
        throw new IOException("Too many terms in phrase: " + terms.length);
      out.writeByte(terms.length);
      for (int i = 0; i < terms.length; i++)
        terms[i].write(out);
    }

    /** Reads a phrase in the format produced by {@link #write}. */
    public static Phrase read(DataInput in) throws IOException {
      // The count is an unsigned byte: a signed read would turn 128..255
      // into a negative array size.
      int length = in.readUnsignedByte();
      Term[] terms = new Term[length];
      for (int i = 0; i < length; i++)
        terms[i] = Term.read(in);
      return new Phrase(terms);
    }

    /** Returns the terms separated by single spaces and wrapped in double quotes. */
    public String toString() {
      StringBuffer buffer = new StringBuffer();
      buffer.append("\"");
      for (int i = 0; i < terms.length; i++) {
        buffer.append(terms[i].toString());
        if (i != terms.length-1)
          buffer.append(" ");
      }
      buffer.append("\"");
      return buffer.toString();
    }

    /** Two phrases are equal when they have the same terms in the same order. */
    public boolean equals(Object o) {
      if (!(o instanceof Phrase)) return false;
      Phrase other = (Phrase)o;
      // Compare against the OTHER phrase's length; differing lengths must
      // never reach the element loop below.
      if (this.terms.length != other.terms.length)
        return false;
      for (int i = 0; i < terms.length; i++) {
        if (!this.terms[i].equals(other.terms[i]))
          return false;
      }
      return true;
    }

    public int hashCode() {
      int hashCode = terms.length;
      for (int i = 0; i < terms.length; i++) {
        hashCode ^= terms[i].hashCode();
      }
      return hashCode;
    }

  }


  /** The clauses of this query, in insertion order; elements are {@link Clause}s. */
  private ArrayList clauses = new ArrayList();

  /** Typed empty array passed to toArray so the result is a Clause[]. */
  private static final Clause[] CLAUSES_PROTO = new Clause[0];

  /** Returns all clauses, in order, as a newly allocated array. */
  public Clause[] getClauses() {
    return (Clause[])clauses.toArray(CLAUSES_PROTO);
  }

  /** Adds a required term in the default field. */
  public void addRequiredTerm(String term) {
    addRequiredTerm(term, Clause.DEFAULT_FIELD);
  }

  /** Adds a required term in the specified field. */
  public void addRequiredTerm(String term, String field) {
    clauses.add(new Clause(new Term(term), field, true, false));
  }

  /** Adds a prohibited term in the default field. */
  public void addProhibitedTerm(String term) {
    addProhibitedTerm(term, Clause.DEFAULT_FIELD);
  }

  /** Adds a prohibited term in the specified field. */
  public void addProhibitedTerm(String term, String field) {
    clauses.add(new Clause(new Term(term), field, false, true));
  }

  /** Adds a required phrase in the default field. */
  public void addRequiredPhrase(String[] terms) {
    addRequiredPhrase(terms, Clause.DEFAULT_FIELD);
  }

  /**
   * Adds a required phrase in the specified field. An empty array adds
   * nothing, and a single-element array is added as a plain term.
   */
  public void addRequiredPhrase(String[] terms, String field) {
    if (terms.length == 0) {
      // nothing to add
    } else if (terms.length == 1) {
      addRequiredTerm(terms[0], field);
    } else {
      clauses.add(new Clause(new Phrase(terms), field, true, false));
    }
  }

  /** Adds a prohibited phrase in the default field. */
  public void addProhibitedPhrase(String[] terms) {
    addProhibitedPhrase(terms, Clause.DEFAULT_FIELD);
  }

  /**
   * Adds a prohibited phrase in the specified field. An empty array adds
   * nothing, and a single-element array is added as a plain term.
   */
  public void addProhibitedPhrase(String[] terms, String field) {
    if (terms.length == 0) {
      // nothing to add
    } else if (terms.length == 1) {
      addProhibitedTerm(terms[0], field);
    } else {
      clauses.add(new Clause(new Phrase(terms), field, false, true));
    }
  }

  /**
   * Writes a one-byte clause count followed by each clause.
   *
   * @throws IOException if there are more than 255 clauses, since the count
   *         could not be read back correctly
   */
  public void write(DataOutput out) throws IOException {
    int count = clauses.size();
    if (count > MAX_COUNT)
      throw new IOException("Too many clauses in query: " + count);
    out.writeByte(count);
    for (int i = 0; i < count; i++)
      ((Clause)clauses.get(i)).write(out);
  }

  /** Creates a new query and populates it from the input via {@link #readFields}. */
  public static Query read(DataInput in) throws IOException {
    Query result = new Query();
    result.readFields(in);
    return result;
  }

  /** Replaces this query's clauses with those read from the input. */
  public void readFields(DataInput in) throws IOException {
    clauses.clear();
    // Unsigned read: a signed byte would make counts of 128..255 negative,
    // skipping the loop and leaving the clause data unread in the stream.
    int length = in.readUnsignedByte();
    for (int i = 0; i < length; i++)
      clauses.add(Clause.read(in));
  }

  /** Returns the clauses' string forms joined by single spaces. */
  public String toString() {
    StringBuffer buffer = new StringBuffer();
    for (int i = 0; i < clauses.size(); i++) {
      buffer.append(clauses.get(i).toString());
      if (i != clauses.size()-1)
        buffer.append(" ");
    }
    return buffer.toString();
  }

  /** Two queries are equal when their clause lists are equal. */
  public boolean equals(Object o) {
    if (!(o instanceof Query)) return false;
    Query other = (Query)o;
    return this.clauses.equals(other.clauses);
  }

  public int hashCode() {
    return this.clauses.hashCode();
  }

  /**
   * Returns a copy with its own clause list; the {@link Clause} objects in
   * the list are shared with the original.
   */
  public Object clone() {
    Query clone = null;
    try {
      clone = (Query)super.clone();
    } catch (CloneNotSupportedException e) {
      throw new RuntimeException(e);
    }
    clone.clauses = (ArrayList)clauses.clone();
    return clone;
  }

  /**
   * Returns the text of every term in every non-prohibited clause, in
   * order, with phrases flattened into their individual terms.
   */
  public String[] getTerms() {
    ArrayList result = new ArrayList();
    for (int i = 0; i < clauses.size(); i++) {
      Clause clause = (Clause)clauses.get(i);
      if (!clause.isProhibited()) {
        if (clause.isPhrase()) {
          Term[] terms = clause.getPhrase().getTerms();
          for (int j = 0; j < terms.length; j++) {
            result.add(terms[j].toString());
          }
        } else {
          result.add(clause.getTerm().toString());
        }
      }
    }
    return (String[])result.toArray(new String[result.size()]);
  }

  /**
   * Parses a query string with {@link NutchAnalysis#parseQuery} and then
   * rewrites clauses whose field is not recognized (see {@link #fixup}).
   */
  public static Query parse(String queryString) throws IOException {
    return fixup(NutchAnalysis.parseQuery(queryString));
  }

  /**
   * Returns a new query in which every clause whose field is rejected by
   * {@link QueryFilters#isField} is replaced by a default-field phrase
   * consisting of the field name followed by the clause's original terms.
   * Other clauses are carried over unchanged.
   */
  private static Query fixup(Query input) {
    Query output = new Query();
    Clause[] clauses = input.getClauses();
    for (int i = 0; i < clauses.length; i++) {
      Clause c = clauses[i];
      if (!QueryFilters.isField(c.getField())) {
        // Collect the clause's terms, then put the field name in front.
        ArrayList terms = new ArrayList();
        if (c.isPhrase()) {
          terms.addAll(Arrays.asList(c.getPhrase().getTerms()));
        } else {
          terms.add(c.getTerm());
        }
        terms.add(0, new Term(c.getField()));
        // Modify a copy so the input query's clause is left untouched.
        c = (Clause)c.clone();
        c.field = Clause.DEFAULT_FIELD;
        c.termOrPhrase
          = new Phrase((Term[])terms.toArray(new Term[terms.size()]));
      }
      output.clauses.add(c);
    }
    return output;
  }

  /**
   * Interactive tester: reads queries from standard input, one per line,
   * and prints the parsed and filter-translated forms until end of input.
   */
  public static void main(String[] args) throws Exception {
    BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
    while (true) {
      System.out.print("Query: ");
      String line = in.readLine();
      if (line == null) {
        // End of input: stop instead of passing null to the parser forever.
        break;
      }
      Query query = parse(line);
      System.out.println("Parsed: " + query);
      System.out.println("Translated: " + QueryFilters.filter(query));
    }
  }
}