| 1 2 package org.apache.lucene.queryParser; 3 4 import java.util.Vector ; 5 import java.io.*; 6 import java.text.*; 7 import java.util.*; 8 import org.apache.lucene.index.Term; 9 import org.apache.lucene.analysis.*; 10 import org.apache.lucene.document.*; 11 import org.apache.lucene.search.*; 12 import org.apache.lucene.util.Parameter; 13 14 63 64 public class QueryParser implements QueryParserConstants { 65 66 private static final int CONJ_NONE = 0; 67 private static final int CONJ_AND = 1; 68 private static final int CONJ_OR = 2; 69 70 private static final int MOD_NONE = 0; 71 private static final int MOD_NOT = 10; 72 private static final int MOD_REQ = 11; 73 74 75 public static final int DEFAULT_OPERATOR_OR = 0; 76 77 public static final int DEFAULT_OPERATOR_AND = 1; 78 79 82 public static final Operator AND_OPERATOR = Operator.AND; 83 84 public static final Operator OR_OPERATOR = Operator.OR; 85 86 87 private Operator operator = OR_OPERATOR; 88 89 boolean lowercaseExpandedTerms = true; 90 91 Analyzer analyzer; 92 String field; 93 int phraseSlop = 0; 94 float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; 95 int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; 96 Locale locale = Locale.getDefault(); 97 98 101 static public final class Operator extends Parameter { 102 private Operator(String name) { 103 super(name); 104 } 105 static public final Operator OR = new Operator("OR"); 106 static public final Operator AND = new Operator("AND"); 107 } 108 109 117 static public Query parse(String query, String field, Analyzer analyzer) 118 throws ParseException { 119 QueryParser parser = new QueryParser(field, analyzer); 120 return parser.parse(query); 121 } 122 123 127 public QueryParser(String f, Analyzer a) { 128 this(new FastCharStream(new StringReader(""))); 129 analyzer = a; 130 field = f; 131 } 132 133 137 public Query parse(String query) throws ParseException { 138 ReInit(new FastCharStream(new StringReader(query))); 139 try { 140 return Query(field); 141 } 142 catch (TokenMgrError tme) { 143 throw new ParseException(tme.getMessage()); 144 } 145 catch (BooleanQuery.TooManyClauses tmc) { 146 throw new ParseException("Too many boolean clauses"); 147 } 148 } 149 150 153 public Analyzer getAnalyzer() { 154 return analyzer; 155 } 156 157 160 public String getField() { 161 return field; 162 } 163 164 167 public float getFuzzyMinSim() { 168 return fuzzyMinSim; 169 } 170 171 175 public void setFuzzyMinSim(float fuzzyMinSim) { 176 this.fuzzyMinSim = fuzzyMinSim; 177 } 178 179 183 public int getFuzzyPrefixLength() { 184 return fuzzyPrefixLength; 185 } 186 187 191 public void setFuzzyPrefixLength(int fuzzyPrefixLength) { 192 this.fuzzyPrefixLength = fuzzyPrefixLength; 193 } 194 195 199 public void setPhraseSlop(int phraseSlop) { 200 this.phraseSlop = phraseSlop; 201 } 202 203 206 public int getPhraseSlop() { 207 return phraseSlop; 208 } 209 210 219 public void setOperator(int op) { 220 if (op == DEFAULT_OPERATOR_AND) 221 this.operator = AND_OPERATOR; 222 else if (op == DEFAULT_OPERATOR_OR) 223 this.operator = OR_OPERATOR; 224 else 225 throw new IllegalArgumentException ("Unknown operator " + op); 226 } 227 228 236 public void setDefaultOperator(Operator op) { 237 this.operator = op; 238 } 239 240 245 public int getOperator() { 246 if(operator == AND_OPERATOR) 247 return DEFAULT_OPERATOR_AND; 248 else if(operator == OR_OPERATOR) 249 return DEFAULT_OPERATOR_OR; 250 else 251 throw new IllegalStateException ("Unknown operator " + operator); 252 } 253 254 258 public Operator getDefaultOperator() { 259 return operator; 260 } 261 262 267 public void setLowercaseWildcardTerms(boolean lowercaseExpandedTerms) { 268 this.lowercaseExpandedTerms = lowercaseExpandedTerms; 269 } 270 271 275 public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) { 276 this.lowercaseExpandedTerms = lowercaseExpandedTerms; 277 } 278 279 282 public boolean getLowercaseWildcardTerms() { 283 return lowercaseExpandedTerms; 284 } 285 286 289 public boolean getLowercaseExpandedTerms() { 290 return lowercaseExpandedTerms; 291 } 292 293 296 public void setLocale(Locale locale) { 297 this.locale = locale; 298 } 299 300 303 public Locale getLocale() { 304 return locale; 305 } 306 307 protected void addClause(Vector clauses, int conj, int mods, Query q) { 308 boolean required, prohibited; 309 310 if (clauses.size() > 0 && conj == CONJ_AND) { 313 BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); 314 if (!c.isProhibited()) 315 c.setOccur(BooleanClause.Occur.MUST); 316 } 317 318 if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) { 319 BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); 324 if (!c.isProhibited()) 325 c.setOccur(BooleanClause.Occur.SHOULD); 326 } 327 328 if (q == null) 331 return; 332 333 if (operator == OR_OPERATOR) { 334 prohibited = (mods == MOD_NOT); 337 required = (mods == MOD_REQ); 338 if (conj == CONJ_AND && !prohibited) { 339 required = true; 340 } 341 } else { 342 prohibited = (mods == MOD_NOT); 345 required = (!prohibited && conj != CONJ_OR); 346 } 347 if (required && !prohibited) 348 clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST)); 349 else if (!required && !prohibited) 350 clauses.addElement(new BooleanClause(q, BooleanClause.Occur.SHOULD)); 351 else if (!required && prohibited) 352 clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST_NOT)); 353 else 354 throw new RuntimeException ("Clause cannot be both required and prohibited"); 355 } 356 357 364 protected Query getFieldQuery(String field, 365 Analyzer analyzer, 366 String queryText) throws ParseException { 367 return getFieldQuery(field, queryText); 368 } 369 370 373 protected Query getFieldQuery(String field, String queryText) throws ParseException { 374 377 TokenStream source = analyzer.tokenStream(field, new StringReader(queryText)); 378 Vector v = new Vector (); 379 org.apache.lucene.analysis.Token t; 380 int positionCount = 0; 381 boolean severalTokensAtSamePosition = false; 382 383 while (true) { 384 try { 385 t = source.next(); 386 } 387 catch (IOException e) { 388 t = null; 389 } 390 if (t == null) 391 break; 392 v.addElement(t); 393 if (t.getPositionIncrement() != 0) 394 positionCount += t.getPositionIncrement(); 395 else 396 severalTokensAtSamePosition = true; 397 } 398 try { 399 source.close(); 400 } 401 catch (IOException e) { 402 } 404 405 if (v.size() == 0) 406 return null; 407 else if (v.size() == 1) { 408 t = (org.apache.lucene.analysis.Token) v.elementAt(0); 409 return new TermQuery(new Term(field, t.termText())); 410 } else { 411 if (severalTokensAtSamePosition) { 412 if (positionCount == 1) { 413 BooleanQuery q = new BooleanQuery(true); 415 for (int i = 0; i < v.size(); i++) { 416 t = (org.apache.lucene.analysis.Token) v.elementAt(i); 417 TermQuery currentQuery = new TermQuery( 418 new Term(field, t.termText())); 419 q.add(currentQuery, BooleanClause.Occur.SHOULD); 420 } 421 return q; 422 } 423 else { 424 MultiPhraseQuery mpq = new MultiPhraseQuery(); 426 mpq.setSlop(phraseSlop); 427 List multiTerms = new ArrayList(); 428 for (int i = 0; i < v.size(); i++) { 429 t = (org.apache.lucene.analysis.Token) v.elementAt(i); 430 if (t.getPositionIncrement() == 1 && multiTerms.size() > 0) { 431 mpq.add((Term[])multiTerms.toArray(new Term[0])); 432 multiTerms.clear(); 433 } 434 multiTerms.add(new Term(field, t.termText())); 435 } 436 mpq.add((Term[])multiTerms.toArray(new Term[0])); 437 return mpq; 438 } 439 } 440 else { 441 PhraseQuery q = new PhraseQuery(); 442 q.setSlop(phraseSlop); 443 for (int i = 0; i < v.size(); i++) { 444 q.add(new Term(field, ((org.apache.lucene.analysis.Token) 445 v.elementAt(i)).termText())); 446 447 } 448 return q; 449 } 450 } 451 } 452 453 460 protected Query getFieldQuery(String field, 461 Analyzer analyzer, 462 String queryText, 463 int slop) throws ParseException { 464 return getFieldQuery(field, queryText, slop); 465 } 466 467 474 protected Query getFieldQuery(String field, String queryText, int slop) 475 throws ParseException { 476 Query query = getFieldQuery(field, queryText); 477 478 if (query instanceof PhraseQuery) { 479 ((PhraseQuery) query).setSlop(slop); 480 } 481 if (query instanceof MultiPhraseQuery) { 482 ((MultiPhraseQuery) query).setSlop(slop); 483 } 484 485 return query; 486 } 487 488 495 protected Query getRangeQuery(String field, 496 Analyzer analyzer, 497 String part1, 498 String part2, 499 boolean inclusive) throws ParseException { 500 return getRangeQuery(field, part1, part2, inclusive); 501 } 502 503 506 protected Query getRangeQuery(String field, 507 String part1, 508 String part2, 509 boolean inclusive) throws ParseException 510 { 511 if (lowercaseExpandedTerms) { 512 part1 = part1.toLowerCase(); 513 part2 = part2.toLowerCase(); 514 } 515 try { 516 DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale); 517 df.setLenient(true); 518 Date d1 = df.parse(part1); 519 Date d2 = df.parse(part2); 520 if (inclusive) { 521 Calendar cal = Calendar.getInstance(locale); 525 cal.setTime(d2); 526 cal.set(Calendar.HOUR_OF_DAY, 23); 527 cal.set(Calendar.MINUTE, 59); 528 cal.set(Calendar.SECOND, 59); 529 cal.set(Calendar.MILLISECOND, 999); 530 d2 = cal.getTime(); 531 } 532 part1 = DateField.dateToString(d1); 533 part2 = DateField.dateToString(d2); 534 } 535 catch (Exception e) { } 536 537 return new RangeQuery(new Term(field, part1), 538 new Term(field, part2), 539 inclusive); 540 } 541 542 555 protected Query getBooleanQuery(Vector clauses) throws ParseException { 556 return getBooleanQuery(clauses, false); 557 } 558 559 573 protected Query getBooleanQuery(Vector clauses, boolean disableCoord) 574 throws ParseException 575 { 576 BooleanQuery query = new BooleanQuery(disableCoord); 577 for (int i = 0; i < clauses.size(); i++) { 578 query.add((BooleanClause)clauses.elementAt(i)); 579 } 580 return query; 581 } 582 583 604 protected Query getWildcardQuery(String field, String termStr) throws ParseException 605 { 606 if (lowercaseExpandedTerms) { 607 termStr = termStr.toLowerCase(); 608 } 609 Term t = new Term(field, termStr); 610 return new WildcardQuery(t); 611 } 612 613 636 protected Query getPrefixQuery(String field, String termStr) throws ParseException 637 { 638 if (lowercaseExpandedTerms) { 639 termStr = termStr.toLowerCase(); 640 } 641 Term t = new Term(field, termStr); 642 return new PrefixQuery(t); 643 } 644 645 648 protected Query getFuzzyQuery(String field, String termStr) throws ParseException { 649 return getFuzzyQuery(field, termStr, fuzzyMinSim); 650 } 651 652 663 protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException 664 { 665 if (lowercaseExpandedTerms) { 666 termStr = termStr.toLowerCase(); 667 } 668 Term t = new Term(field, termStr); 669 return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength); 670 } 671 672 676 private String discardEscapeChar(String input) { 677 char[] caSource = input.toCharArray(); 678 char[] caDest = new char[caSource.length]; 679 int j = 0; 680 for (int i = 0; i < caSource.length; i++) { 681 if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) { 682 caDest[j++]=caSource[i]; 683 } 684 } 685 return new String (caDest, 0, j); 686 } 687 688 692 public static String escape(String s) { 693 StringBuffer sb = new StringBuffer (); 694 for (int i = 0; i < s.length(); i++) { 695 char c = s.charAt(i); 696 if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' 698 || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' 699 || c == '*' || c == '?') { 700 sb.append('\\'); 701 } 702 sb.append(c); 703 } 704 return sb.toString(); 705 } 706 707 712 public static void main(String [] args) throws Exception { 713 if (args.length == 0) { 714 System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>"); 715 System.exit(0); 716 } 717 QueryParser qp = new QueryParser("field", 718 new org.apache.lucene.analysis.SimpleAnalyzer()); 719 Query q = qp.parse(args[0]); 720 System.out.println(q.toString("field")); 721 } 722 723 final public int Conjunction() throws ParseException { 726 int ret = CONJ_NONE; 727 switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { 728 case AND: 729 case OR: 730 switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { 731 case AND: 732 jj_consume_token(AND); 733 ret = CONJ_AND; 734 break; 735 case OR: 736 jj_consume_token(OR); 737 ret = CONJ_OR; 738 break; 739 default: 740 jj_la1[0] = jj_gen; 741 jj_consume_token(-1); 742 throw new ParseException(); 743 } 744 break; 745 default: 746 jj_la1[1] = jj_gen; 747 ; 748 } 749 {if (true) return ret;} 750 throw new Error ("Missing return statement in function"); 751 } 752 753 final public int Modifiers() throws ParseException { 754 int ret = MOD_NONE; 755 switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { 756 case NOT: 757 case PLUS: 758 case MINUS: 759 switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { 760 case PLUS: 761 jj_consume_token(PLUS); 762 ret = MOD_REQ; 763 break; 764 case MINUS: 765 jj_consume_token(MINUS); 766 ret = MOD_NOT; 767 break; 768 case NOT: 769 jj_consume_token(NOT); 770 ret = MOD_NOT; 771 break; 772 default: 773 jj_la1[2] = jj_gen; 774 jj_consume_token(-1); 775 throw new ParseException(); 776 } 777 break; 778 default: 779 jj_la1[3] = jj_gen; 780 ; 781 } 782 {if (true) return ret;} 783 throw new Error ("Missing return statement in function"); 784 } 785 786 final public Query Query(String field) throws ParseException { 787 Vector clauses = new Vector (); 788 Query q, firstQuery=null; 789 int conj, mods; 790 mods = Modifiers(); 791 q = Clause(field); 792 addClause(clauses, CONJ_NONE, mods, q); 793 if (mods == MOD_NONE) 794 firstQuery=q; 795 label_1: 796 while (true) { 797 switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { 798 case AND: 799 case OR: 800 case NOT: 801 case PLUS: 802 case MINUS: 803 case LPAREN: 804 case QUOTED: 805 case TERM: 806 case PREFIXTERM: 807 case WILDTERM: 808 case RANGEIN_START: 809 case RANGEEX_START: 810 case NUMBER: 811 ; 812 break; 813 default: 814 jj_la1[4] = jj_gen; 815 break label_1; 816 } 817 conj = Conjunction(); 818 mods = Modifiers(); 819 q = Clause(field); 820 addClause(clauses, conj, mods, q); 821 } 822 if (clauses.size() == 1 && firstQuery != null) 823 {if (true) return firstQuery;} 824 else { 825 {if (true) return getBooleanQuery(clauses);} 826 } 827 throw new Error ("Missing return statement in function"); 828 } 829 830 final public Query Clause(String field) throws ParseException { 831 Query q; 832 Token fieldToken=null, boost=null; 833 if (jj_2_1(2)) { 834 fieldToken = jj_consume_token(TERM); 835 jj_consume_token(COLON); 836 field=discardEscapeChar(fieldToken.image); 837 } else { 838 ; 839 } 840 switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { 841 case QUOTED: 842 case TERM: 843 case PREFIXTERM: 844 case WILDTERM: 845 case RANGEIN_START: 846 case RANGEEX_START: 847 case NUMBER: 848 q = Term(field); 849 break; 850 case LPAREN: 851 jj_consume_token(LPAREN); 852 q = Query(field); 853 jj_consume_token(RPAREN); 854 switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { 855 case CARAT: 856 jj_consume_token(CARAT); 857 boost = jj_consume_token(NUMBER); 858 break; 859 default: 860 jj_la1[5] = jj_gen; 861 ; 862 } 863 break; 864 default: 865 jj_la1[6] = jj_gen; 866 jj_consume_token(-1); 867 throw new ParseException(); 868 } 869 if (boost != null) { 870 float f = (float)1.0; 871 try { 872 f = Float.valueOf(boost.image).floatValue(); 873 q.setBoost(f); 874 } catch (Exception ignored) { } 875 } 876 {if (true) return q;} 877 throw new Error ("Missing return statement in function"); 878 } 879 880 final public Query Term(String field) throws ParseException { 881 Token term, boost=null, fuzzySlop=null, goop1, goop2; 882 boolean prefix = false; 883 boolean wildcard = false; 884 boolean fuzzy = false; 885 boolean rangein = false; 886 Query q; 887 switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { 888 case TERM: 889 case PREFIXTERM: 890 case WILDTERM: 891 case NUMBER: 892 switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { 893 case TERM: 894 term = jj_consume_token(TERM); 895 break; 896 case PREFIXTERM: 897 term = jj_consume_token(PREFIXTERM); 898 prefix=true; 899 break; 900 case WILDTERM: 901 term = jj_consume_token(WILDTERM); 902 wildcard=true; 903 break; 904 case NUMBER: 905 term = jj_consume_token(NUMBER); 906 break; 907 default: 908 jj_la1[7] = jj_gen; 909 jj_consume_token(-1); 910 throw new ParseException(); 911 } 912 switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { 913 case FUZZY_SLOP: 914 fuzzySlop = jj_consume_token(FUZZY_SLOP); 915 fuzzy=true; 916 break; 917 default: 918 jj_la1[8] = jj_gen; 919 ; 920 } 921 switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { 922 case CARAT: 923 jj_consume_token(CARAT); 924 boost = jj_consume_token(NUMBER); 925 switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { 926 case FUZZY_SLOP: 927 fuzzySlop = jj_consume_token(FUZZY_SLOP); 928 fuzzy=true; 929 break; 930 default: 931 jj_la1[9] = jj_gen; 932 ; 933 } 934 break; 935 default: 936 jj_la1[10] = jj_gen; 937 ; 938 } 939 String termImage=discardEscapeChar(term.image); 940 if (wildcard) { 941 q = getWildcardQuery(field, termImage); 942 } else if (prefix) { 943 q = getPrefixQuery(field, 944 discardEscapeChar(term.image.substring 945 (0, term.image.length()-1))); 946 } else if (fuzzy) { 947 float fms = fuzzyMinSim; 948 try { 949 &nbs
|