QueryParser


1   /* Generated By:JavaCC: Do not edit this line. QueryParser.java */
2   package org.apache.lucene.queryParser;
3   
4   import java.util.Vector  ;
5   import java.io.*;
6   import java.text.*;
7   import java.util.*;
8   import org.apache.lucene.index.Term;
9   import org.apache.lucene.analysis.*;
10  import org.apache.lucene.document.*;
11  import org.apache.lucene.search.*;
12  import org.apache.lucene.util.Parameter;
13  
14  /**
15   * This class is generated by JavaCC.  The most important method is
16   * {@link #parse(String)}.
17   *
18   * The syntax for query strings is as follows:
19   * A Query is a series of clauses.
20   * A clause may be prefixed by:
21   * <ul>
22   * <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating
23   * that the clause is required or prohibited respectively; or
24   * <li> a term followed by a colon, indicating the field to be searched.
25   * This enables one to construct queries which search multiple fields.
26   * </ul>
27   *
28   * A clause may be either:
29   * <ul>
30   * <li> a term, indicating all the documents that contain this term; or
31   * <li> a nested query, enclosed in parentheses.  Note that this may be used
32   * with a <code>+</code>/<code>-</code> prefix to require any of a set of
33   * terms.
34   * </ul>
35   *
36   * Thus, in BNF, the query grammar is:
37   * <pre>
38   *   Query  ::= ( Clause )*
39   *   Clause ::= ["+", "-"] [&lt;TERM&gt; ":"] ( &lt;TERM&gt; | "(" Query ")" )
40   * </pre>
41   *
42   * <p>
43   * Examples of appropriately formatted queries can be found in the <a
44   * HREF="http://lucene.apache.org/java/docs/queryparsersyntax.html">query syntax
45   * documentation</a>.
46   * </p>
47   *
48   * <p>In {@link RangeQuery}s, QueryParser tries to detect date values, e.g. <tt>date:[6/1/2005 TO 6/4/2005]</tt>
49   * produces a range query that searches for "date" fields between 2005-06-01 and 2005-06-04. Note
50   * that the format of the accepted input depends on {@link #setLocale(Locale) the locale}. This
51   * feature also assumes that your index uses the {@link DateField} class to store dates.
52   * If you use a different format (e.g. {@link DateTools}) and you still want QueryParser
53   * to turn local dates in range queries into valid queries you need to create your own
54   * query parser that inherits QueryParser and overrides
55   * {@link #getRangeQuery(String, String, String, boolean)}.</p>
56   *
57   * <p>Note that QueryParser is <em>not</em> thread-safe.</p>
58   *
59   * @author Brian Goetz
60   * @author Peter Halacsy
61   * @author Tatu Saloranta
62   */
63  
64  public class QueryParser implements QueryParserConstants {
65  
66    private static final int CONJ_NONE   = 0;  // clause was not introduced by AND or OR
67    private static final int CONJ_AND    = 1;  // clause was introduced by an AND token
68    private static final int CONJ_OR     = 2;  // clause was introduced by an OR token
69  
70    private static final int MOD_NONE    = 0;  // clause carries no +, - or NOT modifier
71    private static final int MOD_NOT     = 10; // clause was prefixed by - or NOT
72    private static final int MOD_REQ     = 11; // clause was prefixed by +
73  
74    /** @deprecated use {@link #OR_OPERATOR} instead */
75    public static final int DEFAULT_OPERATOR_OR  = 0;
76    /** @deprecated use {@link #AND_OPERATOR} instead */
77    public static final int DEFAULT_OPERATOR_AND = 1;
78  
79    // make it possible to call setDefaultOperator() without accessing 
80    // the nested class:
81    /** Alternative form of QueryParser.Operator.AND */
82    public static final Operator AND_OPERATOR = Operator.AND;
83    /** Alternative form of QueryParser.Operator.OR */
84    public static final Operator OR_OPERATOR = Operator.OR;
85  
86    /** The actual operator that parser uses to combine query terms; starts out as OR_OPERATOR. */
87    private Operator operator = OR_OPERATOR;
88  
      /** Whether terms of wildcard, prefix, fuzzy and range queries are lower-cased before the query is built. */
89    boolean lowercaseExpandedTerms = true;
90  
      /** Analyzer that getFieldQuery uses to tokenize query text. */
91    Analyzer analyzer;
      /** Default field, handed to the grammar by parse(String). */
92    String   field;
      /** Default slop applied to phrase queries built by getFieldQuery. */
93    int phraseSlop = 0;
      /** Minimum similarity used by the deprecated two-argument getFuzzyQuery. */
94    float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
      /** Prefix length handed to every FuzzyQuery built by getFuzzyQuery. */
95    int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
      /** Locale used by getRangeQuery when it tries to read range bounds as dates. */
96    Locale locale = Locale.getDefault();
97  
98    /** The default operator for parsing queries. 
99     * Use {@link QueryParser#setDefaultOperator} to change it.
100    */
101   static public final class Operator extends Parameter {
102     private Operator(String   name) {
103       super(name);
104     }
105     static public final Operator OR = new Operator("OR");
106     static public final Operator AND = new Operator("AND");
107   }
108 
109   /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
110    *  @param query  the query string to be parsed.
111    *  @param field  the default field for query terms.
112    *  @param analyzer   used to find terms in the query text.
113    *  @throws ParseException if the parsing fails
114    *
115    *  @deprecated Use an instance of QueryParser and the {@link #parse(String)} method instead.
116    */
117   static public Query parse(String   query, String   field, Analyzer analyzer)
118        throws ParseException {
119     QueryParser parser = new QueryParser(field, analyzer);
120     return parser.parse(query);
121   }
122 
/**
 * Creates a query parser bound to a default field and an analyzer. The
 * underlying token manager is initialised on an empty input; the real query
 * text is supplied later through {@link #parse(String)}.
 *
 * @param f  the default field for query terms.
 * @param a  used to find terms in the query text.
 */
public QueryParser(String f, Analyzer a) {
  this(new FastCharStream(new StringReader("")));
  this.field = f;
  this.analyzer = a;
}
132 
133   /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
134    *  @param query  the query string to be parsed.
135    *  @throws ParseException if the parsing fails
136    */
137   public Query parse(String   query) throws ParseException {
138     ReInit(new FastCharStream(new StringReader(query)));
139     try {
140       return Query(field);
141     }
142     catch (TokenMgrError tme) {
143       throw new ParseException(tme.getMessage());
144     }
145     catch (BooleanQuery.TooManyClauses tmc) {
146       throw new ParseException("Too many boolean clauses");
147     }
148   }
149 
150    /**
151    * @return Returns the analyzer.
152    */
153   public Analyzer getAnalyzer() {
154     return analyzer;
155   }
156 
157   /**
158    * @return Returns the field.
159    */
160   public String   getField() {
161     return field;
162   }
163 
164    /**
165    * Get the minimal similarity for fuzzy queries.
166    */
167   public float getFuzzyMinSim() {
168       return fuzzyMinSim;
169   }
170 
171   /**
172    * Set the minimum similarity for fuzzy queries.
173    * Default is 0.5f.
174    */
175   public void setFuzzyMinSim(float fuzzyMinSim) {
176       this.fuzzyMinSim = fuzzyMinSim;
177   }
178 
179    /**
180    * Get the prefix length for fuzzy queries. 
181    * @return Returns the fuzzyPrefixLength.
182    */
183   public int getFuzzyPrefixLength() {
184     return fuzzyPrefixLength;
185   }
186 
187   /**
188    * Set the prefix length for fuzzy queries. Default is 0.
189    * @param fuzzyPrefixLength The fuzzyPrefixLength to set.
190    */
191   public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
192     this.fuzzyPrefixLength = fuzzyPrefixLength;
193   }
194 
195   /**
196    * Sets the default slop for phrases.  If zero, then exact phrase matches
197    * are required.  Default value is zero.
198    */
199   public void setPhraseSlop(int phraseSlop) {
200     this.phraseSlop = phraseSlop;
201   }
202 
203   /**
204    * Gets the default slop for phrases.
205    */
206   public int getPhraseSlop() {
207     return phraseSlop;
208   }
209 
210   /**
211    * Sets the boolean operator of the QueryParser.
212    * In default mode (<code>DEFAULT_OPERATOR_OR</code>) terms without any modifiers
213    * are considered optional: for example <code>capital of Hungary</code> is equal to
214    * <code>capital OR of OR Hungary</code>.<br/>
215    * In <code>DEFAULT_OPERATOR_AND</code> terms are considered to be in conjuction: the
216    * above mentioned query is parsed as <code>capital AND of AND Hungary</code>
217    * @deprecated use {@link #setDefaultOperator(QueryParser.Operator)} instead
218    */
219   public void setOperator(int op) {
220     if (op == DEFAULT_OPERATOR_AND)
221       this.operator = AND_OPERATOR;
222     else if (op == DEFAULT_OPERATOR_OR)
223       this.operator = OR_OPERATOR;
224     else
225       throw new IllegalArgumentException  ("Unknown operator " + op);
226   }
227 
228   /**
229    * Sets the boolean operator of the QueryParser.
230    * In default mode (<code>OR_OPERATOR</code>) terms without any modifiers
231    * are considered optional: for example <code>capital of Hungary</code> is equal to
232    * <code>capital OR of OR Hungary</code>.<br/>
233    * In <code>AND_OPERATOR</code> mode terms are considered to be in conjuction: the
234    * above mentioned query is parsed as <code>capital AND of AND Hungary</code>
235    */
236   public void setDefaultOperator(Operator op) {
237     this.operator = op;
238   }
239 
240   /**
241    * Gets implicit operator setting, which will be either DEFAULT_OPERATOR_AND
242    * or DEFAULT_OPERATOR_OR.
243    * @deprecated use {@link #getDefaultOperator()} instead
244    */
245   public int getOperator() {
246     if(operator == AND_OPERATOR)
247       return DEFAULT_OPERATOR_AND;
248     else if(operator == OR_OPERATOR)
249       return DEFAULT_OPERATOR_OR;
250     else
251       throw new IllegalStateException  ("Unknown operator " + operator);
252   }
253 
254   /**
255    * Gets implicit operator setting, which will be either AND_OPERATOR
256    * or OR_OPERATOR.
257    */
258   public Operator getDefaultOperator() {
259     return operator;
260   }
261 
262   /**
263    * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically
264    * lower-cased or not.  Default is <code>true</code>.
265    * @deprecated use {@link #setLowercaseExpandedTerms(boolean)} instead
266    */
267   public void setLowercaseWildcardTerms(boolean lowercaseExpandedTerms) {
268     this.lowercaseExpandedTerms = lowercaseExpandedTerms;
269   }
270 
271   /**
272    * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically
273    * lower-cased or not.  Default is <code>true</code>.
274    */
275   public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) {
276     this.lowercaseExpandedTerms = lowercaseExpandedTerms;
277   }
278 
279   /**
280    * @deprecated use {@link #getLowercaseExpandedTerms()} instead
281    */
282   public boolean getLowercaseWildcardTerms() {
283     return lowercaseExpandedTerms;
284   }
285 
286   /**
287    * @see #setLowercaseExpandedTerms(boolean)
288    */
289   public boolean getLowercaseExpandedTerms() {
290     return lowercaseExpandedTerms;
291   }
292 
293   /**
294    * Set locale used by date range parsing.
295    */
296   public void setLocale(Locale locale) {
297     this.locale = locale;
298   }
299 
300   /**
301    * Returns current locale, allowing access by subclasses.
302    */
303   public Locale getLocale() {
304     return locale;
305   }
306 
307   protected void addClause(Vector   clauses, int conj, int mods, Query q) {
308     boolean required, prohibited;
309 
310     // If this term is introduced by AND, make the preceding term required,
311     // unless it's already prohibited
312     if (clauses.size() > 0 && conj == CONJ_AND) {
313       BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
314       if (!c.isProhibited())
315         c.setOccur(BooleanClause.Occur.MUST);
316     }
317 
318     if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) {
319       // If this term is introduced by OR, make the preceding term optional,
320       // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
321       // notice if the input is a OR b, first term is parsed as required; without
322       // this modification a OR b would parsed as +a OR b
323       BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
324       if (!c.isProhibited())
325         c.setOccur(BooleanClause.Occur.SHOULD);
326     }
327 
328     // We might have been passed a null query; the term might have been
329     // filtered away by the analyzer.
330     if (q == null)
331       return;
332 
333     if (operator == OR_OPERATOR) {
334       // We set REQUIRED if we're introduced by AND or +; PROHIBITED if
335       // introduced by NOT or -; make sure not to set both.
336       prohibited = (mods == MOD_NOT);
337       required = (mods == MOD_REQ);
338       if (conj == CONJ_AND && !prohibited) {
339         required = true;
340       }
341     } else {
342       // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED
343       // if not PROHIBITED and not introduced by OR
344       prohibited = (mods == MOD_NOT);
345       required   = (!prohibited && conj != CONJ_OR);
346     }
347     if (required && !prohibited)
348       clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST));
349     else if (!required && !prohibited)
350       clauses.addElement(new BooleanClause(q, BooleanClause.Occur.SHOULD));
351     else if (!required && prohibited)
352       clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST_NOT));
353     else
354       throw new RuntimeException  ("Clause cannot be both required and prohibited");
355   }
356 
357   /**
358    * Note that parameter analyzer is ignored. Calls inside the parser always
359    * use class member analyzer.
360    *
361    * @exception ParseException throw in overridden method to disallow
362    * @deprecated use {@link #getFieldQuery(String, String)}
363    */
364   protected Query getFieldQuery(String   field,
365                                                     Analyzer analyzer,
366                                                     String   queryText)  throws ParseException {
367     return getFieldQuery(field, queryText);
368   }
369 
370   /**
371    * @exception ParseException throw in overridden method to disallow
372    */
373   protected Query getFieldQuery(String   field, String   queryText)  throws ParseException {
374     // Use the analyzer to get all the tokens, and then build a TermQuery,
375     // PhraseQuery, or nothing based on the term count
376 
377     TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
378     Vector   v = new Vector  ();
379     org.apache.lucene.analysis.Token t;
380     int positionCount = 0;
381     boolean severalTokensAtSamePosition = false;
382 
383     while (true) {
384       try {
385         t = source.next();
386       }
387       catch (IOException e) {
388         t = null;
389       }
390       if (t == null)
391         break;
392       v.addElement(t);
393       if (t.getPositionIncrement() != 0)
394         positionCount += t.getPositionIncrement();
395       else
396         severalTokensAtSamePosition = true;
397     }
398     try {
399       source.close();
400     }
401     catch (IOException e) {
402       // ignore
403     }
404 
405     if (v.size() == 0)
406       return null;
407     else if (v.size() == 1) {
408       t = (org.apache.lucene.analysis.Token) v.elementAt(0);
409       return new TermQuery(new Term(field, t.termText()));
410     } else {
411       if (severalTokensAtSamePosition) {
412         if (positionCount == 1) {
413           // no phrase query:
414           BooleanQuery q = new BooleanQuery(true);
415           for (int i = 0; i < v.size(); i++) {
416             t = (org.apache.lucene.analysis.Token) v.elementAt(i);
417             TermQuery currentQuery = new TermQuery(
418                 new Term(field, t.termText()));
419             q.add(currentQuery, BooleanClause.Occur.SHOULD);
420           }
421           return q;
422         }
423         else {
424           // phrase query:
425           MultiPhraseQuery mpq = new MultiPhraseQuery();
426           mpq.setSlop(phraseSlop);
427           List multiTerms = new ArrayList();
428           for (int i = 0; i < v.size(); i++) {
429             t = (org.apache.lucene.analysis.Token) v.elementAt(i);
430             if (t.getPositionIncrement() == 1 && multiTerms.size() > 0) {
431               mpq.add((Term[])multiTerms.toArray(new Term[0]));
432               multiTerms.clear();
433             }
434             multiTerms.add(new Term(field, t.termText()));
435           }
436           mpq.add((Term[])multiTerms.toArray(new Term[0]));
437           return mpq;
438         }
439       }
440       else {
441         PhraseQuery q = new PhraseQuery();
442         q.setSlop(phraseSlop);
443         for (int i = 0; i < v.size(); i++) {
444           q.add(new Term(field, ((org.apache.lucene.analysis.Token)
445               v.elementAt(i)).termText()));
446 
447         }
448         return q;
449       }
450     }
451   }
452 
453   /**
454    * Note that parameter analyzer is ignored. Calls inside the parser always
455    * use class member analyzer.
456    *
457    * @exception ParseException throw in overridden method to disallow
458    * @deprecated use {@link #getFieldQuery(String, String, int)}
459    */
460   protected Query getFieldQuery(String   field,
461                                                     Analyzer analyzer,
462                                                     String   queryText,
463                                                     int slop) throws ParseException {
464     return getFieldQuery(field, queryText, slop);
465   }
466 
467   /**
468    * Base implementation delegates to {@link #getFieldQuery(String,String)}.
469    * This method may be overridden, for example, to return
470    * a SpanNearQuery instead of a PhraseQuery.
471    *
472    * @exception ParseException throw in overridden method to disallow
473    */
474   protected Query getFieldQuery(String   field, String   queryText, int slop)
475         throws ParseException {
476     Query query = getFieldQuery(field, queryText);
477 
478     if (query instanceof PhraseQuery) {
479       ((PhraseQuery) query).setSlop(slop);
480     }
481     if (query instanceof MultiPhraseQuery) {
482       ((MultiPhraseQuery) query).setSlop(slop);
483     }
484 
485     return query;
486   }
487 
488   /**
489    * Note that parameter analyzer is ignored. Calls inside the parser always
490    * use class member analyzer.
491    *
492    * @exception ParseException throw in overridden method to disallow
493    * @deprecated use {@link #getRangeQuery(String, String, String, boolean)}
494    */
495   protected Query getRangeQuery(String   field,
496       Analyzer analyzer,
497       String   part1,
498       String   part2,
499       boolean inclusive) throws ParseException {
500     return getRangeQuery(field, part1, part2, inclusive);
501   }
502 
503   /**
504    * @exception ParseException throw in overridden method to disallow
505    */
506   protected Query getRangeQuery(String   field,
507                                 String   part1,
508                                 String   part2,
509                                 boolean inclusive) throws ParseException
510   {
511     if (lowercaseExpandedTerms) {
512       part1 = part1.toLowerCase();
513       part2 = part2.toLowerCase();
514     }
515     try {
516       DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale);
517       df.setLenient(true);
518       Date d1 = df.parse(part1);
519       Date d2 = df.parse(part2);
520       if (inclusive) {
521         // The user can only specify the date, not the time, so make sure
522         // the time is set to the latest possible time of that date to really
523         // include all documents:
524         Calendar cal = Calendar.getInstance(locale);
525         cal.setTime(d2);
526         cal.set(Calendar.HOUR_OF_DAY, 23);
527         cal.set(Calendar.MINUTE, 59);
528         cal.set(Calendar.SECOND, 59);
529         cal.set(Calendar.MILLISECOND, 999);
530         d2 = cal.getTime();
531       }
532       part1 = DateField.dateToString(d1);
533       part2 = DateField.dateToString(d2);
534     }
535     catch (Exception   e) { }
536 
537     return new RangeQuery(new Term(field, part1),
538                           new Term(field, part2),
539                           inclusive);
540   }
541 
542   /**
543    * Factory method for generating query, given a set of clauses.
544    * By default creates a boolean query composed of clauses passed in.
545    *
546    * Can be overridden by extending classes, to modify query being
547    * returned.
548    *
549    * @param clauses Vector that contains {@link BooleanClause} instances
550    *    to join.
551    *
552    * @return Resulting {@link Query} object.
553    * @exception ParseException throw in overridden method to disallow
554    */
555   protected Query getBooleanQuery(Vector   clauses) throws ParseException {
556     return getBooleanQuery(clauses, false);
557   }
558 
559   /**
560    * Factory method for generating query, given a set of clauses.
561    * By default creates a boolean query composed of clauses passed in.
562    *
563    * Can be overridden by extending classes, to modify query being
564    * returned.
565    *
566    * @param clauses Vector that contains {@link BooleanClause} instances
567    *    to join.
568    * @param disableCoord true if coord scoring should be disabled.
569    *
570    * @return Resulting {@link Query} object.
571    * @exception ParseException throw in overridden method to disallow
572    */
573   protected Query getBooleanQuery(Vector   clauses, boolean disableCoord)
574     throws ParseException
575   {
576     BooleanQuery query = new BooleanQuery(disableCoord);
577     for (int i = 0; i < clauses.size(); i++) {
578   query.add((BooleanClause)clauses.elementAt(i));
579     }
580     return query;
581   }
582 
583   /**
584    * Factory method for generating a query. Called when parser
585    * parses an input term token that contains one or more wildcard
586    * characters (? and *), but is not a prefix term token (one
587    * that has just a single * character at the end)
588    *<p>
589    * Depending on settings, prefix term may be lower-cased
590    * automatically. It will not go through the default Analyzer,
591    * however, since normal Analyzers are unlikely to work properly
592    * with wildcard templates.
593    *<p>
594    * Can be overridden by extending classes, to provide custom handling for
595    * wildcard queries, which may be necessary due to missing analyzer calls.
596    *
597    * @param field Name of the field query will use.
598    * @param termStr Term token that contains one or more wild card
599    *   characters (? or *), but is not simple prefix term
600    *
601    * @return Resulting {@link Query} built for the term
602    * @exception ParseException throw in overridden method to disallow
603    */
604   protected Query getWildcardQuery(String   field, String   termStr) throws ParseException
605   {
606     if (lowercaseExpandedTerms) {
607       termStr = termStr.toLowerCase();
608     }
609     Term t = new Term(field, termStr);
610     return new WildcardQuery(t);
611   }
612 
613   /**
614    * Factory method for generating a query (similar to
615    * {@link #getWildcardQuery}). Called when parser parses an input term
616    * token that uses prefix notation; that is, contains a single '*' wildcard
617    * character as its last character. Since this is a special case
618    * of generic wildcard term, and such a query can be optimized easily,
619    * this usually results in a different query object.
620    *<p>
621    * Depending on settings, a prefix term may be lower-cased
622    * automatically. It will not go through the default Analyzer,
623    * however, since normal Analyzers are unlikely to work properly
624    * with wildcard templates.
625    *<p>
626    * Can be overridden by extending classes, to provide custom handling for
627    * wild card queries, which may be necessary due to missing analyzer calls.
628    *
629    * @param field Name of the field query will use.
630    * @param termStr Term token to use for building term for the query
631    *    (<b>without</b> trailing '*' character!)
632    *
633    * @return Resulting {@link Query} built for the term
634    * @exception ParseException throw in overridden method to disallow
635    */
636   protected Query getPrefixQuery(String   field, String   termStr) throws ParseException
637   {
638     if (lowercaseExpandedTerms) {
639       termStr = termStr.toLowerCase();
640     }
641     Term t = new Term(field, termStr);
642     return new PrefixQuery(t);
643   }
644 
645  /**
646    * @deprecated use {@link #getFuzzyQuery(String, String, float)}
647    */
648   protected Query getFuzzyQuery(String   field, String   termStr) throws ParseException {
649     return getFuzzyQuery(field, termStr, fuzzyMinSim);
650   }
651 
652    /**
653    * Factory method for generating a query (similar to
654    * {@link #getWildcardQuery}). Called when parser parses
655    * an input term token that has the fuzzy suffix (~) appended.
656    *
657    * @param field Name of the field query will use.
658    * @param termStr Term token to use for building term for the query
659    *
660    * @return Resulting {@link Query} built for the term
661    * @exception ParseException throw in overridden method to disallow
662    */
663   protected Query getFuzzyQuery(String   field, String   termStr, float minSimilarity) throws ParseException
664   {
665     if (lowercaseExpandedTerms) {
666       termStr = termStr.toLowerCase();
667     }
668     Term t = new Term(field, termStr);
669     return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength);
670   }
671 
672   /**
673    * Returns a String where the escape char has been
674    * removed, or kept only once if there was a double escape.
675    */
676   private String   discardEscapeChar(String   input) {
677     char[] caSource = input.toCharArray();
678     char[] caDest = new char[caSource.length];
679     int j = 0;
680     for (int i = 0; i < caSource.length; i++) {
681       if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) {
682         caDest[j++]=caSource[i];
683       }
684     }
685     return new String  (caDest, 0, j);
686   }
687 
688   /**
689    * Returns a String where those characters that QueryParser
690    * expects to be escaped are escaped by a preceding <code>\</code>.
691    */
692   public static String   escape(String   s) {
693     StringBuffer   sb = new StringBuffer  ();
694     for (int i = 0; i < s.length(); i++) {
695       char c = s.charAt(i);
696       // NOTE: keep this in sync with _ESCAPED_CHAR below!
697       if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
698         || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
699         || c == '*' || c == '?') {
700         sb.append('\\');
701       }
702       sb.append(c);
703     }
704     return sb.toString();
705   }
706 
707   /**
708    * Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
709    * Usage:<br>
710    * <code>java org.apache.lucene.queryParser.QueryParser &lt;input&gt;</code>
711    */
712   public static void main(String  [] args) throws Exception   {
713     if (args.length == 0) {
714       System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
715       System.exit(0);
716     }
717     QueryParser qp = new QueryParser("field",
718                            new org.apache.lucene.analysis.SimpleAnalyzer());
719     Query q = qp.parse(args[0]);
720     System.out.println(q.toString("field"));
721   }
722 
723 // *   Query  ::= ( Clause )*
724 // *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
725   final public int Conjunction() throws ParseException {
      // Consumes an optional AND / OR token and reports which one was seen:
      // CONJ_AND, CONJ_OR, or CONJ_NONE when the next token is neither.
      // NOTE(review): jj_ntk, jj_la1 and jj_gen are declared outside this
      // excerpt - presumably JavaCC's lookahead bookkeeping; confirm there.
726   int ret = CONJ_NONE;
727     switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
728     case AND:
729     case OR:
730       switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
731       case AND:
732         jj_consume_token(AND);
733             ret = CONJ_AND;
734         break;
735       case OR:
736         jj_consume_token(OR);
737               ret = CONJ_OR;
738         break;
739       default:
740         jj_la1[0] = jj_gen;
741         jj_consume_token(-1);
742         throw new ParseException();
743       }
744       break;
745     default:
          // no conjunction present: ret keeps CONJ_NONE
746       jj_la1[1] = jj_gen;
747       ;
748     }
749     {if (true) return ret;}
750     throw new Error  ("Missing return statement in function");
751   }
752 
753   final public int Modifiers() throws ParseException {
      // Consumes an optional clause modifier and reports it: MOD_REQ for a
      // PLUS token, MOD_NOT for a MINUS or NOT token, MOD_NONE when the next
      // token is none of these.
      // NOTE(review): jj_ntk, jj_la1 and jj_gen are declared outside this
      // excerpt - presumably JavaCC's lookahead bookkeeping; confirm there.
754   int ret = MOD_NONE;
755     switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
756     case NOT:
757     case PLUS:
758     case MINUS:
759       switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
760       case PLUS:
761         jj_consume_token(PLUS);
762               ret = MOD_REQ;
763         break;
764       case MINUS:
765         jj_consume_token(MINUS);
766                  ret = MOD_NOT;
767         break;
768       case NOT:
769         jj_consume_token(NOT);
770                ret = MOD_NOT;
771         break;
772       default:
773         jj_la1[2] = jj_gen;
774         jj_consume_token(-1);
775         throw new ParseException();
776       }
777       break;
778     default:
          // no modifier present: ret keeps MOD_NONE
779       jj_la1[3] = jj_gen;
780       ;
781     }
782     {if (true) return ret;}
783     throw new Error  ("Missing return statement in function");
784   }
785 
786   final public Query Query(String   field) throws ParseException {
      // Parses a sequence of clauses for the given default field. Every
      // clause is read as [Conjunction] Modifiers Clause and handed to
      // addClause(). If exactly one clause was collected and the first clause
      // carried no modifier, that clause's query is returned as is; otherwise
      // the collected clauses are combined through getBooleanQuery().
787   Vector   clauses = new Vector  ();
788   Query q, firstQuery=null;
789   int conj, mods;
        // first clause: it cannot be preceded by a conjunction
790     mods = Modifiers();
791     q = Clause(field);
792     addClause(clauses, CONJ_NONE, mods, q);
793     if (mods == MOD_NONE)
794         firstQuery=q;
795     label_1:
796     while (true) {
          // keep going only while the next token can start another clause
797       switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
798       case AND:
799       case OR:
800       case NOT:
801       case PLUS:
802       case MINUS:
803       case LPAREN:
804       case QUOTED:
805       case TERM:
806       case PREFIXTERM:
807       case WILDTERM:
808       case RANGEIN_START:
809       case RANGEEX_START:
810       case NUMBER:
811         ;
812         break;
813       default:
814         jj_la1[4] = jj_gen;
815         break label_1;
816       }
817       conj = Conjunction();
818       mods = Modifiers();
819       q = Clause(field);
820       addClause(clauses, conj, mods, q);
821     }
822       if (clauses.size() == 1 && firstQuery != null)
823         {if (true) return firstQuery;}
824       else {
825   {if (true) return getBooleanQuery(clauses);}
826       }
827     throw new Error  ("Missing return statement in function");
828   }
829 
830   final public Query Clause(String   field) throws ParseException {
      // Parses one clause: an optional "TERM :" field prefix followed either
      // by a term (delegated to Term()) or by a parenthesised sub-query that
      // may carry a "^ NUMBER" boost. Returns the query built for the clause.
831   Query q;
832   Token fieldToken=null, boost=null;
        // NOTE(review): jj_2_1 is defined outside this excerpt - presumably a
        // two-token lookahead for TERM followed by COLON; confirm there.
833     if (jj_2_1(2)) {
834       fieldToken = jj_consume_token(TERM);
835       jj_consume_token(COLON);
          // an explicit field name replaces the default field for this clause
836       field=discardEscapeChar(fieldToken.image);
837     } else {
838       ;
839     }
840     switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
841     case QUOTED:
842     case TERM:
843     case PREFIXTERM:
844     case WILDTERM:
845     case RANGEIN_START:
846     case RANGEEX_START:
847     case NUMBER:
848       q = Term(field);
849       break;
850     case LPAREN:
851       jj_consume_token(LPAREN);
852       q = Query(field);
853       jj_consume_token(RPAREN);
854       switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
855       case CARAT:
856         jj_consume_token(CARAT);
857         boost = jj_consume_token(NUMBER);
858         break;
859       default:
860         jj_la1[5] = jj_gen;
861         ;
862       }
863       break;
864     default:
865       jj_la1[6] = jj_gen;
866       jj_consume_token(-1);
867       throw new ParseException();
868     }
869       if (boost != null) {
870         float f = (float)1.0;
        // Any exception raised while parsing or applying the boost is
        // swallowed; the query is then returned without the boost applied.
871   try {
872     f = Float.valueOf(boost.image).floatValue();
873           q.setBoost(f);
874   } catch (Exception   ignored) { }
875       }
876       {if (true) return q;}
877     throw new Error  ("Missing return statement in function");
878   }
879 
880   final public Query Term(String   field) throws ParseException {
881   Token term, boost=null, fuzzySlop=null, goop1, goop2;
882   boolean prefix = false;
883   boolean wildcard = false;
884   boolean fuzzy = false;
885   boolean rangein = false;
886   Query q;
887     switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
888     case TERM:
889     case PREFIXTERM:
890     case WILDTERM:
891     case NUMBER:
892       switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
893       case TERM:
894         term = jj_consume_token(TERM);
895         break;
896       case PREFIXTERM:
897         term = jj_consume_token(PREFIXTERM);
898                              prefix=true;
899         break;
900       case WILDTERM:
901         term = jj_consume_token(WILDTERM);
902                            wildcard=true;
903         break;
904       case NUMBER:
905         term = jj_consume_token(NUMBER);
906         break;
907       default:
908         jj_la1[7] = jj_gen;
909         jj_consume_token(-1);
910         throw new ParseException();
911       }
912       switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
913       case FUZZY_SLOP:
914         fuzzySlop = jj_consume_token(FUZZY_SLOP);
915                                 fuzzy=true;
916         break;
917       default:
918         jj_la1[8] = jj_gen;
919         ;
920       }
921       switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
922       case CARAT:
923         jj_consume_token(CARAT);
924         boost = jj_consume_token(NUMBER);
925         switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
926         case FUZZY_SLOP:
927           fuzzySlop = jj_consume_token(FUZZY_SLOP);
928                                                          fuzzy=true;
929           break;
930         default:
931           jj_la1[9] = jj_gen;
932           ;
933         }
934         break;
935       default:
936         jj_la1[10] = jj_gen;
937         ;
938       }
939        String   termImage=discardEscapeChar(term.image);
940        if (wildcard) {
941        q = getWildcardQuery(field, termImage);
942        } else if (prefix) {
943          q = getPrefixQuery(field,
944            discardEscapeChar(term.image.substring
945           (0, term.image.length()-1)));
946        } else if (fuzzy) {
947           float fms = fuzzyMinSim;
948           try {
949             fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
950           } catch (Exception   ignored) { }
951          if(fms < 0.0f || fms > 1.0f){
952            {if (true) throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");}
953          }
954          if(fms == fuzzyMinSim)
955            q = getFuzzyQuery(field, termImage);
956          else
957            q = getFuzzyQuery(field, termImage, fms);
958        } else {
959          q = getFieldQuery(field, analyzer, termImage);
960        }
961       break;
962     case RANGEIN_START:
963       jj_consume_token(RANGEIN_START);
964       switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
965       case RANGEIN_GOOP:
966         goop1 = jj_consume_token(RANGEIN_GOOP);
967         break;
968       case RANGEIN_QUOTED:
969         goop1 = jj_consume_token(RANGEIN_QUOTED);
970         break;
971       default:
972         jj_la1[11] = jj_gen;
973         jj_consume_token(-1);
974         throw new ParseException();
975       }
976       switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
977       case RANGEIN_TO:
978         jj_consume_token(RANGEIN_TO);
979         break;
980       default:
981         jj_la1[12] = jj_gen;
982         ;
983       }
984       switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
985       case RANGEIN_GOOP:
986         goop2 = jj_consume_token(RANGEIN_GOOP);
987         break;
988       case RANGEIN_QUOTED:
989         goop2 = jj_consume_token(RANGEIN_QUOTED);
990         break;
991       default:
992         jj_la1[13] = jj_gen;
993         jj_consume_token(-1);
994         throw new ParseException();
995       }
996       jj_consume_token(RANGEIN_END);
997       switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
998       case CARAT:
999         jj_consume_token(CARAT);
1000        boost = jj_consume_token(NUMBER);
1001        break;
1002      default:
1003        jj_la1[14] = jj_gen;
1004        ;
1005      }
1006          if (goop1.kind == RANGEIN_QUOTED) {
1007            goop1.image = goop1.image.substring(1, goop1.image.length()-1);
1008          } else {
1009            goop1.image = discardEscapeChar(goop1.image);
1010          }
1011          if (goop2.kind == RANGEIN_QUOTED) {
1012            goop2.image = goop2.image.substring(1, goop2.image.length()-1);
1013      } else {
1014        goop2.image = discardEscapeChar(goop2.image);
1015      }
1016          q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true);
1017      break;
1018    case RANGEEX_START:
1019      jj_consume_token(RANGEEX_START);
1020      switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
1021      case RANGEEX_GOOP:
1022        goop1 = jj_consume_token(RANGEEX_GOOP);
1023        break;
1024      case RANGEEX_QUOTED:
1025        goop1 = jj_consume_token(RANGEEX_QUOTED);
1026        break;
1027      default:
1028        jj_la1[15] = jj_gen;
1029        jj_consume_token(-1);
1030        throw new ParseException();
1031      }
1032      switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
1033      case RANGEEX_TO:
1034        jj_consume_token(RANGEEX_TO);
1035        break;
1036      default:
1037        jj_la1[16] = jj_gen;
1038        ;
1039      }
1040      switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
1041      case RANGEEX_GOOP:
1042        goop2 = jj_consume_token(RANGEEX_GOOP);
1043        break;
1044      case RANGEEX_QUOTED:
1045        goop2 = jj_consume_token(RANGEEX_QUOTED);
1046        break;
1047      default:
1048        jj_la1[17] = jj_gen;
1049        jj_consume_token(-1);
1050        throw new ParseException();
1051      }
1052      jj_consume_token(RANGEEX_END);
1053      switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
1054      case CARAT:
1055        jj_consume_token(CARAT);
1056        boost = jj_consume_token(NUMBER);
1057        break;
1058      default:
1059        jj_la1[18] = jj_gen;
1060        ;
1061      }
1062          if (goop1.kind == RANGEEX_QUOTED) {
1063            goop1.image = goop1.image.substring(1, goop1.image.length()-1);
1064          } else {
1065            goop1.image = discardEscapeChar(goop1.image);
1066          }
1067          if (goop2.kind == RANGEEX_QUOTED) {
1068            goop2.image = goop2.image.substring(1, goop2.image.length()-1);
1069      } else {
1070        goop2.image = discardEscapeChar(goop2.image);
1071      }
1072
1073          q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false);
1074      break;
1075    case QUOTED:
1076      term = jj_consume_token(QUOTED);
1077      switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
1078      case FUZZY_SLOP:
1079        fuzzySlop = jj_consume_token(FUZZY_SLOP);
1080        break;
1081      default:
1082        jj_la1[19] = jj_gen;
1083        ;
1084      }
1085      switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
1086      case CARAT:
1087        jj_consume_token(CARAT);
1088        boost = jj_consume_token(NUMBER);
1089        break;
1090      default:
1091        jj_la1[20] = jj_gen;
1092        ;
1093      }
1094         int s = phraseSlop;
1095
1096         if (fuzzySlop != null) {
1097           try {
1098             s = Float.valueOf(fuzzySlop.image.substring(1)).intValue();
1099           }
1100           catch (Exception   ignored) { }
1101         }
1102         q = getFieldQuery(field, analyzer, term.image.substring(1, term.image.length()-1), s);
1103      break;
1104    default:
1105      jj_la1[21] = jj_gen;
1106      jj_consume_token(-1);
1107      throw new ParseException();
1108    }
1109    if (boost != null) {
1110      float f = (float) 1.0;
1111      try {
1112        f = Float.valueOf(boost.image).floatValue();
1113      }
1114      catch (Exception   ignored) {
1115    /* Should this be handled somehow? (defaults to "no boost", if
1116     * boost number is invalid)
1117     */
1118      }
1119
1120      // avoid boosting null queries, such as those caused by stop words
1121      if (q != null) {
1122        q.setBoost(f);
1123      }
1124    }
1125    {if (true) return q;}
1126    throw new Error  ("Missing return statement in function");
1127  }
1128
1129  final private boolean jj_2_1(int xla) {
1130    jj_la = xla; jj_lastpos = jj_scanpos = token;
1131    try { return !jj_3_1(); }
1132    catch(LookaheadSuccess ls) { return true; }
1133    finally { jj_save(0, xla); }
1134  }
1135
1136  final private boolean jj_3_1() {
1137    if (jj_scan_token(TERM)) return true;
1138    if (jj_scan_token(COLON)) return true;
1139    return false;
1140  }
1141
1142  public QueryParserTokenManager token_source;
1143  public Token token, jj_nt;
1144  private int jj_ntk;
1145  private Token jj_scanpos, jj_lastpos;
1146  private int jj_la;
1147  public boolean lookingAhead = false;
1148  private boolean jj_semLA;
1149  private int jj_gen;
1150  final private int[] jj_la1 = new int[22];
1151  static private int[] jj_la1_0;
1152  static {
1153      jj_la1_0();
1154   }
1155   private static void jj_la1_0() {
1156      jj_la1_0 = new int[] {0x180,0x180,0xe00,0xe00,0xfb1f80,0x8000,0xfb1000,0x9a0000,0x40000,0x40000,0x8000,0xc000000,0x1000000,0xc000000,0x8000,0xc0000000,0x10000000,0xc0000000,0x8000,0x40000,0x8000,0xfb0000,};
1157   }
1158  final private JJCalls[] jj_2_rtns = new JJCalls[1];
1159  private boolean jj_rescan = false;
1160  private int jj_gc = 0;
1161
1162  public QueryParser(CharStream stream) {
1163    token_source = new QueryParserTokenManager(stream);
1164    token = new Token();
1165    jj_ntk = -1;
1166    jj_gen = 0;
1167    for (int i = 0; i < 22; i++) jj_la1[i] = -1;
1168    for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
1169  }
1170
1171  public void ReInit(CharStream stream) {
1172    token_source.ReInit(stream);
1173    token = new Token();
1174    jj_ntk = -1;
1175    jj_gen = 0;
1176    for (int i = 0; i < 22; i++) jj_la1[i] = -1;
1177    for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
1178  }
1179
1180  public QueryParser(QueryParserTokenManager tm) {
1181    token_source = tm;
1182    token = new Token();
1183    jj_ntk = -1;
1184    jj_gen = 0;
1185    for (int i = 0; i < 22; i++) jj_la1[i] = -1;
1186    for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
1187  }
1188
1189  public void ReInit(QueryParserTokenManager tm) {
1190    token_source = tm;
1191    token = new Token();
1192    jj_ntk = -1;
1193    jj_gen = 0;
1194    for (int i = 0; i < 22; i++) jj_la1[i] = -1;
1195    for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
1196  }
1197
1198  final private Token jj_consume_token(int kind) throws ParseException {
1199    Token oldToken;
1200    if ((oldToken = token).next != null) token = token.next;
1201    else token = token.next = token_source.getNextToken();
1202    jj_ntk = -1;
1203    if (token.kind == kind) {
1204      jj_gen++;
1205      if (++jj_gc > 100) {
1206        jj_gc = 0;
1207        for (int i = 0; i < jj_2_rtns.length; i++) {
1208          JJCalls c = jj_2_rtns[i];
1209          while (c != null) {
1210            if (c.gen < jj_gen) c.first = null;
1211            c = c.next;
1212          }
1213        }
1214      }
1215      return token;
1216    }
1217    token = oldToken;
1218    jj_kind = kind;
1219    throw generateParseException();
1220  }
1221
1222  static private final class LookaheadSuccess extends java.lang.Error   { }
1223  final private LookaheadSuccess jj_ls = new LookaheadSuccess();
1224  final private boolean jj_scan_token(int kind) {
1225    if (jj_scanpos == jj_lastpos) {
1226      jj_la--;
1227      if (jj_scanpos.next == null) {
1228        jj_lastpos = jj_scanpos = jj_scanpos.next = token_source.getNextToken();
1229      } else {
1230        jj_lastpos = jj_scanpos = jj_scanpos.next;
1231      }
1232    } else {
1233      jj_scanpos = jj_scanpos.next;
1234    }
1235    if (jj_rescan) {
1236      int i = 0; Token tok = token;
1237      while (tok != null && tok != jj_scanpos) { i++; tok = tok.next; }
1238      if (tok != null) jj_add_error_token(kind, i);
1239    }
1240    if (jj_scanpos.kind != kind) return true;
1241    if (jj_la == 0 && jj_scanpos == jj_lastpos) throw jj_ls;
1242    return false;
1243  }
1244
1245  final public Token getNextToken() {
1246    if (token.next != null) token = token.next;
1247    else token = token.next = token_source.getNextToken();
1248    jj_ntk = -1;
1249    jj_gen++;
1250    return token;
1251  }
1252
1253  final public Token getToken(int index) {
1254    Token t = lookingAhead ? jj_scanpos : token;
1255    for (int i = 0; i < index; i++) {
1256      if (t.next != null) t = t.next;
1257      else t = t.next = token_source.getNextToken();
1258    }
1259    return t;
1260  }
1261
1262  final private int jj_ntk() {
1263    if ((jj_nt=token.next) == null)
1264      return (jj_ntk = (token.next=token_source.getNextToken()).kind);
1265    else
1266      return (jj_ntk = jj_nt.kind);
1267  }
1268
1269  private java.util.Vector   jj_expentries = new java.util.Vector  ();
1270  private int[] jj_expentry;
1271  private int jj_kind = -1;
1272  private int[] jj_lasttokens = new int[100];
1273  private int jj_endpos;
1274
1275  private void jj_add_error_token(int kind, int pos) {
1276    if (pos >= 100) return;
1277    if (pos == jj_endpos + 1) {
1278      jj_lasttokens[jj_endpos++] = kind;
1279    } else if (jj_endpos != 0) {
1280      jj_expentry = new int[jj_endpos];
1281      for (int i = 0; i < jj_endpos; i++) {
1282        jj_expentry[i] = jj_lasttokens[i];
1283      }
1284      boolean exists = false;
1285      for (java.util.Enumeration   e = jj_expentries.elements(); e.hasMoreElements();) {
1286        int[] oldentry = (int[])(e.nextElement());
1287        if (oldentry.length == jj_expentry.length) {
1288          exists = true;
1289          for (int i = 0; i < jj_expentry.length; i++) {
1290            if (oldentry[i] != jj_expentry[i]) {
1291              exists = false;
1292              break;
1293            }
1294          }
1295          if (exists) break;
1296        }
1297      }
1298      if (!exists) jj_expentries.addElement(jj_expentry);
1299      if (pos != 0) jj_lasttokens[(jj_endpos = pos) - 1] = kind;
1300    }
1301  }
1302
1303  public ParseException generateParseException() {
1304    jj_expentries.removeAllElements();
1305    boolean[] la1tokens = new boolean[32];
1306    for (int i = 0; i < 32; i++) {
1307      la1tokens[i] = false;
1308    }
1309    if (jj_kind >= 0) {
1310      la1tokens[jj_kind] = true;
1311      jj_kind = -1;
1312    }
1313    for (int i = 0; i < 22; i++) {
1314      if (jj_la1[i] == jj_gen) {
1315        for (int j = 0; j < 32; j++) {
1316          if ((jj_la1_0[i] & (1<<j)) != 0) {
1317            la1tokens[j] = true;
1318          }
1319        }
1320      }
1321    }
1322    for (int i = 0; i < 32; i++) {
1323      if (la1tokens[i]) {
1324        jj_expentry = new int[1];
1325        jj_expentry[0] = i;
1326        jj_expentries.addElement(jj_expentry);
1327      }
1328    }
1329    jj_endpos = 0;
1330    jj_rescan_token();
1331    jj_add_error_token(0, 0);
1332    int[][] exptokseq = new int[jj_expentries.size()][];
1333    for (int i = 0; i < jj_expentries.size(); i++) {
1334      exptokseq[i] = (int[])jj_expentries.elementAt(i);
1335    }
1336    return new ParseException(token, exptokseq, tokenImage);
1337  }
1338
1339  final public void enable_tracing() {
1340  }
1341
1342  final public void disable_tracing() {
1343  }
1344
1345  final private void jj_rescan_token() {
1346    jj_rescan = true;
1347    for (int i = 0; i < 1; i++) {
1348      JJCalls p = jj_2_rtns[i];
1349      do {
1350        if (p.gen > jj_gen) {
1351          jj_la = p.arg; jj_lastpos = jj_scanpos = p.first;
1352          switch (i) {
1353            case 0: jj_3_1(); break;
1354          }
1355        }
1356        p = p.next;
1357      } while (p != null);
1358    }
1359    jj_rescan = false;
1360  }
1361
1362  final private void jj_save(int index, int xla) {
1363    JJCalls p = jj_2_rtns[index];
1364    while (p.gen > jj_gen) {
1365      if (p.next == null) { p = p.next = new JJCalls(); break; }
1366      p = p.next;
1367    }
1368    p.gen = jj_gen + xla - jj_la; p.first = token; p.arg = xla;
1369  }
1370
1371  static final class JJCalls {
1372    int gen;
1373    Token first;
1374    int arg;
1375    JJCalls next;
1376  }
1377
1378}
1379
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Free Books Free Magazines
Popular Tags