Java > Open Source Codes > gnu > regexp > RE


1   /*
2    *  gnu/regexp/RE.java
3    *  Copyright (C) 1998-2001 Wes Biggs
4    *
5    *  This library is free software; you can redistribute it and/or modify
6    *  it under the terms of the GNU Lesser General Public License as published
7    *  by the Free Software Foundation; either version 2.1 of the License, or
8    *  (at your option) any later version.
9    *
10   *  This library is distributed in the hope that it will be useful,
11   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   *  GNU Lesser General Public License for more details.
14   *
15   *  You should have received a copy of the GNU Lesser General Public License
16   *  along with this program; if not, write to the Free Software
17   *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18   */
19  
20  package gnu.regexp;
21  import java.io.InputStream  ;
22  import java.io.Reader  ;
23  import java.io.Serializable  ;
24  import java.util.Locale  ;
25  import java.util.PropertyResourceBundle  ;
26  import java.util.ResourceBundle  ;
27  import java.util.Vector  ;
28  
/**
 * Mutable holder for two int values.  RE.initialize uses one instance as
 * scratch space for the bounds of an {x,y} interval.
 */
class IntPair implements Serializable {
  /** First value of the pair. */
  public int first;

  /** Second value of the pair. */
  public int second;
}
32  
/**
 * Mutable holder for one parsed pattern character together with a flag
 * telling whether it was preceded by a backslash.  Filled in by
 * RE.getCharUnit.
 */
class CharUnit implements Serializable {
  /** The character that was read (the one after the backslash, if any). */
  public char ch;

  /** True when the character was introduced by a backslash. */
  public boolean bk;
}
37  
38  /**
39   * RE provides the user interface for compiling and matching regular
40   * expressions.
41   * <P>
42   * A regular expression object (class RE) is compiled by constructing it
43   * from a String, StringBuffer or character array, with optional 
44   * compilation flags (below)
45   * and an optional syntax specification (see RESyntax; if not specified,
46   * <code>RESyntax.RE_SYNTAX_PERL5</code> is used).
47   * <P>
48   * Various methods attempt to match input text against a compiled
49   * regular expression.  These methods are:
50   * <LI><code>isMatch</code>: returns true if the input text in its entirety
51   * matches the regular expression pattern.
52   * <LI><code>getMatch</code>: returns the first match found in the input text,
53   * or null if no match is found.
54   * <LI><code>getAllMatches</code>: returns an array of all non-overlapping 
55   * matches found in the input text.  If no matches are found, the array is
56   * zero-length.
57   * <LI><code>substitute</code>: substitute the first occurrence of the pattern
58   * in the input text with a replacement string (which may include
59   * metacharacters $0-$9, see REMatch.substituteInto).
60   * <LI><code>substituteAll</code>: same as above, but repeat for each match
61   * before returning.
62   * <LI><code>getMatchEnumeration</code>: returns an REMatchEnumeration object
63   * that allows iteration over the matches (see REMatchEnumeration for some
64   * reasons why you may want to do this instead of using <code>getAllMatches</code>).
65   * <P>
66   *
67   * These methods all have similar argument lists.  The input can be a
68   * String, a character array, a StringBuffer, a Reader or an
69   * InputStream of some sort.  Note that when using a Reader or
70   * InputStream, the stream read position cannot be guaranteed after
71   * attempting a match (this is not a bug, but a consequence of the way
72   * regular expressions work).  Using an REMatchEnumeration can
73   * eliminate most positioning problems.
74   *
75   * <P>
76   *
77   * The optional index argument specifies the offset from the beginning
78   * of the text at which the search should start (see the descriptions
79   * of some of the execution flags for how this can affect positional
80   * pattern operators).  For a Reader or InputStream, this means an
81   * offset from the current read position, so subsequent calls with the
82   * same index argument on a Reader or an InputStream will not
83   * necessarily access the same position on the stream, whereas
84   * repeated searches at a given index in a fixed string will return
85   * consistent results.
86   *
87   * <P>
88   * You can optionally affect the execution environment by using a
89   * combination of execution flags (constants listed below).
90   * 
91   * <P>
92   * All operations on a regular expression are performed in a
93   * thread-safe manner.
94   *
95   * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
96   * @version 1.1.4-dev, to be released
97   */
98  
99  public class RE extends REToken {
100   // This String will be returned by version()
101   private static final String   VERSION = "1.1.4-dev";
102 
103   // The localized strings are kept in a separate file
104   private static ResourceBundle   messages = PropertyResourceBundle.getBundle("gnu/regexp/MessagesBundle", Locale.getDefault());
105 
106   // These are, respectively, the first and last tokens in our linked list
107   // If there is only one token, firstToken == lastToken
108   private REToken firstToken, lastToken;
109 
110   // This is the number of subexpressions in this regular expression,
111   // with a minimum value of zero.  Returned by getNumSubs()
112   private int numSubs;
113 
114     /** Minimum length, in characters, of any possible match. */
115     private int minimumLength;
116 
117   /**
118    * Compilation flag. Do  not  differentiate  case.   Subsequent
119    * searches  using  this  RE will be case insensitive.
120    */
121   public static final int REG_ICASE = 2;
122 
123   /**
124    * Compilation flag. The match-any-character operator (dot)
125    * will match a newline character.  When set this overrides the syntax
126    * bit RE_DOT_NEWLINE (see RESyntax for details).  This is equivalent to
127    * the "/s" operator in Perl.
128    */
129   public static final int REG_DOT_NEWLINE = 4;
130 
131   /**
132    * Compilation flag. Use multiline mode.  In this mode, the ^ and $
133    * anchors will match based on newlines within the input. This is
134    * equivalent to the "/m" operator in Perl.
135    */
136   public static final int REG_MULTILINE = 8;
137 
138   /**
139    * Execution flag.
140    * The match-beginning operator (^) will not match at the beginning
141    * of the input string. Useful for matching on a substring when you
142    * know the context of the input is such that position zero of the
143    * input to the match test is not actually position zero of the text.
144    * <P>
145    * This example demonstrates the results of various ways of matching on
146    * a substring.
147    * <P>
148    * <CODE>
149    * String s = "food bar fool";<BR>
150    * RE exp = new RE("^foo.");<BR>
151    * REMatch m0 = exp.getMatch(s);<BR>
152    * REMatch m1 = exp.getMatch(s.substring(8));<BR>
153    * REMatch m2 = exp.getMatch(s.substring(8),0,RE.REG_NOTBOL); <BR>
154    * REMatch m3 = exp.getMatch(s,8);                            <BR>
155    * REMatch m4 = exp.getMatch(s,8,RE.REG_ANCHORINDEX);         <BR>
156    * <P>
157    * // Results:<BR>
158    * //  m0 = "food"<BR>
159    * //  m1 = "fool"<BR>
160    * //  m2 = null<BR>
161    * //  m3 = null<BR>
162    * //  m4 = "fool"<BR>
163    * </CODE>
164    */
165   public static final int REG_NOTBOL = 16;
166 
167   /**
168    * Execution flag.
169    * The match-end operator ($) does not match at the end
170    * of the input string. Useful for matching on substrings.
171    */
172   public static final int REG_NOTEOL = 32;
173 
174   /**
175    * Execution flag.
176    * When a match method is invoked that starts matching at a non-zero
177    * index into the input, treat the input as if it begins at the index
178    * given.  The effect of this flag is that the engine does not "see"
179    * any text in the input before the given index.  This is useful so
180    * that the match-beginning operator (^) matches not at position 0
181    * in the input string, but at the position the search started at
182    * (based on the index input given to the getMatch function).  See
183    * the example under REG_NOTBOL.  It also affects the use of the \&lt;
184    * and \b operators.
185    */
186   public static final int REG_ANCHORINDEX = 64;
187 
188   /**
189    * Execution flag.
190    * The substitute and substituteAll methods will not attempt to
191    * interpolate occurrences of $1-$9 in the replacement text with
192    * the corresponding subexpressions.  For example, you may want to
193    * replace all matches of "one dollar" with "$1".
194    */
195   public static final int REG_NO_INTERPOLATE = 128;
196 
197   /** Returns a string representing the version of the gnu.regexp package. */
198   public static final String   version() {
199     return VERSION;
200   }
201 
202   // Retrieves a message from the ResourceBundle
203   static final String   getLocalizedMessage(String   key) {
204     return messages.getString(key);
205   }
206 
207   /**
208    * Constructs a regular expression pattern buffer without any compilation
209    * flags set, and using the default syntax (RESyntax.RE_SYNTAX_PERL5).
210    *
211    * @param pattern A regular expression pattern, in the form of a String,
212    *   StringBuffer or char[].  Other input types will be converted to
213    *   strings using the toString() method.
214    * @exception REException The input pattern could not be parsed.
215    * @exception NullPointerException The pattern was null.
216    */
217   public RE(Object   pattern) throws REException {
218     this(pattern,0,RESyntax.RE_SYNTAX_PERL5,0,0);
219   }
220 
221   /**
222    * Constructs a regular expression pattern buffer using the specified
223    * compilation flags and the default syntax (RESyntax.RE_SYNTAX_PERL5).
224    *
225    * @param pattern A regular expression pattern, in the form of a String,
226    *   StringBuffer, or char[].  Other input types will be converted to
227    *   strings using the toString() method.
228    * @param cflags The logical OR of any combination of the compilation flags listed above.
229    * @exception REException The input pattern could not be parsed.
230    * @exception NullPointerException The pattern was null.
231    */
232   public RE(Object   pattern, int cflags) throws REException {
233     this(pattern,cflags,RESyntax.RE_SYNTAX_PERL5,0,0);
234   }
235 
236   /**
237    * Constructs a regular expression pattern buffer using the specified
238    * compilation flags and regular expression syntax.
239    *
240    * @param pattern A regular expression pattern, in the form of a String,
241    *   StringBuffer, or char[].  Other input types will be converted to
242    *   strings using the toString() method.
243    * @param cflags The logical OR of any combination of the compilation flags listed above.
244    * @param syntax The type of regular expression syntax to use.
245    * @exception REException The input pattern could not be parsed.
246    * @exception NullPointerException The pattern was null.
247    */
248   public RE(Object   pattern, int cflags, RESyntax syntax) throws REException {
249     this(pattern,cflags,syntax,0,0);
250   }
251 
252   // internal constructor used for alternation
253   private RE(REToken first, REToken last,int subs, int subIndex, int minLength) {
254     super(subIndex);
255     firstToken = first;
256     lastToken = last;
257     numSubs = subs;
258     minimumLength = minLength;
259     addToken(new RETokenEndSub(subIndex));
260   }
261 
262   private RE(Object   patternObj, int cflags, RESyntax syntax, int myIndex, int nextSub) throws REException {
263     super(myIndex); // Subexpression index of this token.
264     initialize(patternObj, cflags, syntax, myIndex, nextSub);
265   }
266 
267     // For use by subclasses
268     protected RE() { super(0); }
269 
270     // The meat of construction
271   protected void initialize(Object   patternObj, int cflags, RESyntax syntax, int myIndex, int nextSub) throws REException {
272       char[] pattern;
273     if (patternObj instanceof String  ) {
274       pattern = ((String  ) patternObj).toCharArray();
275     } else if (patternObj instanceof char[]) {
276       pattern = (char[]) patternObj;
277     } else if (patternObj instanceof StringBuffer  ) {
278       pattern = new char [((StringBuffer  ) patternObj).length()];
279       ((StringBuffer  ) patternObj).getChars(0,pattern.length,pattern,0);
280     } else {
281     pattern = patternObj.toString().toCharArray();
282     }
283 
284     int pLength = pattern.length;
285 
286     numSubs = 0; // Number of subexpressions in this token.
287     Vector   branches = null;
288 
289     // linked list of tokens (sort of -- some closed loops can exist)
290     firstToken = lastToken = null;
291 
292     // Precalculate these so we don't pay for the math every time we
293     // need to access them.
294     boolean insens = ((cflags & REG_ICASE) > 0);
295 
296     // Parse pattern into tokens.  Does anyone know if it's more efficient
297     // to use char[] than a String.charAt()?  I'm assuming so.
298 
299     // index tracks the position in the char array
300     int index = 0;
301 
302     // this will be the current parse character (pattern[index])
303     CharUnit unit = new CharUnit();
304 
305     // This is used for {x,y} calculations
306     IntPair minMax = new IntPair();
307 
308     // Buffer a token so we can create a TokenRepeated, etc.
309     REToken currentToken = null;
310     char ch;
311 
312     while (index < pLength) {
313       // read the next character unit (including backslash escapes)
314       index = getCharUnit(pattern,index,unit);
315 
316       // ALTERNATION OPERATOR
317       //  \| or | (if RE_NO_BK_VBAR) or newline (if RE_NEWLINE_ALT)
318       //  not available if RE_LIMITED_OPS is set
319 
320       // TODO: the '\n' literal here should be a test against REToken.newline,
321       // which unfortunately may be more than a single character.
322       if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ unit.bk))
323          || (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !unit.bk) )
324        && !syntax.get(RESyntax.RE_LIMITED_OPS)) {
325     // make everything up to here be a branch. create vector if nec.
326     addToken(currentToken);
327     RE theBranch = new RE(firstToken, lastToken, numSubs, subIndex, minimumLength);
328     minimumLength = 0;
329     if (branches == null) {
330         branches = new Vector  ();
331     }
332     branches.addElement(theBranch);
333     firstToken = lastToken = currentToken = null;
334       }
335       
336       // INTERVAL OPERATOR:
337       //  {x} | {x,} | {x,y}  (RE_INTERVALS && RE_NO_BK_BRACES)
338       //  \{x\} | \{x,\} | \{x,y\} (RE_INTERVALS && !RE_NO_BK_BRACES)
339       //
340       // OPEN QUESTION: 
341       //  what is proper interpretation of '{' at start of string?
342 
343       else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)) {
344     int newIndex = getMinMax(pattern,index,minMax,syntax);
345         if (newIndex > index) {
346           if (minMax.first > minMax.second)
347             throw new REException(getLocalizedMessage("interval.order"),REException.REG_BADRPT,newIndex);
348           if (currentToken == null)
349             throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,newIndex);
350           if (currentToken instanceof RETokenRepeated) 
351             throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,newIndex);
352           if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
353             throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,newIndex);
354           if ((currentToken.getMinimumLength() == 0) && (minMax.second == Integer.MAX_VALUE))
355             throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,newIndex);
356           index = newIndex;
357           currentToken = setRepeated(currentToken,minMax.first,minMax.second,index); 
358         }
359         else {
360           addToken(currentToken);
361           currentToken = new RETokenChar(subIndex,unit.ch,insens);
362         } 
363       }
364       
365       // LIST OPERATOR:
366       //  [...] | [^...]
367 
368       else if ((unit.ch == '[') && !unit.bk) {
369     Vector   options = new Vector  ();
370     boolean negative = false;
371     char lastChar = 0;
372     if (index == pLength) throw new REException(getLocalizedMessage("unmatched.bracket"),REException.REG_EBRACK,index);
373     
374     // Check for initial caret, negation
375     if ((ch = pattern[index]) == '^') {
376       negative = true;
377       if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
378       ch = pattern[index];
379     }
380 
381     // Check for leading right bracket literal
382     if (ch == ']') {
383       lastChar = ch;
384       if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
385     }
386 
387     while ((ch = pattern[index++]) != ']') {
388       if ((ch == '-') && (lastChar != 0)) {
389         if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
390         if ((ch = pattern[index]) == ']') {
391           options.addElement(new RETokenChar(subIndex,lastChar,insens));
392           lastChar = '-';
393         } else {
394           options.addElement(new RETokenRange(subIndex,lastChar,ch,insens));
395           lastChar = 0;
396           index++;
397         }
398           } else if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) {
399             if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
400         int posixID = -1;
401         boolean negate = false;
402             char asciiEsc = 0;
403         if (("dswDSW".indexOf(pattern[index]) != -1) && syntax.get(RESyntax.RE_CHAR_CLASS_ESC_IN_LISTS)) {
404           switch (pattern[index]) {
405           case 'D':
406         negate = true;
407           case 'd':
408         posixID = RETokenPOSIX.DIGIT;
409         break;
410           case 'S':
411         negate = true;
412           case 's':
413         posixID = RETokenPOSIX.SPACE;
414         break;
415           case 'W':
416         negate = true;
417           case 'w':
418         posixID = RETokenPOSIX.ALNUM;
419         break;
420           }
421         }
422             else if ("nrt".indexOf(pattern[index]) != -1) {
423               switch (pattern[index]) {
424                 case 'n':
425                   asciiEsc = '\n';
426                   break;
427                 case 't':
428                   asciiEsc = '\t';
429                   break;
430                 case 'r':
431                   asciiEsc = '\r';
432                   break;
433               }
434             }
435         if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
436         
437         if (posixID != -1) {
438           options.addElement(new RETokenPOSIX(subIndex,posixID,insens,negate));
439         } else if (asciiEsc != 0) {
440           lastChar = asciiEsc;
441         } else {
442           lastChar = pattern[index];
443         }
444         ++index;
445       } else if ((ch == '[') && (syntax.get(RESyntax.RE_CHAR_CLASSES)) && (index < pLength) && (pattern[index] == ':')) {
446         StringBuffer   posixSet = new StringBuffer  ();
447         index = getPosixSet(pattern,index+1,posixSet);
448         int posixId = RETokenPOSIX.intValue(posixSet.toString());
449         if (posixId != -1)
450           options.addElement(new RETokenPOSIX(subIndex,posixId,insens,false));
451       } else {
452         if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
453         lastChar = ch;
454       }
455       if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
456     } // while in list
457     // Out of list, index is one past ']'
458         
459     if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
460         
461     // Create a new RETokenOneOf
462     addToken(currentToken);
463     options.trimToSize();
464     currentToken = new RETokenOneOf(subIndex,options,negative);
465       }
466 
467       // SUBEXPRESSIONS
468       //  (...) | \(...\) depending on RE_NO_BK_PARENS
469 
470       else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) {
471     boolean pure = false;
472     boolean comment = false;
473         boolean lookAhead = false;
474         boolean negativelh = false;
475     if ((index+1 < pLength) && (pattern[index] == '?')) {
476       switch (pattern[index+1]) {
477           case '!':
478             if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
479               pure = true;
480               negativelh = true;
481               lookAhead = true;
482               index += 2;
483             }
484             break;
485           case '=':
486             if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
487               pure = true;
488               lookAhead = true;
489               index += 2;
490             }
491             break;
492       case ':':
493         if (syntax.get(RESyntax.RE_PURE_GROUPING)) {
494           pure = true;
495           index += 2;
496         }
497         break;
498       case '#':
499         if (syntax.get(RESyntax.RE_COMMENTS)) {
500           comment = true;
501         }
502         break;
503           default:
504             throw new REException(getLocalizedMessage("repeat.no.token"), REException.REG_BADRPT, index);
505       }
506     }
507 
508     if (index >= pLength) {
509         throw new REException(getLocalizedMessage("unmatched.paren"), REException.REG_ESUBREG,index);
510     }
511 
512     // find end of subexpression
513     int endIndex = index;
514     int nextIndex = index;
515     int nested = 0;
516 
517     while ( ((nextIndex = getCharUnit(pattern,endIndex,unit)) > 0)
518         && !(nested == 0 && (unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) )
519       if ((endIndex = nextIndex) >= pLength)
520         throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);
521       else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))
522         nested++;
523       else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))
524         nested--;
525 
526     // endIndex is now position at a ')','\)' 
527     // nextIndex is end of string or position after ')' or '\)'
528 
529     if (comment) index = nextIndex;
530     else { // not a comment
531       // create RE subexpression as token.
532       addToken(currentToken);
533       if (!pure) {
534         numSubs++;
535       }
536 
537       int useIndex = (pure || lookAhead) ? 0 : nextSub + numSubs;
538       currentToken = new RE(String.valueOf(pattern,index,endIndex-index).toCharArray(),cflags,syntax,useIndex,nextSub + numSubs);
539       numSubs += ((RE) currentToken).getNumSubs();
540 
541           if (lookAhead) {
542           currentToken = new RETokenLookAhead(currentToken,negativelh);
543       }
544 
545       index = nextIndex;
546     } // not a comment
547       } // subexpression
548     
549       // UNMATCHED RIGHT PAREN
550       // ) or \) throw exception if
551       // !syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD)
552       else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))) {
553     throw new REException(getLocalizedMessage("unmatched.paren"),REException.REG_EPAREN,index);
554       }
555 
556       // START OF LINE OPERATOR
557       //  ^
558 
559       else if ((unit.ch == '^') && !unit.bk) {
560     addToken(currentToken);
561     currentToken = null;
562     addToken(new RETokenStart(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
563       }
564 
565       // END OF LINE OPERATOR
566       //  $
567 
568       else if ((unit.ch == '$') && !unit.bk) {
569     addToken(currentToken);
570     currentToken = null;
571     addToken(new RETokenEnd(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
572       }
573 
574       // MATCH-ANY-CHARACTER OPERATOR (except possibly newline and null)
575       //  .
576 
577       else if ((unit.ch == '.') && !unit.bk) {
578     addToken(currentToken);
579     currentToken = new RETokenAny(subIndex,syntax.get(RESyntax.RE_DOT_NEWLINE) || ((cflags & REG_DOT_NEWLINE) > 0),syntax.get(RESyntax.RE_DOT_NOT_NULL));
580       }
581 
582       // ZERO-OR-MORE REPEAT OPERATOR
583       //  *
584 
585       else if ((unit.ch == '*') && !unit.bk) {
586     if (currentToken == null)
587           throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
588     if (currentToken instanceof RETokenRepeated)
589           throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
590     if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
591       throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
592     if (currentToken.getMinimumLength() == 0)
593       throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,index);
594     currentToken = setRepeated(currentToken,0,Integer.MAX_VALUE,index);
595       }
596 
597       // ONE-OR-MORE REPEAT OPERATOR
598       //  + | \+ depending on RE_BK_PLUS_QM
599       //  not available if RE_LIMITED_OPS is set
600 
601       else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) {
602     if (currentToken == null)
603           throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
604     if (currentToken instanceof RETokenRepeated)
605           throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
606     if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
607       throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
608     if (currentToken.getMinimumLength() == 0)
609       throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,index);
610     currentToken = setRepeated(currentToken,1,Integer.MAX_VALUE,index);
611       }
612 
613       // ZERO-OR-ONE REPEAT OPERATOR / STINGY MATCHING OPERATOR
614       //  ? | \? depending on RE_BK_PLUS_QM
615       //  not available if RE_LIMITED_OPS is set
616       //  stingy matching if RE_STINGY_OPS is set and it follows a quantifier
617 
618       else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) {
619     if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
620 
621     // Check for stingy matching on RETokenRepeated
622     if (currentToken instanceof RETokenRepeated) {
623           if (syntax.get(RESyntax.RE_STINGY_OPS) && !((RETokenRepeated)currentToken).isStingy())
624             ((RETokenRepeated)currentToken).makeStingy();
625           else
626             throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
627         }
628         else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
629           throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
630     else
631       currentToken = setRepeated(currentToken,0,1,index);
632       }
633     
634       // BACKREFERENCE OPERATOR
635       //  \1 \2 ... \9
636       // not available if RE_NO_BK_REFS is set
637 
638       else if (unit.bk && Character.isDigit(unit.ch) && !syntax.get(RESyntax.RE_NO_BK_REFS)) {
639     addToken(currentToken);
640     currentToken = new RETokenBackRef(subIndex,Character.digit(unit.ch,10),insens);
641       }
642 
643       // START OF STRING OPERATOR
644       //  \A if RE_STRING_ANCHORS is set
645       
646       else if (unit.bk && (unit.ch == 'A') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
647     addToken(currentToken);
648     currentToken = new RETokenStart(subIndex,null);
649       }
650 
651       // WORD BREAK OPERATOR
652       //  \b if ????
653 
654       else if (unit.bk && (unit.ch == 'b') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
655       addToken(currentToken);
656       currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, false);
657       } 
658 
659       // WORD BEGIN OPERATOR 
660       //  \< if ????
661       else if (unit.bk && (unit.ch == '<')) {
662       addToken(currentToken);
663       currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN, false);
664       } 
665 
666       // WORD END OPERATOR 
667       //  \> if ????
668       else if (unit.bk && (unit.ch == '>')) {
669       addToken(currentToken);
670       currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.END, false);
671       } 
672 
673       // NON-WORD BREAK OPERATOR
674       // \B if ????
675 
676       else if (unit.bk && (unit.ch == 'B') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
677       addToken(currentToken);
678       currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, true);
679       } 
680 
681       
682       // DIGIT OPERATOR
683       //  \d if RE_CHAR_CLASS_ESCAPES is set
684       
685       else if (unit.bk && (unit.ch == 'd') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
686     addToken(currentToken);
687     currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,false);
688       }
689 
690       // NON-DIGIT OPERATOR
691       //  \D
692 
693     else if (unit.bk && (unit.ch == 'D') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
694       addToken(currentToken);
695       currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,true);
696     }
697 
698     // NEWLINE ESCAPE
699         //  \n
700 
701     else if (unit.bk && (unit.ch == 'n')) {
702       addToken(currentToken);
703       currentToken = new RETokenChar(subIndex,'\n',false);
704     }
705 
706     // RETURN ESCAPE
707         //  \r
708 
709     else if (unit.bk && (unit.ch == 'r')) {
710       addToken(currentToken);
711       currentToken = new RETokenChar(subIndex,'\r',false);
712     }
713 
714     // WHITESPACE OPERATOR
715         //  \s if RE_CHAR_CLASS_ESCAPES is set
716 
717     else if (unit.bk && (unit.ch == 's') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
718       addToken(currentToken);
719       currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,false);
720     }
721 
722     // NON-WHITESPACE OPERATOR
723         //  \S
724 
725     else if (unit.bk && (unit.ch == 'S') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
726       addToken(currentToken);
727       currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,true);
728     }
729 
730     // TAB ESCAPE
731         //  \t
732 
733     else if (unit.bk && (unit.ch == 't')) {
734       addToken(currentToken);
735       currentToken = new RETokenChar(subIndex,'\t',false);
736     }
737 
738     // ALPHANUMERIC OPERATOR
739         //  \w
740 
741     else if (unit.bk && (unit.ch == 'w') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
742       addToken(currentToken);
743       currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,false);
744     }
745 
746     // NON-ALPHANUMERIC OPERATOR
747         //  \W
748 
749     else if (unit.bk && (unit.ch == 'W') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
750       addToken(currentToken);
751       currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,true);
752     }
753 
754     // END OF STRING OPERATOR
755         //  \Z
756 
757     else if (unit.bk && (unit.ch == 'Z') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
758       addToken(currentToken);
759       currentToken = new RETokenEnd(subIndex,null);
760     }
761 
762     // NON-SPECIAL CHARACTER (or escape to make literal)
763         //  c | \* for example
764 
765     else {  // not a special character
766       addToken(currentToken);
767       currentToken = new RETokenChar(subIndex,unit.ch,insens);
768     } 
769       } // end while
770 
771     // Add final buffered token and an EndSub marker
772     addToken(currentToken);
773       
774     if (branches != null) {
775     branches.addElement(new RE(firstToken,lastToken,numSubs,subIndex,minimumLength));
776     branches.trimToSize(); // compact the Vector
777     minimumLength = 0;
778     firstToken = lastToken = null;
779     addToken(new RETokenOneOf(subIndex,branches,false));
780     } 
781     else addToken(new RETokenEndSub(subIndex));
782 
783   }
784 
785   private static int getCharUnit(char[] input, int index, CharUnit unit) throws REException {
786     unit.ch = input[index++];
787     if (unit.bk = (unit.ch == '\\'))
788       if (index < input.length)
789     unit.ch = input[index++];
790       else throw new REException(getLocalizedMessage("ends.with.backslash"),REException.REG_ESCAPE,index);
791     return index;
792   }
793 
794   /**
795    * Checks if the regular expression matches the input in its entirety.
796    *
797    * @param input The input text.
798    */
799   public boolean isMatch(Object   input) {
800     return isMatch(input,0,0);
801   }
802   
803   /**
804    * Checks if the input string, starting from index, is an exact match of
805    * this regular expression.
806    *
807    * @param input The input text.
808    * @param index The offset index at which the search should be begin.
809    */
810   public boolean isMatch(Object   input,int index) {
811     return isMatch(input,index,0);
812   }
813   
814 
815   /**
816    * Checks if the input, starting from index and using the specified
817    * execution flags, is an exact match of this regular expression.
818    *
819    * @param input The input text.
820    * @param index The offset index at which the search should be begin.
821    * @param eflags The logical OR of any execution flags above.
822    */
823   public boolean isMatch(Object   input,int index,int eflags) {
824     return isMatchImpl(makeCharIndexed(input,index),index,eflags);
825   }
826 
827   private boolean isMatchImpl(CharIndexed input, int index, int eflags) {
828     if (firstToken == null)  // Trivial case
829       return (input.charAt(0) == CharIndexed.OUT_OF_BOUNDS);
830     REMatch m = new REMatch(numSubs, index, eflags);
831     if (firstToken.match(input, m)) {
832     while (m != null) {
833         if (input.charAt(m.index) == CharIndexed.OUT_OF_BOUNDS) {
834         return true;
835         }
836         m = m.next;
837     }
838     }
839     return false;
840   }
841     
842   /**
843    * Returns the maximum number of subexpressions in this regular expression.
844    * If the expression contains branches, the value returned will be the
845    * maximum subexpressions in any of the branches.
846    */
847   public int getNumSubs() {
848     return numSubs;
849   }
850 
851   // Overrides REToken.setUncle
852   void setUncle(REToken uncle) {
853       if (lastToken != null) {
854       lastToken.setUncle(uncle);
855       } else super.setUncle(uncle); // to deal with empty subexpressions
856   }
857 
858   // Overrides REToken.chain
859 
860   boolean chain(REToken next) {
861     super.chain(next);
862     setUncle(next);
863     return true;
864   }
865 
866   /**
867    * Returns the minimum number of characters that could possibly
868    * constitute a match of this regular expression.
869    */
870   public int getMinimumLength() {
871       return minimumLength;
872   }
873 
874   /**
875    * Returns an array of all matches found in the input.
876    *
877    * If the regular expression allows the empty string to match, it will
878    * substitute matches at all positions except the end of the input.
879    *
880    * @param input The input text.
881    * @return a non-null (but possibly zero-length) array of matches
882    */
883   public REMatch[] getAllMatches(Object   input) {
884     return getAllMatches(input,0,0);
885   }
886 
887   /**
888    * Returns an array of all matches found in the input,
889    * beginning at the specified index position.
890    *
891    * If the regular expression allows the empty string to match, it will
892    * substitute matches at all positions except the end of the input.
893    *
894    * @param input The input text.
895    * @param index The offset index at which the search should be begin.
896    * @return a non-null (but possibly zero-length) array of matches
897    */
898   public REMatch[] getAllMatches(Object   input, int index) {
899     return getAllMatches(input,index,0);
900   }
901 
902   /**
903    * Returns an array of all matches found in the input string,
904    * beginning at the specified index position and using the specified
905    * execution flags.
906    *
907    * If the regular expression allows the empty string to match, it will
908    * substitute matches at all positions except the end of the input.
909    *
910    * @param input The input text.
911    * @param index The offset index at which the search should be begin.
912    * @param eflags The logical OR of any execution flags above.
913    * @return a non-null (but possibly zero-length) array of matches
914    */
915   public REMatch[] getAllMatches(Object   input, int index, int eflags) {
916     return getAllMatchesImpl(makeCharIndexed(input,index),index,eflags);
917   }
918 
919   // this has been changed since 1.03 to be non-overlapping matches
920   private REMatch[] getAllMatchesImpl(CharIndexed input, int index, int eflags) {
921     Vector   all = new Vector  ();
922     REMatch m = null;
923     while ((m = getMatchImpl(input,index,eflags,null)) != null) {
924       all.addElement(m);
925       index = m.getEndIndex();
926       if (m.end[0] == 0) {   // handle pathological case of zero-length match
927     index++;
928     input.move(1);
929       } else {
930     input.move(m.end[0]);
931       }
932       if (!input.isValid()) break;
933     }
934     REMatch[] mset = new REMatch[all.size()];
935     all.copyInto(mset);
936     return mset;
937   }
938   
939     /* Implements abstract method REToken.match() */
940     boolean match(CharIndexed input, REMatch mymatch) { 
941     if (firstToken == null) return next(input, mymatch);
942 
943     // Note the start of this subexpression
944     mymatch.start[subIndex] = mymatch.index;
945 
946     return firstToken.match(input, mymatch);
947     }
948   
949   /**
950    * Returns the first match found in the input.  If no match is found,
951    * null is returned.
952    *
953    * @param input The input text.
954    * @return An REMatch instance referencing the match, or null if none.
955    */
956   public REMatch getMatch(Object   input) {
957     return getMatch(input,0,0);
958   }
959   
960   /**
961    * Returns the first match found in the input, beginning
962    * the search at the specified index.  If no match is found,
963    * returns null.
964    *
965    * @param input The input text.
966    * @param index The offset within the text to begin looking for a match.
967    * @return An REMatch instance referencing the match, or null if none.
968    */
969   public REMatch getMatch(Object   input, int index) {
970     return getMatch(input,index,0);
971   }
972   
973   /**
974    * Returns the first match found in the input, beginning
975    * the search at the specified index, and using the specified
976    * execution flags.  If no match is found, returns null.
977    *
978    * @param input The input text.
979    * @param index The offset index at which the search should be begin.
980    * @param eflags The logical OR of any execution flags above.
981    * @return An REMatch instance referencing the match, or null if none.
982    */
983   public REMatch getMatch(Object   input, int index, int eflags) {
984     return getMatch(input,index,eflags,null);
985   }
986 
987   /**
988    * Returns the first match found in the input, beginning the search
989    * at the specified index, and using the specified execution flags.
990    * If no match is found, returns null.  If a StringBuffer is
991    * provided and is non-null, the contents of the input text from the
992    * index to the beginning of the match (or to the end of the input,
993    * if there is no match) are appended to the StringBuffer.
994    *
995    * @param input The input text.
996    * @param index The offset index at which the search should be begin.
997    * @param eflags The logical OR of any execution flags above.
998    * @param buffer The StringBuffer to save pre-match text in.
999    * @return An REMatch instance referencing the match, or null if none.  */
1000  public REMatch getMatch(Object   input, int index, int eflags, StringBuffer   buffer) {
1001    return getMatchImpl(makeCharIndexed(input,index),index,eflags,buffer);
1002  }
1003
1004  REMatch getMatchImpl(CharIndexed input, int anchor, int eflags, StringBuffer   buffer) {
1005      // Create a new REMatch to hold results
1006      REMatch mymatch = new REMatch(numSubs, anchor, eflags);
1007      do {
1008      // Optimization: check if anchor + minimumLength > length
1009      if (minimumLength == 0 || input.charAt(minimumLength-1) != CharIndexed.OUT_OF_BOUNDS) {
1010          if (match(input, mymatch)) {
1011          // Find longest match of them all to observe leftmost longest
1012          REMatch longest = mymatch;
1013          while ((mymatch = mymatch.next) != null) {
1014              if (mymatch.index > longest.index) {
1015              longest = mymatch;
1016              }
1017          }
1018          
1019          longest.end[0] = longest.index;
1020          longest.finish(input);
1021          return longest;
1022          }
1023      }
1024      mymatch.clear(++anchor);
1025      // Append character to buffer if needed
1026      if (buffer != null && input.charAt(0) != CharIndexed.OUT_OF_BOUNDS) {
1027          buffer.append(input.charAt(0));
1028      }
1029      } while (input.move(1));
1030      
1031      return null;
1032  }
1033
1034  /**
1035   * Returns an REMatchEnumeration that can be used to iterate over the
1036   * matches found in the input text.
1037   *
1038   * @param input The input text.
1039   * @return A non-null REMatchEnumeration instance.
1040   */
1041  public REMatchEnumeration getMatchEnumeration(Object   input) {
1042    return getMatchEnumeration(input,0,0);
1043  }
1044
1045
1046  /**
1047   * Returns an REMatchEnumeration that can be used to iterate over the
1048   * matches found in the input text.
1049   *
1050   * @param input The input text.
1051   * @param index The offset index at which the search should be begin.
1052   * @return A non-null REMatchEnumeration instance, with its input cursor
1053   *  set to the index position specified.
1054   */
1055  public REMatchEnumeration getMatchEnumeration(Object   input, int index) {
1056    return getMatchEnumeration(input,index,0);
1057  }
1058
1059  /**
1060   * Returns an REMatchEnumeration that can be used to iterate over the
1061   * matches found in the input text.
1062   *
1063   * @param input The input text.
1064   * @param index The offset index at which the search should be begin.
1065   * @param eflags The logical OR of any execution flags above.
1066   * @return A non-null REMatchEnumeration instance, with its input cursor
1067   *  set to the index position specified.
1068   */
1069  public REMatchEnumeration getMatchEnumeration(Object   input, int index, int eflags) {
1070    return new REMatchEnumeration(this,makeCharIndexed(input,index),index,eflags);
1071  }
1072
1073
1074  /**
1075   * Substitutes the replacement text for the first match found in the input.
1076   *
1077   * @param input The input text.
1078   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1079   * @return A String interpolating the substituted text.
1080   * @see REMatch#substituteInto
1081   */
1082  public String   substitute(Object   input,String   replace) {
1083    return substitute(input,replace,0,0);
1084  }
1085
1086  /**
1087   * Substitutes the replacement text for the first match found in the input
1088   * beginning at the specified index position.  Specifying an index
1089   * effectively causes the regular expression engine to throw away the
1090   * specified number of characters. 
1091   *
1092   * @param input The input text.
1093   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1094   * @param index The offset index at which the search should be begin.
1095   * @return A String containing the substring of the input, starting
1096   *   at the index position, and interpolating the substituted text.
1097   * @see REMatch#substituteInto
1098   */
1099  public String   substitute(Object   input,String   replace,int index) {
1100    return substitute(input,replace,index,0);
1101  }
1102
1103  /**
1104   * Substitutes the replacement text for the first match found in the input
1105   * string, beginning at the specified index position and using the
1106   * specified execution flags.
1107   *
1108   * @param input The input text.
1109   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1110   * @param index The offset index at which the search should be begin.
1111   * @param eflags The logical OR of any execution flags above.
1112   * @return A String containing the substring of the input, starting
1113   *   at the index position, and interpolating the substituted text.
1114   * @see REMatch#substituteInto
1115   */
1116  public String   substitute(Object   input,String   replace,int index,int eflags) {
1117    return substituteImpl(makeCharIndexed(input,index),replace,index,eflags);
1118  }
1119
1120  private String   substituteImpl(CharIndexed input,String   replace,int index,int eflags) {
1121    StringBuffer   buffer = new StringBuffer  ();
1122    REMatch m = getMatchImpl(input,index,eflags,buffer);
1123    if (m==null) return buffer.toString();
1124    buffer.append( ((eflags & REG_NO_INTERPOLATE) > 0) ?
1125           replace : m.substituteInto(replace) );
1126    if (input.move(m.end[0])) {
1127      do {
1128    buffer.append(input.charAt(0));
1129      } while (input.move(1));
1130    }
1131    return buffer.toString();
1132  }
1133  
1134  /**
1135   * Substitutes the replacement text for each non-overlapping match found 
1136   * in the input text.
1137   *
1138   * @param input The input text.
1139   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1140   * @return A String interpolating the substituted text.
1141   * @see REMatch#substituteInto
1142   */
1143  public String   substituteAll(Object   input,String   replace) {
1144    return substituteAll(input,replace,0,0);
1145  }
1146
1147  /**
1148   * Substitutes the replacement text for each non-overlapping match found 
1149   * in the input text, starting at the specified index.
1150   *
1151   * If the regular expression allows the empty string to match, it will
1152   * substitute matches at all positions except the end of the input.
1153   *
1154   * @param input The input text.
1155   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1156   * @param index The offset index at which the search should be begin.
1157   * @return A String containing the substring of the input, starting
1158   *   at the index position, and interpolating the substituted text.
1159   * @see REMatch#substituteInto
1160   */
1161  public String   substituteAll(Object   input,String   replace,int index) {
1162    return substituteAll(input,replace,index,0);
1163  }
1164 
1165  /**
1166   * Substitutes the replacement text for each non-overlapping match found 
1167   * in the input text, starting at the specified index and using the
1168   * specified execution flags.
1169   *
1170   * @param input The input text.
1171   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1172   * @param index The offset index at which the search should be begin.
1173   * @param eflags The logical OR of any execution flags above.
1174   * @return A String containing the substring of the input, starting
1175   *   at the index position, and interpolating the substituted text.
1176   * @see REMatch#substituteInto
1177   */
1178  public String   substituteAll(Object   input,String   replace,int index,int eflags) {
1179    return substituteAllImpl(makeCharIndexed(input,index),replace,index,eflags);
1180  }
1181
1182  private String   substituteAllImpl(CharIndexed input,String   replace,int index,int eflags) {
1183    StringBuffer   buffer = new StringBuffer  ();
1184    REMatch m;
1185    while ((m = getMatchImpl(input,index,eflags,buffer)) != null) {
1186    buffer.append( ((eflags & REG_NO_INTERPOLATE) > 0) ?
1187               replace : m.substituteInto(replace) );
1188      index = m.getEndIndex();
1189      if (m.end[0] == 0) {
1190    char ch = input.charAt(0);
1191    if (ch != CharIndexed.OUT_OF_BOUNDS) 
1192        buffer.append(ch);
1193    input.move(1);
1194      } else {
1195      input.move(m.end[0]);
1196      }
1197
1198      if (!input.isValid()) break;
1199    }
1200    return buffer.toString();
1201  }
1202  
1203  /* Helper function for constructor */
1204  private void addToken(REToken next) {
1205    if (next == null) return;
1206    minimumLength += next.getMinimumLength();
1207    if (firstToken == null) {
1208    lastToken = firstToken = next;
1209    } else {
1210      // if chain returns false, it "rejected" the token due to
1211      // an optimization, and next was combined with lastToken
1212      if (lastToken.chain(next)) {
1213      lastToken = next;
1214      }
1215    }
1216  }
1217
1218  private static REToken setRepeated(REToken current, int min, int max, int index) throws REException {
1219    if (current == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
1220    return new RETokenRepeated(current.subIndex,current,min,max);
1221  }
1222
1223  private static int getPosixSet(char[] pattern,int index,StringBuffer   buf) {
1224    // Precondition: pattern[index-1] == ':'
1225    // we will return pos of closing ']'.
1226    int i;
1227    for (i=index; i<(pattern.length-1); i++) {
1228      if ((pattern[i] == ':') && (pattern[i+1] == ']'))
1229    return i+2;
1230      buf.append(pattern[i]);
1231    }
1232    return index; // didn't match up
1233  }
1234
1235  private int getMinMax(char[] input,int index,IntPair minMax,RESyntax syntax) throws REException {
1236    // Precondition: input[index-1] == '{', minMax != null
1237
1238    boolean mustMatch = !syntax.get(RESyntax.RE_NO_BK_BRACES);
1239    int startIndex = index;
1240    if (index == input.length) {
1241      if (mustMatch)
1242        throw new REException(getLocalizedMessage("unmatched.brace"),REException.REG_EBRACE,index);
1243      else
1244        return startIndex;
1245    }
1246    
1247    int min,max=0;
1248    CharUnit unit = new CharUnit();
1249    StringBuffer   buf = new StringBuffer  ();
1250    
1251    // Read string of digits
1252    do {
1253      index = getCharUnit(input,index,unit);
1254      if (Character.isDigit(unit.ch))
1255        buf.append(unit.ch);
1256    } while ((index != input.length) && Character.isDigit(unit.ch));
1257
1258    // Check for {} tomfoolery
1259    if (buf.length() == 0) {
1260      if (mustMatch)
1261        throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
1262      else
1263        return startIndex;
1264    }
1265
1266    min = Integer.parseInt(buf.toString());
1267    
1268    if ((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk))
1269      max = min;
1270    else if (index == input.length)
1271      if (mustMatch)
1272        throw new REException(getLocalizedMessage("interval.no.end"),REException.REG_EBRACE,index);
1273      else
1274        return startIndex;
1275    else if ((unit.ch == ',') && !unit.bk) {
1276      buf = new StringBuffer  ();
1277      // Read string of digits
1278      while (((index = getCharUnit(input,index,unit)) != input.length) && Character.isDigit(unit.ch))
1279    buf.append(unit.ch);
1280
1281      if (!((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)))
1282        if (mustMatch)
1283          throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
1284        else
1285          return startIndex;
1286
1287      // This is the case of {x,}
1288      if (buf.length() == 0) max = Integer.MAX_VALUE;
1289      else max = Integer.parseInt(buf.toString());
1290    } else
1291      if (mustMatch)
1292        throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
1293      else
1294        return startIndex;
1295
1296    // We know min and max now, and they are valid.
1297
1298    minMax.first = min;
1299    minMax.second = max;
1300
1301    // return the index following the '}'
1302    return index;
1303  }
1304
1305   /**
1306    * Return a human readable form of the compiled regular expression,
1307    * useful for debugging.
1308    */
1309   public String   toString() {
1310     StringBuffer   sb = new StringBuffer  ();
1311     dump(sb);
1312     return sb.toString();
1313   }
1314
1315  void dump(StringBuffer   os) {
1316    os.append('(');
1317    if (subIndex == 0)
1318      os.append("?:");
1319    if (firstToken != null)
1320      firstToken.dumpAll(os);
1321    os.append(')');
1322  }
1323
1324  // Cast input appropriately or throw exception
1325  private static CharIndexed makeCharIndexed(Object   input, int index) {
1326      // We could let a String fall through to final input, but since
1327      // it's the most likely input type, we check it first.
1328    if (input instanceof String  )
1329      return new CharIndexedString((String  ) input,index);
1330    else if (input instanceof char[])
1331      return new CharIndexedCharArray((char[]) input,index);
1332    else if (input instanceof StringBuffer  )
1333      return new CharIndexedStringBuffer((StringBuffer  ) input,index);
1334    else if (input instanceof InputStream  )
1335      return new CharIndexedInputStream((InputStream  ) input,index);
1336    else if (input instanceof Reader  )
1337    return new CharIndexedReader((Reader  ) input, index);
1338    else if (input instanceof CharIndexed)
1339    return (CharIndexed) input; // do we lose index info?
1340    else 
1341    return new CharIndexedString(input.toString(), index);
1342  }
1343}
1344
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags