KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > gnu > regexp > RE


1 /*
2  * gnu/regexp/RE.java
3  * Copyright (C) 1998-2001 Wes Biggs
4  *
5  * This library is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published
7  * by the Free Software Foundation; either version 2.1 of the License, or
8  * (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18  */

19
20 package gnu.regexp;
21 import java.io.InputStream JavaDoc;
22 import java.io.Reader JavaDoc;
23 import java.io.Serializable JavaDoc;
24 import java.util.Locale JavaDoc;
25 import java.util.PropertyResourceBundle JavaDoc;
26 import java.util.ResourceBundle JavaDoc;
27 import java.util.Vector JavaDoc;
28
29 class IntPair implements Serializable JavaDoc {
30   public int first, second;
31 }
32
33 class CharUnit implements Serializable JavaDoc {
34   public char ch;
35   public boolean bk;
36 }
37
38 /**
39  * RE provides the user interface for compiling and matching regular
40  * expressions.
41  * <P>
42  * A regular expression object (class RE) is compiled by constructing it
43  * from a String, StringBuffer or character array, with optional
44  * compilation flags (below)
45  * and an optional syntax specification (see RESyntax; if not specified,
46  * <code>RESyntax.RE_SYNTAX_PERL5</code> is used).
47  * <P>
48  * Various methods attempt to match input text against a compiled
49  * regular expression. These methods are:
50  * <LI><code>isMatch</code>: returns true if the input text in its entirety
51  * matches the regular expression pattern.
52  * <LI><code>getMatch</code>: returns the first match found in the input text,
53  * or null if no match is found.
54  * <LI><code>getAllMatches</code>: returns an array of all non-overlapping
55  * matches found in the input text. If no matches are found, the array is
56  * zero-length.
57  * <LI><code>substitute</code>: substitute the first occurence of the pattern
58  * in the input text with a replacement string (which may include
59  * metacharacters $0-$9, see REMatch.substituteInto).
60  * <LI><code>substituteAll</code>: same as above, but repeat for each match
61  * before returning.
62  * <LI><code>getMatchEnumeration</code>: returns an REMatchEnumeration object
63  * that allows iteration over the matches (see REMatchEnumeration for some
64  * reasons why you may want to do this instead of using <code>getAllMatches</code>.
65  * <P>
66  *
67  * These methods all have similar argument lists. The input can be a
68  * String, a character array, a StringBuffer, a Reader or an
69  * InputStream of some sort. Note that when using a Reader or
70  * InputStream, the stream read position cannot be guaranteed after
71  * attempting a match (this is not a bug, but a consequence of the way
72  * regular expressions work). Using an REMatchEnumeration can
73  * eliminate most positioning problems.
74  *
75  * <P>
76  *
77  * The optional index argument specifies the offset from the beginning
78  * of the text at which the search should start (see the descriptions
79  * of some of the execution flags for how this can affect positional
80  * pattern operators). For a Reader or InputStream, this means an
81  * offset from the current read position, so subsequent calls with the
82  * same index argument on a Reader or an InputStream will not
83  * necessarily access the same position on the stream, whereas
84  * repeated searches at a given index in a fixed string will return
85  * consistent results.
86  *
87  * <P>
88  * You can optionally affect the execution environment by using a
89  * combination of execution flags (constants listed below).
90  *
91  * <P>
92  * All operations on a regular expression are performed in a
93  * thread-safe manner.
94  *
95  * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
96  * @version 1.1.4-dev, to be released
97  */

98
99 public class RE extends REToken {
100   // This String will be returned by getVersion()
101
private static final String JavaDoc VERSION = "1.1.4-dev";
102
103   // The localized strings are kept in a separate file
104
private static ResourceBundle JavaDoc messages = PropertyResourceBundle.getBundle("gnu/regexp/MessagesBundle", Locale.getDefault());
105
106   // These are, respectively, the first and last tokens in our linked list
107
// If there is only one token, firstToken == lastToken
108
private REToken firstToken, lastToken;
109
110   // This is the number of subexpressions in this regular expression,
111
// with a minimum value of zero. Returned by getNumSubs()
112
private int numSubs;
113
114     /** Minimum length, in characters, of any possible match. */
115     private int minimumLength;
116
117   /**
118    * Compilation flag. Do not differentiate case. Subsequent
119    * searches using this RE will be case insensitive.
120    */

121   public static final int REG_ICASE = 2;
122
123   /**
124    * Compilation flag. The match-any-character operator (dot)
125    * will match a newline character. When set this overrides the syntax
126    * bit RE_DOT_NEWLINE (see RESyntax for details). This is equivalent to
127    * the "/s" operator in Perl.
128    */

129   public static final int REG_DOT_NEWLINE = 4;
130
131   /**
132    * Compilation flag. Use multiline mode. In this mode, the ^ and $
133    * anchors will match based on newlines within the input. This is
134    * equivalent to the "/m" operator in Perl.
135    */

136   public static final int REG_MULTILINE = 8;
137
138   /**
139    * Execution flag.
140    * The match-beginning operator (^) will not match at the beginning
141    * of the input string. Useful for matching on a substring when you
142    * know the context of the input is such that position zero of the
143    * input to the match test is not actually position zero of the text.
144    * <P>
145    * This example demonstrates the results of various ways of matching on
146    * a substring.
147    * <P>
148    * <CODE>
149    * String s = "food bar fool";<BR>
150    * RE exp = new RE("^foo.");<BR>
151    * REMatch m0 = exp.getMatch(s);<BR>
152    * REMatch m1 = exp.getMatch(s.substring(8));<BR>
153    * REMatch m2 = exp.getMatch(s.substring(8),0,RE.REG_NOTBOL); <BR>
154    * REMatch m3 = exp.getMatch(s,8); <BR>
155    * REMatch m4 = exp.getMatch(s,8,RE.REG_ANCHORINDEX); <BR>
156    * <P>
157    * // Results:<BR>
158    * // m0 = "food"<BR>
159    * // m1 = "fool"<BR>
160    * // m2 = null<BR>
161    * // m3 = null<BR>
162    * // m4 = "fool"<BR>
163    * </CODE>
164    */

165   public static final int REG_NOTBOL = 16;
166
167   /**
168    * Execution flag.
169    * The match-end operator ($) does not match at the end
170    * of the input string. Useful for matching on substrings.
171    */

172   public static final int REG_NOTEOL = 32;
173
174   /**
175    * Execution flag.
176    * When a match method is invoked that starts matching at a non-zero
177    * index into the input, treat the input as if it begins at the index
178    * given. The effect of this flag is that the engine does not "see"
179    * any text in the input before the given index. This is useful so
180    * that the match-beginning operator (^) matches not at position 0
181    * in the input string, but at the position the search started at
182    * (based on the index input given to the getMatch function). See
183    * the example under REG_NOTBOL. It also affects the use of the \&lt;
184    * and \b operators.
185    */

186   public static final int REG_ANCHORINDEX = 64;
187
188   /**
189    * Execution flag.
190    * The substitute and substituteAll methods will not attempt to
191    * interpolate occurrences of $1-$9 in the replacement text with
192    * the corresponding subexpressions. For example, you may want to
193    * replace all matches of "one dollar" with "$1".
194    */

195   public static final int REG_NO_INTERPOLATE = 128;
196
197   /** Returns a string representing the version of the gnu.regexp package. */
198   public static final String JavaDoc version() {
199     return VERSION;
200   }
201
202   // Retrieves a message from the ResourceBundle
203
static final String JavaDoc getLocalizedMessage(String JavaDoc key) {
204     return messages.getString(key);
205   }
206
207   /**
208    * Constructs a regular expression pattern buffer without any compilation
209    * flags set, and using the default syntax (RESyntax.RE_SYNTAX_PERL5).
210    *
211    * @param pattern A regular expression pattern, in the form of a String,
212    * StringBuffer or char[]. Other input types will be converted to
213    * strings using the toString() method.
214    * @exception REException The input pattern could not be parsed.
215    * @exception NullPointerException The pattern was null.
216    */

217   public RE(Object JavaDoc pattern) throws REException {
218     this(pattern,0,RESyntax.RE_SYNTAX_PERL5,0,0);
219   }
220
221   /**
222    * Constructs a regular expression pattern buffer using the specified
223    * compilation flags and the default syntax (RESyntax.RE_SYNTAX_PERL5).
224    *
225    * @param pattern A regular expression pattern, in the form of a String,
226    * StringBuffer, or char[]. Other input types will be converted to
227    * strings using the toString() method.
228    * @param cflags The logical OR of any combination of the compilation flags listed above.
229    * @exception REException The input pattern could not be parsed.
230    * @exception NullPointerException The pattern was null.
231    */

232   public RE(Object JavaDoc pattern, int cflags) throws REException {
233     this(pattern,cflags,RESyntax.RE_SYNTAX_PERL5,0,0);
234   }
235
236   /**
237    * Constructs a regular expression pattern buffer using the specified
238    * compilation flags and regular expression syntax.
239    *
240    * @param pattern A regular expression pattern, in the form of a String,
241    * StringBuffer, or char[]. Other input types will be converted to
242    * strings using the toString() method.
243    * @param cflags The logical OR of any combination of the compilation flags listed above.
244    * @param syntax The type of regular expression syntax to use.
245    * @exception REException The input pattern could not be parsed.
246    * @exception NullPointerException The pattern was null.
247    */

248   public RE(Object JavaDoc pattern, int cflags, RESyntax syntax) throws REException {
249     this(pattern,cflags,syntax,0,0);
250   }
251
252   // internal constructor used for alternation
253
private RE(REToken first, REToken last,int subs, int subIndex, int minLength) {
254     super(subIndex);
255     firstToken = first;
256     lastToken = last;
257     numSubs = subs;
258     minimumLength = minLength;
259     addToken(new RETokenEndSub(subIndex));
260   }
261
262   private RE(Object JavaDoc patternObj, int cflags, RESyntax syntax, int myIndex, int nextSub) throws REException {
263     super(myIndex); // Subexpression index of this token.
264
initialize(patternObj, cflags, syntax, myIndex, nextSub);
265   }
266
267     // For use by subclasses
268
protected RE() { super(0); }
269
270     // The meat of construction
271
protected void initialize(Object JavaDoc patternObj, int cflags, RESyntax syntax, int myIndex, int nextSub) throws REException {
272       char[] pattern;
273     if (patternObj instanceof String JavaDoc) {
274       pattern = ((String JavaDoc) patternObj).toCharArray();
275     } else if (patternObj instanceof char[]) {
276       pattern = (char[]) patternObj;
277     } else if (patternObj instanceof StringBuffer JavaDoc) {
278       pattern = new char [((StringBuffer JavaDoc) patternObj).length()];
279       ((StringBuffer JavaDoc) patternObj).getChars(0,pattern.length,pattern,0);
280     } else {
281     pattern = patternObj.toString().toCharArray();
282     }
283
284     int pLength = pattern.length;
285
286     numSubs = 0; // Number of subexpressions in this token.
287
Vector JavaDoc branches = null;
288
289     // linked list of tokens (sort of -- some closed loops can exist)
290
firstToken = lastToken = null;
291
292     // Precalculate these so we don't pay for the math every time we
293
// need to access them.
294
boolean insens = ((cflags & REG_ICASE) > 0);
295
296     // Parse pattern into tokens. Does anyone know if it's more efficient
297
// to use char[] than a String.charAt()? I'm assuming so.
298

299     // index tracks the position in the char array
300
int index = 0;
301
302     // this will be the current parse character (pattern[index])
303
CharUnit unit = new CharUnit();
304
305     // This is used for {x,y} calculations
306
IntPair minMax = new IntPair();
307
308     // Buffer a token so we can create a TokenRepeated, etc.
309
REToken currentToken = null;
310     char ch;
311
312     while (index < pLength) {
313       // read the next character unit (including backslash escapes)
314
index = getCharUnit(pattern,index,unit);
315
316       // ALTERNATION OPERATOR
317
// \| or | (if RE_NO_BK_VBAR) or newline (if RE_NEWLINE_ALT)
318
// not available if RE_LIMITED_OPS is set
319

320       // TODO: the '\n' literal here should be a test against REToken.newline,
321
// which unfortunately may be more than a single character.
322
if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ unit.bk))
323          || (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !unit.bk) )
324        && !syntax.get(RESyntax.RE_LIMITED_OPS)) {
325     // make everything up to here be a branch. create vector if nec.
326
addToken(currentToken);
327     RE theBranch = new RE(firstToken, lastToken, numSubs, subIndex, minimumLength);
328     minimumLength = 0;
329     if (branches == null) {
330         branches = new Vector JavaDoc();
331     }
332     branches.addElement(theBranch);
333     firstToken = lastToken = currentToken = null;
334       }
335       
336       // INTERVAL OPERATOR:
337
// {x} | {x,} | {x,y} (RE_INTERVALS && RE_NO_BK_BRACES)
338
// \{x\} | \{x,\} | \{x,y\} (RE_INTERVALS && !RE_NO_BK_BRACES)
339
//
340
// OPEN QUESTION:
341
// what is proper interpretation of '{' at start of string?
342

343       else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)) {
344     int newIndex = getMinMax(pattern,index,minMax,syntax);
345         if (newIndex > index) {
346           if (minMax.first > minMax.second)
347             throw new REException(getLocalizedMessage("interval.order"),REException.REG_BADRPT,newIndex);
348           if (currentToken == null)
349             throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,newIndex);
350           if (currentToken instanceof RETokenRepeated)
351             throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,newIndex);
352           if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
353             throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,newIndex);
354           if ((currentToken.getMinimumLength() == 0) && (minMax.second == Integer.MAX_VALUE))
355             throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,newIndex);
356           index = newIndex;
357           currentToken = setRepeated(currentToken,minMax.first,minMax.second,index);
358         }
359         else {
360           addToken(currentToken);
361           currentToken = new RETokenChar(subIndex,unit.ch,insens);
362         }
363       }
364       
365       // LIST OPERATOR:
366
// [...] | [^...]
367

368       else if ((unit.ch == '[') && !unit.bk) {
369     Vector JavaDoc options = new Vector JavaDoc();
370     boolean negative = false;
371     char lastChar = 0;
372     if (index == pLength) throw new REException(getLocalizedMessage("unmatched.bracket"),REException.REG_EBRACK,index);
373     
374     // Check for initial caret, negation
375
if ((ch = pattern[index]) == '^') {
376       negative = true;
377       if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
378       ch = pattern[index];
379     }
380
381     // Check for leading right bracket literal
382
if (ch == ']') {
383       lastChar = ch;
384       if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
385     }
386
387     while ((ch = pattern[index++]) != ']') {
388       if ((ch == '-') && (lastChar != 0)) {
389         if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
390         if ((ch = pattern[index]) == ']') {
391           options.addElement(new RETokenChar(subIndex,lastChar,insens));
392           lastChar = '-';
393         } else {
394           options.addElement(new RETokenRange(subIndex,lastChar,ch,insens));
395           lastChar = 0;
396           index++;
397         }
398           } else if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) {
399             if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
400         int posixID = -1;
401         boolean negate = false;
402             char asciiEsc = 0;
403         if (("dswDSW".indexOf(pattern[index]) != -1) && syntax.get(RESyntax.RE_CHAR_CLASS_ESC_IN_LISTS)) {
404           switch (pattern[index]) {
405           case 'D':
406         negate = true;
407           case 'd':
408         posixID = RETokenPOSIX.DIGIT;
409         break;
410           case 'S':
411         negate = true;
412           case 's':
413         posixID = RETokenPOSIX.SPACE;
414         break;
415           case 'W':
416         negate = true;
417           case 'w':
418         posixID = RETokenPOSIX.ALNUM;
419         break;
420           }
421         }
422             else if ("nrt".indexOf(pattern[index]) != -1) {
423               switch (pattern[index]) {
424                 case 'n':
425                   asciiEsc = '\n';
426                   break;
427                 case 't':
428                   asciiEsc = '\t';
429                   break;
430                 case 'r':
431                   asciiEsc = '\r';
432                   break;
433               }
434             }
435         if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
436         
437         if (posixID != -1) {
438           options.addElement(new RETokenPOSIX(subIndex,posixID,insens,negate));
439         } else if (asciiEsc != 0) {
440           lastChar = asciiEsc;
441         } else {
442           lastChar = pattern[index];
443         }
444         ++index;
445       } else if ((ch == '[') && (syntax.get(RESyntax.RE_CHAR_CLASSES)) && (index < pLength) && (pattern[index] == ':')) {
446         StringBuffer JavaDoc posixSet = new StringBuffer JavaDoc();
447         index = getPosixSet(pattern,index+1,posixSet);
448         int posixId = RETokenPOSIX.intValue(posixSet.toString());
449         if (posixId != -1)
450           options.addElement(new RETokenPOSIX(subIndex,posixId,insens,false));
451       } else {
452         if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
453         lastChar = ch;
454       }
455       if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
456     } // while in list
457
// Out of list, index is one past ']'
458

459     if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
460         
461     // Create a new RETokenOneOf
462
addToken(currentToken);
463     options.trimToSize();
464     currentToken = new RETokenOneOf(subIndex,options,negative);
465       }
466
467       // SUBEXPRESSIONS
468
// (...) | \(...\) depending on RE_NO_BK_PARENS
469

470       else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) {
471     boolean pure = false;
472     boolean comment = false;
473         boolean lookAhead = false;
474         boolean negativelh = false;
475     if ((index+1 < pLength) && (pattern[index] == '?')) {
476       switch (pattern[index+1]) {
477           case '!':
478             if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
479               pure = true;
480               negativelh = true;
481               lookAhead = true;
482               index += 2;
483             }
484             break;
485           case '=':
486             if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
487               pure = true;
488               lookAhead = true;
489               index += 2;
490             }
491             break;
492       case ':':
493         if (syntax.get(RESyntax.RE_PURE_GROUPING)) {
494           pure = true;
495           index += 2;
496         }
497         break;
498       case '#':
499         if (syntax.get(RESyntax.RE_COMMENTS)) {
500           comment = true;
501         }
502         break;
503           default:
504             throw new REException(getLocalizedMessage("repeat.no.token"), REException.REG_BADRPT, index);
505       }
506     }
507
508     if (index >= pLength) {
509         throw new REException(getLocalizedMessage("unmatched.paren"), REException.REG_ESUBREG,index);
510     }
511
512     // find end of subexpression
513
int endIndex = index;
514     int nextIndex = index;
515     int nested = 0;
516
517     while ( ((nextIndex = getCharUnit(pattern,endIndex,unit)) > 0)
518         && !(nested == 0 && (unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) )
519       if ((endIndex = nextIndex) >= pLength)
520         throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);
521       else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))
522         nested++;
523       else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))
524         nested--;
525
526     // endIndex is now position at a ')','\)'
527
// nextIndex is end of string or position after ')' or '\)'
528

529     if (comment) index = nextIndex;
530     else { // not a comment
531
// create RE subexpression as token.
532
addToken(currentToken);
533       if (!pure) {
534         numSubs++;
535       }
536
537       int useIndex = (pure || lookAhead) ? 0 : nextSub + numSubs;
538       currentToken = new RE(String.valueOf(pattern,index,endIndex-index).toCharArray(),cflags,syntax,useIndex,nextSub + numSubs);
539       numSubs += ((RE) currentToken).getNumSubs();
540
541           if (lookAhead) {
542           currentToken = new RETokenLookAhead(currentToken,negativelh);
543       }
544
545       index = nextIndex;
546     } // not a comment
547
} // subexpression
548

549       // UNMATCHED RIGHT PAREN
550
// ) or \) throw exception if
551
// !syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD)
552
else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))) {
553     throw new REException(getLocalizedMessage("unmatched.paren"),REException.REG_EPAREN,index);
554       }
555
556       // START OF LINE OPERATOR
557
// ^
558

559       else if ((unit.ch == '^') && !unit.bk) {
560     addToken(currentToken);
561     currentToken = null;
562     addToken(new RETokenStart(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
563       }
564
565       // END OF LINE OPERATOR
566
// $
567

568       else if ((unit.ch == '$') && !unit.bk) {
569     addToken(currentToken);
570     currentToken = null;
571     addToken(new RETokenEnd(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
572       }
573
574       // MATCH-ANY-CHARACTER OPERATOR (except possibly newline and null)
575
// .
576

577       else if ((unit.ch == '.') && !unit.bk) {
578     addToken(currentToken);
579     currentToken = new RETokenAny(subIndex,syntax.get(RESyntax.RE_DOT_NEWLINE) || ((cflags & REG_DOT_NEWLINE) > 0),syntax.get(RESyntax.RE_DOT_NOT_NULL));
580       }
581
582       // ZERO-OR-MORE REPEAT OPERATOR
583
// *
584

585       else if ((unit.ch == '*') && !unit.bk) {
586     if (currentToken == null)
587           throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
588     if (currentToken instanceof RETokenRepeated)
589           throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
590     if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
591       throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
592     if (currentToken.getMinimumLength() == 0)
593       throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,index);
594     currentToken = setRepeated(currentToken,0,Integer.MAX_VALUE,index);
595       }
596
597       // ONE-OR-MORE REPEAT OPERATOR
598
// + | \+ depending on RE_BK_PLUS_QM
599
// not available if RE_LIMITED_OPS is set
600

601       else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) {
602     if (currentToken == null)
603           throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
604     if (currentToken instanceof RETokenRepeated)
605           throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
606     if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
607       throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
608     if (currentToken.getMinimumLength() == 0)
609       throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,index);
610     currentToken = setRepeated(currentToken,1,Integer.MAX_VALUE,index);
611       }
612
613       // ZERO-OR-ONE REPEAT OPERATOR / STINGY MATCHING OPERATOR
614
// ? | \? depending on RE_BK_PLUS_QM
615
// not available if RE_LIMITED_OPS is set
616
// stingy matching if RE_STINGY_OPS is set and it follows a quantifier
617

618       else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) {
619     if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
620
621     // Check for stingy matching on RETokenRepeated
622
if (currentToken instanceof RETokenRepeated) {
623           if (syntax.get(RESyntax.RE_STINGY_OPS) && !((RETokenRepeated)currentToken).isStingy())
624             ((RETokenRepeated)currentToken).makeStingy();
625           else
626             throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
627         }
628         else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
629           throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
630     else
631       currentToken = setRepeated(currentToken,0,1,index);
632       }
633     
634       // BACKREFERENCE OPERATOR
635
// \1 \2 ... \9
636
// not available if RE_NO_BK_REFS is set
637

638       else if (unit.bk && Character.isDigit(unit.ch) && !syntax.get(RESyntax.RE_NO_BK_REFS)) {
639     addToken(currentToken);
640     currentToken = new RETokenBackRef(subIndex,Character.digit(unit.ch,10),insens);
641       }
642
643       // START OF STRING OPERATOR
644
// \A if RE_STRING_ANCHORS is set
645

646       else if (unit.bk && (unit.ch == 'A') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
647     addToken(currentToken);
648     currentToken = new RETokenStart(subIndex,null);
649       }
650
651       // WORD BREAK OPERATOR
652
// \b if ????
653

654       else if (unit.bk && (unit.ch == 'b') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
655       addToken(currentToken);
656       currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, false);
657       }
658
659       // WORD BEGIN OPERATOR
660
// \< if ????
661
else if (unit.bk && (unit.ch == '<')) {
662       addToken(currentToken);
663       currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN, false);
664       }
665
666       // WORD END OPERATOR
667
// \> if ????
668
else if (unit.bk && (unit.ch == '>')) {
669       addToken(currentToken);
670       currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.END, false);
671       }
672
673       // NON-WORD BREAK OPERATOR
674
// \B if ????
675

676       else if (unit.bk && (unit.ch == 'B') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
677       addToken(currentToken);
678       currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, true);
679       }
680
681       
682       // DIGIT OPERATOR
683
// \d if RE_CHAR_CLASS_ESCAPES is set
684

685       else if (unit.bk && (unit.ch == 'd') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
686     addToken(currentToken);
687     currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,false);
688       }
689
690       // NON-DIGIT OPERATOR
691
// \D
692

693     else if (unit.bk && (unit.ch == 'D') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
694       addToken(currentToken);
695       currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,true);
696     }
697
698     // NEWLINE ESCAPE
699
// \n
700

701     else if (unit.bk && (unit.ch == 'n')) {
702       addToken(currentToken);
703       currentToken = new RETokenChar(subIndex,'\n',false);
704     }
705
706     // RETURN ESCAPE
707
// \r
708

709     else if (unit.bk && (unit.ch == 'r')) {
710       addToken(currentToken);
711       currentToken = new RETokenChar(subIndex,'\r',false);
712     }
713
714     // WHITESPACE OPERATOR
715
// \s if RE_CHAR_CLASS_ESCAPES is set
716

717     else if (unit.bk && (unit.ch == 's') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
718       addToken(currentToken);
719       currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,false);
720     }
721
722     // NON-WHITESPACE OPERATOR
723
// \S
724

725     else if (unit.bk && (unit.ch == 'S') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
726       addToken(currentToken);
727       currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,true);
728     }
729
730     // TAB ESCAPE
731
// \t
732

733     else if (unit.bk && (unit.ch == 't')) {
734       addToken(currentToken);
735       currentToken = new RETokenChar(subIndex,'\t',false);
736     }
737
738     // ALPHANUMERIC OPERATOR
739
// \w
740

741     else if (unit.bk && (unit.ch == 'w') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
742       addToken(currentToken);
743       currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,false);
744     }
745
746     // NON-ALPHANUMERIC OPERATOR
747
// \W
748

749     else if (unit.bk && (unit.ch == 'W') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
750       addToken(currentToken);
751       currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,true);
752     }
753
754     // END OF STRING OPERATOR
755
// \Z
756

757     else if (unit.bk && (unit.ch == 'Z') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
758       addToken(currentToken);
759       currentToken = new RETokenEnd(subIndex,null);
760     }
761
762     // NON-SPECIAL CHARACTER (or escape to make literal)
763
// c | \* for example
764

765     else { // not a special character
766
addToken(currentToken);
767       currentToken = new RETokenChar(subIndex,unit.ch,insens);
768     }
769       } // end while
770

771     // Add final buffered token and an EndSub marker
772
addToken(currentToken);
773       
774     if (branches != null) {
775     branches.addElement(new RE(firstToken,lastToken,numSubs,subIndex,minimumLength));
776     branches.trimToSize(); // compact the Vector
777
minimumLength = 0;
778     firstToken = lastToken = null;
779     addToken(new RETokenOneOf(subIndex,branches,false));
780     }
781     else addToken(new RETokenEndSub(subIndex));
782
783   }
784
785   private static int getCharUnit(char[] input, int index, CharUnit unit) throws REException {
786     unit.ch = input[index++];
787     if (unit.bk = (unit.ch == '\\'))
788       if (index < input.length)
789     unit.ch = input[index++];
790       else throw new REException(getLocalizedMessage("ends.with.backslash"),REException.REG_ESCAPE,index);
791     return index;
792   }
793
794   /**
795    * Checks if the regular expression matches the input in its entirety.
796    *
797    * @param input The input text.
798    */

799   public boolean isMatch(Object JavaDoc input) {
800     return isMatch(input,0,0);
801   }
802   
803   /**
804    * Checks if the input string, starting from index, is an exact match of
805    * this regular expression.
806    *
807    * @param input The input text.
808    * @param index The offset index at which the search should be begin.
809    */

810   public boolean isMatch(Object JavaDoc input,int index) {
811     return isMatch(input,index,0);
812   }
813   
814
815   /**
816    * Checks if the input, starting from index and using the specified
817    * execution flags, is an exact match of this regular expression.
818    *
819    * @param input The input text.
820    * @param index The offset index at which the search should be begin.
821    * @param eflags The logical OR of any execution flags above.
822    */

823   public boolean isMatch(Object JavaDoc input,int index,int eflags) {
824     return isMatchImpl(makeCharIndexed(input,index),index,eflags);
825   }
826
827   private boolean isMatchImpl(CharIndexed input, int index, int eflags) {
828     if (firstToken == null) // Trivial case
829
return (input.charAt(0) == CharIndexed.OUT_OF_BOUNDS);
830     REMatch m = new REMatch(numSubs, index, eflags);
831     if (firstToken.match(input, m)) {
832     while (m != null) {
833         if (input.charAt(m.index) == CharIndexed.OUT_OF_BOUNDS) {
834         return true;
835         }
836         m = m.next;
837     }
838     }
839     return false;
840   }
841     
842   /**
843    * Returns the maximum number of subexpressions in this regular expression.
844    * If the expression contains branches, the value returned will be the
845    * maximum subexpressions in any of the branches.
846    */

847   public int getNumSubs() {
848     return numSubs;
849   }
850
851   // Overrides REToken.setUncle
852
void setUncle(REToken uncle) {
853       if (lastToken != null) {
854       lastToken.setUncle(uncle);
855       } else super.setUncle(uncle); // to deal with empty subexpressions
856
}
857
858   // Overrides REToken.chain
859

860   boolean chain(REToken next) {
861     super.chain(next);
862     setUncle(next);
863     return true;
864   }
865
866   /**
867    * Returns the minimum number of characters that could possibly
868    * constitute a match of this regular expression.
869    */

870   public int getMinimumLength() {
871       return minimumLength;
872   }
873
874   /**
875    * Returns an array of all matches found in the input.
876    *
877    * If the regular expression allows the empty string to match, it will
878    * substitute matches at all positions except the end of the input.
879    *
880    * @param input The input text.
881    * @return a non-null (but possibly zero-length) array of matches
882    */

883   public REMatch[] getAllMatches(Object JavaDoc input) {
884     return getAllMatches(input,0,0);
885   }
886
887   /**
888    * Returns an array of all matches found in the input,
889    * beginning at the specified index position.
890    *
891    * If the regular expression allows the empty string to match, it will
892    * substitute matches at all positions except the end of the input.
893    *
894    * @param input The input text.
895    * @param index The offset index at which the search should be begin.
896    * @return a non-null (but possibly zero-length) array of matches
897    */

898   public REMatch[] getAllMatches(Object JavaDoc input, int index) {
899     return getAllMatches(input,index,0);
900   }
901
902   /**
903    * Returns an array of all matches found in the input string,
904    * beginning at the specified index position and using the specified
905    * execution flags.
906    *
907    * If the regular expression allows the empty string to match, it will
908    * substitute matches at all positions except the end of the input.
909    *
910    * @param input The input text.
911    * @param index The offset index at which the search should be begin.
912    * @param eflags The logical OR of any execution flags above.
913    * @return a non-null (but possibly zero-length) array of matches
914    */

915   public REMatch[] getAllMatches(Object JavaDoc input, int index, int eflags) {
916     return getAllMatchesImpl(makeCharIndexed(input,index),index,eflags);
917   }
918
919   // this has been changed since 1.03 to be non-overlapping matches
920
private REMatch[] getAllMatchesImpl(CharIndexed input, int index, int eflags) {
921     Vector JavaDoc all = new Vector JavaDoc();
922     REMatch m = null;
923     while ((m = getMatchImpl(input,index,eflags,null)) != null) {
924       all.addElement(m);
925       index = m.getEndIndex();
926       if (m.end[0] == 0) { // handle pathological case of zero-length match
927
index++;
928     input.move(1);
929       } else {
930     input.move(m.end[0]);
931       }
932       if (!input.isValid()) break;
933     }
934     REMatch[] mset = new REMatch[all.size()];
935     all.copyInto(mset);
936     return mset;
937   }
938   
939     /* Implements abstract method REToken.match() */
940     boolean match(CharIndexed input, REMatch mymatch) {
941     if (firstToken == null) return next(input, mymatch);
942
943     // Note the start of this subexpression
944
mymatch.start[subIndex] = mymatch.index;
945
946     return firstToken.match(input, mymatch);
947     }
948   
949   /**
950    * Returns the first match found in the input. If no match is found,
951    * null is returned.
952    *
953    * @param input The input text.
954    * @return An REMatch instance referencing the match, or null if none.
955    */

956   public REMatch getMatch(Object JavaDoc input) {
957     return getMatch(input,0,0);
958   }
959   
960   /**
961    * Returns the first match found in the input, beginning
962    * the search at the specified index. If no match is found,
963    * returns null.
964    *
965    * @param input The input text.
966    * @param index The offset within the text to begin looking for a match.
967    * @return An REMatch instance referencing the match, or null if none.
968    */

969   public REMatch getMatch(Object JavaDoc input, int index) {
970     return getMatch(input,index,0);
971   }
972   
973   /**
974    * Returns the first match found in the input, beginning
975    * the search at the specified index, and using the specified
976    * execution flags. If no match is found, returns null.
977    *
978    * @param input The input text.
979    * @param index The offset index at which the search should be begin.
980    * @param eflags The logical OR of any execution flags above.
981    * @return An REMatch instance referencing the match, or null if none.
982    */

983   public REMatch getMatch(Object JavaDoc input, int index, int eflags) {
984     return getMatch(input,index,eflags,null);
985   }
986
987   /**
988    * Returns the first match found in the input, beginning the search
989    * at the specified index, and using the specified execution flags.
990    * If no match is found, returns null. If a StringBuffer is
991    * provided and is non-null, the contents of the input text from the
992    * index to the beginning of the match (or to the end of the input,
993    * if there is no match) are appended to the StringBuffer.
994    *
995    * @param input The input text.
996    * @param index The offset index at which the search should be begin.
997    * @param eflags The logical OR of any execution flags above.
998    * @param buffer The StringBuffer to save pre-match text in.
999    * @return An REMatch instance referencing the match, or null if none. */

1000  public REMatch getMatch(Object JavaDoc input, int index, int eflags, StringBuffer JavaDoc buffer) {
1001    return getMatchImpl(makeCharIndexed(input,index),index,eflags,buffer);
1002  }
1003
1004  REMatch getMatchImpl(CharIndexed input, int anchor, int eflags, StringBuffer JavaDoc buffer) {
1005      // Create a new REMatch to hold results
1006
REMatch mymatch = new REMatch(numSubs, anchor, eflags);
1007      do {
1008      // Optimization: check if anchor + minimumLength > length
1009
if (minimumLength == 0 || input.charAt(minimumLength-1) != CharIndexed.OUT_OF_BOUNDS) {
1010          if (match(input, mymatch)) {
1011          // Find longest match of them all to observe leftmost longest
1012
REMatch longest = mymatch;
1013          while ((mymatch = mymatch.next) != null) {
1014              if (mymatch.index > longest.index) {
1015              longest = mymatch;
1016              }
1017          }
1018          
1019          longest.end[0] = longest.index;
1020          longest.finish(input);
1021          return longest;
1022          }
1023      }
1024      mymatch.clear(++anchor);
1025      // Append character to buffer if needed
1026
if (buffer != null && input.charAt(0) != CharIndexed.OUT_OF_BOUNDS) {
1027          buffer.append(input.charAt(0));
1028      }
1029      } while (input.move(1));
1030      
1031      return null;
1032  }
1033
1034  /**
1035   * Returns an REMatchEnumeration that can be used to iterate over the
1036   * matches found in the input text.
1037   *
1038   * @param input The input text.
1039   * @return A non-null REMatchEnumeration instance.
1040   */

1041  public REMatchEnumeration getMatchEnumeration(Object JavaDoc input) {
1042    return getMatchEnumeration(input,0,0);
1043  }
1044
1045
1046  /**
1047   * Returns an REMatchEnumeration that can be used to iterate over the
1048   * matches found in the input text.
1049   *
1050   * @param input The input text.
1051   * @param index The offset index at which the search should be begin.
1052   * @return A non-null REMatchEnumeration instance, with its input cursor
1053   * set to the index position specified.
1054   */

1055  public REMatchEnumeration getMatchEnumeration(Object JavaDoc input, int index) {
1056    return getMatchEnumeration(input,index,0);
1057  }
1058
1059  /**
1060   * Returns an REMatchEnumeration that can be used to iterate over the
1061   * matches found in the input text.
1062   *
1063   * @param input The input text.
1064   * @param index The offset index at which the search should be begin.
1065   * @param eflags The logical OR of any execution flags above.
1066   * @return A non-null REMatchEnumeration instance, with its input cursor
1067   * set to the index position specified.
1068   */

1069  public REMatchEnumeration getMatchEnumeration(Object JavaDoc input, int index, int eflags) {
1070    return new REMatchEnumeration(this,makeCharIndexed(input,index),index,eflags);
1071  }
1072
1073
1074  /**
1075   * Substitutes the replacement text for the first match found in the input.
1076   *
1077   * @param input The input text.
1078   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1079   * @return A String interpolating the substituted text.
1080   * @see REMatch#substituteInto
1081   */

1082  public String JavaDoc substitute(Object JavaDoc input,String JavaDoc replace) {
1083    return substitute(input,replace,0,0);
1084  }
1085
1086  /**
1087   * Substitutes the replacement text for the first match found in the input
1088   * beginning at the specified index position. Specifying an index
1089   * effectively causes the regular expression engine to throw away the
1090   * specified number of characters.
1091   *
1092   * @param input The input text.
1093   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1094   * @param index The offset index at which the search should be begin.
1095   * @return A String containing the substring of the input, starting
1096   * at the index position, and interpolating the substituted text.
1097   * @see REMatch#substituteInto
1098   */

1099  public String JavaDoc substitute(Object JavaDoc input,String JavaDoc replace,int index) {
1100    return substitute(input,replace,index,0);
1101  }
1102
1103  /**
1104   * Substitutes the replacement text for the first match found in the input
1105   * string, beginning at the specified index position and using the
1106   * specified execution flags.
1107   *
1108   * @param input The input text.
1109   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1110   * @param index The offset index at which the search should be begin.
1111   * @param eflags The logical OR of any execution flags above.
1112   * @return A String containing the substring of the input, starting
1113   * at the index position, and interpolating the substituted text.
1114   * @see REMatch#substituteInto
1115   */

1116  public String JavaDoc substitute(Object JavaDoc input,String JavaDoc replace,int index,int eflags) {
1117    return substituteImpl(makeCharIndexed(input,index),replace,index,eflags);
1118  }
1119
1120  private String JavaDoc substituteImpl(CharIndexed input,String JavaDoc replace,int index,int eflags) {
1121    StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
1122    REMatch m = getMatchImpl(input,index,eflags,buffer);
1123    if (m==null) return buffer.toString();
1124    buffer.append( ((eflags & REG_NO_INTERPOLATE) > 0) ?
1125           replace : m.substituteInto(replace) );
1126    if (input.move(m.end[0])) {
1127      do {
1128    buffer.append(input.charAt(0));
1129      } while (input.move(1));
1130    }
1131    return buffer.toString();
1132  }
1133  
1134  /**
1135   * Substitutes the replacement text for each non-overlapping match found
1136   * in the input text.
1137   *
1138   * @param input The input text.
1139   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1140   * @return A String interpolating the substituted text.
1141   * @see REMatch#substituteInto
1142   */

1143  public String JavaDoc substituteAll(Object JavaDoc input,String JavaDoc replace) {
1144    return substituteAll(input,replace,0,0);
1145  }
1146
1147  /**
1148   * Substitutes the replacement text for each non-overlapping match found
1149   * in the input text, starting at the specified index.
1150   *
1151   * If the regular expression allows the empty string to match, it will
1152   * substitute matches at all positions except the end of the input.
1153   *
1154   * @param input The input text.
1155   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1156   * @param index The offset index at which the search should be begin.
1157   * @return A String containing the substring of the input, starting
1158   * at the index position, and interpolating the substituted text.
1159   * @see REMatch#substituteInto
1160   */

1161  public String JavaDoc substituteAll(Object JavaDoc input,String JavaDoc replace,int index) {
1162    return substituteAll(input,replace,index,0);
1163  }
1164 
1165  /**
1166   * Substitutes the replacement text for each non-overlapping match found
1167   * in the input text, starting at the specified index and using the
1168   * specified execution flags.
1169   *
1170   * @param input The input text.
1171   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1172   * @param index The offset index at which the search should be begin.
1173   * @param eflags The logical OR of any execution flags above.
1174   * @return A String containing the substring of the input, starting
1175   * at the index position, and interpolating the substituted text.
1176   * @see REMatch#substituteInto
1177   */

1178  public String JavaDoc substituteAll(Object JavaDoc input,String JavaDoc replace,int index,int eflags) {
1179    return substituteAllImpl(makeCharIndexed(input,index),replace,index,eflags);
1180  }
1181
1182  private String JavaDoc substituteAllImpl(CharIndexed input,String JavaDoc replace,int index,int eflags) {
1183    StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
1184    REMatch m;
1185    while ((m = getMatchImpl(input,index,eflags,buffer)) != null) {
1186    buffer.append( ((eflags & REG_NO_INTERPOLATE) > 0) ?
1187               replace : m.substituteInto(replace) );
1188      index = m.getEndIndex();
1189      if (m.end[0] == 0) {
1190    char ch = input.charAt(0);
1191    if (ch != CharIndexed.OUT_OF_BOUNDS)
1192        buffer.append(ch);
1193    input.move(1);
1194      } else {
1195      input.move(m.end[0]);
1196      }
1197
1198      if (!input.isValid()) break;
1199    }
1200    return buffer.toString();
1201  }
1202  
1203  /* Helper function for constructor */
1204  private void addToken(REToken next) {
1205    if (next == null) return;
1206    minimumLength += next.getMinimumLength();
1207    if (firstToken == null) {
1208    lastToken = firstToken = next;
1209    } else {
1210      // if chain returns false, it "rejected" the token due to
1211
// an optimization, and next was combined with lastToken
1212
if (lastToken.chain(next)) {
1213      lastToken = next;
1214      }
1215    }
1216  }
1217
1218  private static REToken setRepeated(REToken current, int min, int max, int index) throws REException {
1219    if (current == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
1220    return new RETokenRepeated(current.subIndex,current,min,max);
1221  }
1222
1223  private static int getPosixSet(char[] pattern,int index,StringBuffer JavaDoc buf) {
1224    // Precondition: pattern[index-1] == ':'
1225
// we will return pos of closing ']'.
1226
int i;
1227    for (i=index; i<(pattern.length-1); i++) {
1228      if ((pattern[i] == ':') && (pattern[i+1] == ']'))
1229    return i+2;
1230      buf.append(pattern[i]);
1231    }
1232    return index; // didn't match up
1233
}
1234
1235  private int getMinMax(char[] input,int index,IntPair minMax,RESyntax syntax) throws REException {
1236    // Precondition: input[index-1] == '{', minMax != null
1237

1238    boolean mustMatch = !syntax.get(RESyntax.RE_NO_BK_BRACES);
1239    int startIndex = index;
1240    if (index == input.length) {
1241      if (mustMatch)
1242        throw new REException(getLocalizedMessage("unmatched.brace"),REException.REG_EBRACE,index);
1243      else
1244        return startIndex;
1245    }
1246    
1247    int min,max=0;
1248    CharUnit unit = new CharUnit();
1249    StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
1250    
1251    // Read string of digits
1252
do {
1253      index = getCharUnit(input,index,unit);
1254      if (Character.isDigit(unit.ch))
1255        buf.append(unit.ch);
1256    } while ((index != input.length) && Character.isDigit(unit.ch));
1257
1258    // Check for {} tomfoolery
1259
if (buf.length() == 0) {
1260      if (mustMatch)
1261        throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
1262      else
1263        return startIndex;
1264    }
1265
1266    min = Integer.parseInt(buf.toString());
1267    
1268    if ((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk))
1269      max = min;
1270    else if (index == input.length)
1271      if (mustMatch)
1272        throw new REException(getLocalizedMessage("interval.no.end"),REException.REG_EBRACE,index);
1273      else
1274        return startIndex;
1275    else if ((unit.ch == ',') && !unit.bk) {
1276      buf = new StringBuffer JavaDoc();
1277      // Read string of digits
1278
while (((index = getCharUnit(input,index,unit)) != input.length) && Character.isDigit(unit.ch))
1279    buf.append(unit.ch);
1280
1281      if (!((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)))
1282        if (mustMatch)
1283          throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
1284        else
1285          return startIndex;
1286
1287      // This is the case of {x,}
1288
if (buf.length() == 0) max = Integer.MAX_VALUE;
1289      else max = Integer.parseInt(buf.toString());
1290    } else
1291      if (mustMatch)
1292        throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
1293      else
1294        return startIndex;
1295
1296    // We know min and max now, and they are valid.
1297

1298    minMax.first = min;
1299    minMax.second = max;
1300
1301    // return the index following the '}'
1302
return index;
1303  }
1304
1305   /**
1306    * Return a human readable form of the compiled regular expression,
1307    * useful for debugging.
1308    */

1309   public String JavaDoc toString() {
1310     StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
1311     dump(sb);
1312     return sb.toString();
1313   }
1314
1315  void dump(StringBuffer JavaDoc os) {
1316    os.append('(');
1317    if (subIndex == 0)
1318      os.append("?:");
1319    if (firstToken != null)
1320      firstToken.dumpAll(os);
1321    os.append(')');
1322  }
1323
1324  // Cast input appropriately or throw exception
1325
private static CharIndexed makeCharIndexed(Object JavaDoc input, int index) {
1326      // We could let a String fall through to final input, but since
1327
// it's the most likely input type, we check it first.
1328
if (input instanceof String JavaDoc)
1329      return new CharIndexedString((String JavaDoc) input,index);
1330    else if (input instanceof char[])
1331      return new CharIndexedCharArray((char[]) input,index);
1332    else if (input instanceof StringBuffer JavaDoc)
1333      return new CharIndexedStringBuffer((StringBuffer JavaDoc) input,index);
1334    else if (input instanceof InputStream JavaDoc)
1335      return new CharIndexedInputStream((InputStream JavaDoc) input,index);
1336    else if (input instanceof Reader JavaDoc)
1337    return new CharIndexedReader((Reader JavaDoc) input, index);
1338    else if (input instanceof CharIndexed)
1339    return (CharIndexed) input; // do we lose index info?
1340
else
1341    return new CharIndexedString(input.toString(), index);
1342  }
1343}
1344
Popular Tags