KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > java > util > regex > Matcher


1 /*
2  * @(#)Matcher.java 1.58 04/06/28
3  *
4  * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
5  * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
6  */

7
8 package java.util.regex;
9
10
11 /**
12  * An engine that performs match operations on a {@link java.lang.CharSequence
13  * character sequence} by interpreting a {@link Pattern}.
14  *
15  * <p> A matcher is created from a pattern by invoking the pattern's {@link
16  * Pattern#matcher matcher} method. Once created, a matcher can be used to
17  * perform three different kinds of match operations:
18  *
19  * <ul>
20  *
21  * <li><p> The {@link #matches matches} method attempts to match the entire
22  * input sequence against the pattern. </p></li>
23  *
24  * <li><p> The {@link #lookingAt lookingAt} method attempts to match the
25  * input sequence, starting at the beginning, against the pattern. </p></li>
26  *
27  * <li><p> The {@link #find find} method scans the input sequence looking for
28  * the next subsequence that matches the pattern. </p></li>
29  *
30  * </ul>
31  *
32  * <p> Each of these methods returns a boolean indicating success or failure.
33  * More information about a successful match can be obtained by querying the
34  * state of the matcher.
35  *
36  * <p> A matcher finds matches in a subset of its input called the
37  * <i>region</i>. By default, the region contains all of the matcher's input.
38  * The region can be modified via the {@link #region region} method and queried
39  * via the {@link #regionStart regionStart} and {@link #regionEnd regionEnd}
40  * methods. The way that the region boundaries interact with some pattern
41  * constructs can be changed. See {@link #useAnchoringBounds
42  * useAnchoringBounds} and {@link #useTransparentBounds useTransparentBounds}
43  * for more details.
44  *
45  * <p> This class also defines methods for replacing matched subsequences with
46  * new strings whose contents can, if desired, be computed from the match
47  * result. The {@link #appendReplacement appendReplacement} and {@link
48  * #appendTail appendTail} methods can be used in tandem in order to collect
49  * the result into an existing string buffer, or the more convenient {@link
50  * #replaceAll replaceAll} method can be used to create a string in which every
51  * matching subsequence in the input sequence is replaced.
52  *
53  * <p> The explicit state of a matcher includes the start and end indices of
54  * the most recent successful match. It also includes the start and end
55  * indices of the input subsequence captured by each <a
56  * HREF="Pattern.html#cg">capturing group</a> in the pattern as well as a total
57  * count of such subsequences. As a convenience, methods are also provided for
58  * returning these captured subsequences in string form.
59  *
60  * <p> The explicit state of a matcher is initially undefined; attempting to
61  * query any part of it before a successful match will cause an {@link
62  * IllegalStateException} to be thrown. The explicit state of a matcher is
63  * recomputed by every match operation.
64  *
65  * <p> The implicit state of a matcher includes the input character sequence as
66  * well as the <i>append position</i>, which is initially zero and is updated
67  * by the {@link #appendReplacement appendReplacement} method.
68  *
69  * <p> A matcher may be reset explicitly by invoking its {@link #reset()}
70  * method or, if a new input sequence is desired, its {@link
71  * #reset(java.lang.CharSequence) reset(CharSequence)} method. Resetting a
72  * matcher discards its explicit state information and sets the append position
73  * to zero.
74  *
75  * <p> Instances of this class are not safe for use by multiple concurrent
76  * threads. </p>
77  *
78  *
79  * @author Mike McCloskey
80  * @author Mark Reinhold
81  * @author JSR-51 Expert Group
82  * @version 1.58, 04/06/28
83  * @since 1.4
84  * @spec JSR-51
85  */

86
87 public final class Matcher implements MatchResult JavaDoc {
88
89     /**
90      * The Pattern object that created this Matcher.
91      */

92     Pattern JavaDoc parentPattern;
93
94     /**
95      * The storage used by groups. They may contain invalid values if
96      * a group was skipped during the matching.
97      */

98     int[] groups;
99
100     /**
101      * The range within the sequence that is to be matched. Anchors
102      * will match at these "hard" boundaries. Changing the region
103      * changes these values.
104      */

105     int from, to;
106
107     /**
108      * The original string being matched.
109      */

110     CharSequence JavaDoc text;
111
112     /**
113      * Matcher state used by the last node. NOANCHOR is used when a
114      * match does not have to consume all of the input. ENDANCHOR is
115      * the mode used for matching all the input.
116      */

117     static final int ENDANCHOR = 1;
118     static final int NOANCHOR = 0;
119     int acceptMode = NOANCHOR;
120
121     /**
122      * The range of string that last matched the pattern. If the last
123      * match failed then first is -1; last initially holds 0 then it
124      * holds the index of the end of the last match (which is where the
125      * next search starts).
126      */

127     int first = -1, last = 0;
128
129     /**
130      * The end index of what matched in the last match operation.
131      */

132     int oldLast = -1;
133
134     /**
135      * The index of the last position appended in a substitution.
136      */

137     int lastAppendPosition = 0;
138
139     /**
140      * Storage used by nodes to tell what repetition they are on in
141      * a pattern, and where groups begin. The nodes themselves are stateless,
142      * so they rely on this field to hold state during a match.
143      */

144     int[] locals;
145
146     /**
147      * Boolean indicating whether or not more input could change
148      * the results of the last match.
149      *
150      * If hitEnd is true, and a match was found, then more input
151      * might cause a different match to be found.
152      * If hitEnd is true and a match was not found, then more
153      * input could cause a match to be found.
154      * If hitEnd is false and a match was found, then more input
155      * will not change the match.
156      * If hitEnd is false and a match was not found, then more
157      * input will not cause a match to be found.
158      */

159     boolean hitEnd;
160
161     /**
162      * Boolean indicating whether or not more input could change
163      * a positive match into a negative one.
164      *
165      * If requireEnd is true, and a match was found, then more
166      * input could cause the match to be lost.
167      * If requireEnd is false and a match was found, then more
168      * input might change the match but the match won't be lost.
169      * If a match was not found, then requireEnd has no meaning.
170      */

171     boolean requireEnd;
172
173     /**
174      * If transparentBounds is true then the boundaries of this
175      * matcher's region are transparent to lookahead, lookbehind,
176      * and boundary matching constructs that try to see beyond them.
177      */

178     boolean transparentBounds = false;
179
180     /**
181      * If anchoringBounds is true then the boundaries of this
182      * matcher's region match anchors such as ^ and $.
183      */

184     boolean anchoringBounds = true;
185
186     /**
187      * Package-private no-argument constructor; it performs no initialization.
188      */

189     Matcher() {
190     }
191
192     /**
193      * All matchers have the state used by Pattern during a match.
194      */

195     Matcher(Pattern JavaDoc parent, CharSequence JavaDoc text) {
196         this.parentPattern = parent;
197         this.text = text;
198
199         // Allocate state storage
200
int parentGroupCount = Math.max(parent.capturingGroupCount, 10);
201         groups = new int[parentGroupCount * 2];
202         locals = new int[parent.localCount];
203
204         // Put fields into initial states
205
reset();
206     }
207
208     /**
209      * Returns the pattern that is interpreted by this matcher.
210      *
211      * @return The pattern for which this matcher was created
212      */

213     public Pattern JavaDoc pattern() {
214         return parentPattern;
215     }
216
217     /**
218      * Returns the match state of this matcher as a {@link MatchResult}.
219      * The result is unaffected by subsequent operations performed upon this
220      * matcher.
221      *
222      * @return a <code>MatchResult</code> with the state of this matcher
223      */

224     public MatchResult JavaDoc toMatchResult() {
225         Matcher JavaDoc result = new Matcher JavaDoc(this.parentPattern, text.toString());
226         result.first = this.first;
227         result.last = this.last;
228         result.groups = (int[])(this.groups.clone());
229         return result;
230     }
231
232     /**
233       * Changes the <tt>Pattern</tt> that this <tt>Matcher</tt> uses to
234       * find matches with.
235       *
236       * <p> This method causes this matcher to lose information
237       * about the groups of the last match that occurred. The
238       * matcher's position in the input is maintained and its
239       * last append position is unaffected.</p>
240       *
241       * @param newPattern
242       * The new pattern used by this matcher
243       * @return This matcher
244       * @throws IllegalArgumentException
245       * If newPattern is <tt>null</tt>
246       * @since 1.5
247       */

248     public Matcher JavaDoc usePattern(Pattern JavaDoc newPattern) {
249         if (newPattern == null)
250             throw new IllegalArgumentException JavaDoc("Pattern cannot be null");
251         parentPattern = newPattern;
252      
253         // Reallocate state storage
254
int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10);
255         groups = new int[parentGroupCount * 2];
256         locals = new int[newPattern.localCount];
257         for (int i = 0; i < groups.length; i++)
258             groups[i] = -1;
259         for (int i = 0; i < locals.length; i++)
260             locals[i] = -1;
261         return this;
262     }
263
264     /**
265      * Resets this matcher.
266      *
267      * <p> Resetting a matcher discards all of its explicit state information
268      * and sets its append position to zero. The matcher's region is set to the
269      * default region, which is its entire character sequence. The anchoring
270      * and transparency of this matcher's region boundaries are unaffected.
271      *
272      * @return This matcher
273      */

274     public Matcher JavaDoc reset() {
275         first = -1;
276         last = 0;
277         oldLast = -1;
278         for(int i=0; i<groups.length; i++)
279             groups[i] = -1;
280         for(int i=0; i<locals.length; i++)
281             locals[i] = -1;
282         lastAppendPosition = 0;
283         from = 0;
284         to = getTextLength();
285     return this;
286     }
287
288     /**
289      * Resets this matcher with a new input sequence.
290      *
291      * <p> Resetting a matcher discards all of its explicit state information
292      * and sets its append position to zero. The matcher's region is set to
293      * the default region, which is its entire character sequence. The
294      * anchoring and transparency of this matcher's region boundaries are
295      * unaffected.
296      *
297      * @param input
298      * The new input character sequence
299      *
300      * @return This matcher
301      */

302     public Matcher JavaDoc reset(CharSequence JavaDoc input) {
303         text = input;
304         return reset();
305     }
306
307     /**
308      * Returns the start index of the previous match.
309      *
310      * @return The index of the first character matched
311      *
312      * @throws IllegalStateException
313      * If no match has yet been attempted,
314      * or if the previous match operation failed
315      */

316     public int start() {
317         if (first < 0)
318             throw new IllegalStateException JavaDoc("No match available");
319         return first;
320     }
321
322     /**
323      * Returns the start index of the subsequence captured by the given group
324      * during the previous match operation.
325      *
326      * <p> <a HREF="Pattern.html#cg">Capturing groups</a> are indexed from left
327      * to right, starting at one. Group zero denotes the entire pattern, so
328      * the expression <i>m.</i><tt>start(0)</tt> is equivalent to
329      * <i>m.</i><tt>start()</tt>. </p>
330      *
331      * @param group
332      * The index of a capturing group in this matcher's pattern
333      *
334      * @return The index of the first character captured by the group,
335      * or <tt>-1</tt> if the match was successful but the group
336      * itself did not match anything
337      *
338      * @throws IllegalStateException
339      * If no match has yet been attempted,
340      * or if the previous match operation failed
341      *
342      * @throws IndexOutOfBoundsException
343      * If there is no capturing group in the pattern
344      * with the given index
345      */

346     public int start(int group) {
347         if (first < 0)
348             throw new IllegalStateException JavaDoc("No match available");
349         if (group > groupCount())
350             throw new IndexOutOfBoundsException JavaDoc("No group " + group);
351         return groups[group * 2];
352     }
353
354     /**
355      * Returns the offset after the last character matched.
356      *
357      * @return The offset after the last character matched
358      *
359      * @throws IllegalStateException
360      * If no match has yet been attempted,
361      * or if the previous match operation failed
362      */

363     public int end() {
364         if (first < 0)
365             throw new IllegalStateException JavaDoc("No match available");
366         return last;
367     }
368
369     /**
370      * Returns the offset after the last character of the subsequence
371      * captured by the given group during the previous match operation.
372      *
373      * <p> <a HREF="Pattern.html#cg">Capturing groups</a> are indexed from left
374      * to right, starting at one. Group zero denotes the entire pattern, so
375      * the expression <i>m.</i><tt>end(0)</tt> is equivalent to
376      * <i>m.</i><tt>end()</tt>. </p>
377      *
378      * @param group
379      * The index of a capturing group in this matcher's pattern
380      *
381      * @return The offset after the last character captured by the group,
382      * or <tt>-1</tt> if the match was successful
383      * but the group itself did not match anything
384      *
385      * @throws IllegalStateException
386      * If no match has yet been attempted,
387      * or if the previous match operation failed
388      *
389      * @throws IndexOutOfBoundsException
390      * If there is no capturing group in the pattern
391      * with the given index
392      */

393     public int end(int group) {
394         if (first < 0)
395             throw new IllegalStateException JavaDoc("No match available");
396         if (group > groupCount())
397             throw new IndexOutOfBoundsException JavaDoc("No group " + group);
398         return groups[group * 2 + 1];
399     }
400
401     /**
402      * Returns the input subsequence matched by the previous match.
403      *
404      * <p> For a matcher <i>m</i> with input sequence <i>s</i>,
405      * the expressions <i>m.</i><tt>group()</tt> and
406      * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(),</tt>&nbsp;<i>m.</i><tt>end())</tt>
407      * are equivalent. </p>
408      *
409      * <p> Note that some patterns, for example <tt>a*</tt>, match the empty
410      * string. This method will return the empty string when the pattern
411      * successfully matches the empty string in the input. </p>
412      *
413      * @return The (possibly empty) subsequence matched by the previous match,
414      * in string form
415      *
416      * @throws IllegalStateException
417      * If no match has yet been attempted,
418      * or if the previous match operation failed
419      */

420     public String JavaDoc group() {
421         return group(0);
422     }
423
424     /**
425      * Returns the input subsequence captured by the given group during the
426      * previous match operation.
427      *
428      * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index
429      * <i>g</i>, the expressions <i>m.</i><tt>group(</tt><i>g</i><tt>)</tt> and
430      * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(</tt><i>g</i><tt>),</tt>&nbsp;<i>m.</i><tt>end(</tt><i>g</i><tt>))</tt>
431      * are equivalent. </p>
432      *
433      * <p> <a HREF="Pattern.html#cg">Capturing groups</a> are indexed from left
434      * to right, starting at one. Group zero denotes the entire pattern, so
435      * the expression <tt>m.group(0)</tt> is equivalent to <tt>m.group()</tt>.
436      * </p>
437      *
438      * <p> If the match was successful but the group specified failed to match
439      * any part of the input sequence, then <tt>null</tt> is returned. Note
440      * that some groups, for example <tt>(a*)</tt>, match the empty string.
441      * This method will return the empty string when such a group successfully
442      * matches the empty string in the input. </p>
443      *
444      * @param group
445      * The index of a capturing group in this matcher's pattern
446      *
447      * @return The (possibly empty) subsequence captured by the group
448      * during the previous match, or <tt>null</tt> if the group
449      * failed to match part of the input
450      *
451      * @throws IllegalStateException
452      * If no match has yet been attempted,
453      * or if the previous match operation failed
454      *
455      * @throws IndexOutOfBoundsException
456      * If there is no capturing group in the pattern
457      * with the given index
458      */

459     public String JavaDoc group(int group) {
460         if (first < 0)
461             throw new IllegalStateException JavaDoc("No match found");
462         if (group < 0 || group > groupCount())
463             throw new IndexOutOfBoundsException JavaDoc("No group " + group);
464         if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
465             return null;
466         return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
467     }
468
469     /**
470      * Returns the number of capturing groups in this matcher's pattern.
471      *
472      * <p> Group zero denotes the entire pattern by convention. It is not
473      * included in this count.
474      *
475      * <p> Any non-negative integer smaller than or equal to the value
476      * returned by this method is guaranteed to be a valid group index for
477      * this matcher. </p>
478      *
479      * @return The number of capturing groups in this matcher's pattern
480      */

481     public int groupCount() {
482         return parentPattern.capturingGroupCount - 1;
483     }
484
485     /**
486      * Attempts to match the entire region against the pattern.
487      *
488      * <p> If the match succeeds then more information can be obtained via the
489      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. </p>
490      *
491      * @return <tt>true</tt> if, and only if, the entire region sequence
492      * matches this matcher's pattern
493      */

494     public boolean matches() {
495         return match(from, ENDANCHOR);
496     }
497
498     /**
499      * Attempts to find the next subsequence of the input sequence that matches
500      * the pattern.
501      *
502      * <p> This method starts at the beginning of this matcher's region, or, if
503      * a previous invocation of the method was successful and the matcher has
504      * not since been reset, at the first character not matched by the previous
505      * match.
506      *
507      * <p> If the match succeeds then more information can be obtained via the
508      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. </p>
509      *
510      * @return <tt>true</tt> if, and only if, a subsequence of the input
511      * sequence matches this matcher's pattern
512      */

513     public boolean find() {
514         int nextSearchIndex = last;
515         if (nextSearchIndex == first)
516             nextSearchIndex++;
517
518         // If next search starts before region, start it at region
519
if (nextSearchIndex < from)
520             nextSearchIndex = from;
521
522         // If next search starts beyond region then it fails
523
if (nextSearchIndex > to) {
524             for (int i = 0; i < groups.length; i++)
525                 groups[i] = -1;
526             return false;
527         }
528         return search(nextSearchIndex);
529     }
530
531     /**
532      * Resets this matcher and then attempts to find the next subsequence of
533      * the input sequence that matches the pattern, starting at the specified
534      * index.
535      *
536      * <p> If the match succeeds then more information can be obtained via the
537      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods, and subsequent
538      * invocations of the {@link #find()} method will start at the first
539      * character not matched by this match. </p>
540      *
541      * @throws IndexOutOfBoundsException
542      * If start is less than zero or if start is greater than the
543      * length of the input sequence.
544      *
545      * @return <tt>true</tt> if, and only if, a subsequence of the input
546      * sequence starting at the given index matches this matcher's
547      * pattern
548      */

549     public boolean find(int start) {
550         int limit = getTextLength();
551         if ((start < 0) || (start > limit))
552             throw new IndexOutOfBoundsException JavaDoc("Illegal start index");
553         reset();
554         return search(start);
555     }
556
557     /**
558      * Attempts to match the input sequence, starting at the beginning of the
559      * region, against the pattern.
560      *
561      * <p> Like the {@link #matches matches} method, this method always starts
562      * at the beginning of the region; unlike that method, it does not
563      * require that the entire region be matched.
564      *
565      * <p> If the match succeeds then more information can be obtained via the
566      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. </p>
567      *
568      * @return <tt>true</tt> if, and only if, a prefix of the input
569      * sequence matches this matcher's pattern
570      */

571     public boolean lookingAt() {
572         return match(from, NOANCHOR);
573     }
574
575     /**
576      * Returns a literal replacement <code>String</code> for the specified
577      * <code>String</code>.
578      *
579      * This method produces a <code>String</code> that will work
580      * as a literal replacement <code>s</code> in the
581      * <code>appendReplacement</code> method of the {@link Matcher} class.
582      * The <code>String</code> produced will match the sequence of characters
583      * in <code>s</code> treated as a literal sequence. Slashes ('\') and
584      * dollar signs ('$') will be given no special meaning.
585      *
586      * @param s The string to be literalized
587      * @return A literal string replacement
588      * @since 1.5
589      */

590     public static String JavaDoc quoteReplacement(String JavaDoc s) {
591         if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1))
592             return s;
593         StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
594         for (int i=0; i<s.length(); i++) {
595             char c = s.charAt(i);
596             if (c == '\\') {
597                 sb.append('\\'); sb.append('\\');
598             } else if (c == '$') {
599                 sb.append('\\'); sb.append('$');
600             } else {
601                 sb.append(c);
602             }
603         }
604         return sb.toString();
605     }
606
607     /**
608      * Implements a non-terminal append-and-replace step.
609      *
610      * <p> This method performs the following actions: </p>
611      *
612      * <ol>
613      *
614      * <li><p> It reads characters from the input sequence, starting at the
615      * append position, and appends them to the given string buffer. It
616      * stops after reading the last character preceding the previous match,
617      * that is, the character at index {@link
618      * #start()}&nbsp;<tt>-</tt>&nbsp;<tt>1</tt>. </p></li>
619      *
620      * <li><p> It appends the given replacement string to the string buffer.
621      * </p></li>
622      *
623      * <li><p> It sets the append position of this matcher to the index of
624      * the last character matched, plus one, that is, to {@link #end()}.
625      * </p></li>
626      *
627      * </ol>
628      *
629      * <p> The replacement string may contain references to subsequences
630      * captured during the previous match: Each occurrence of
631      * <tt>$</tt><i>g</i><tt></tt> will be replaced by the result of
632      * evaluating {@link #group(int) group}<tt>(</tt><i>g</i><tt>)</tt>.
633      * The first number after the <tt>$</tt> is always treated as part of
634      * the group reference. Subsequent numbers are incorporated into g if
635      * they would form a legal group reference. Only the numerals '0'
636      * through '9' are considered as potential components of the group
637      * reference. If the second group matched the string <tt>"foo"</tt>, for
638      * example, then passing the replacement string <tt>"$2bar"</tt> would
639      * cause <tt>"foobar"</tt> to be appended to the string buffer. A dollar
640      * sign (<tt>$</tt>) may be included as a literal in the replacement
641      * string by preceding it with a backslash (<tt>\$</tt>).
642      *
643      * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
644      * the replacement string may cause the results to be different than if it
645      * were being treated as a literal replacement string. Dollar signs may be
646      * treated as references to captured subsequences as described above, and
647      * backslashes are used to escape literal characters in the replacement
648      * string.
649      *
650      * <p> This method is intended to be used in a loop together with the
651      * {@link #appendTail appendTail} and {@link #find find} methods. The
652      * following code, for example, writes <tt>one dog two dogs in the
653      * yard</tt> to the standard-output stream: </p>
654      *
655      * <blockquote><pre>
656      * Pattern p = Pattern.compile("cat");
657      * Matcher m = p.matcher("one cat two cats in the yard");
658      * StringBuffer sb = new StringBuffer();
659      * while (m.find()) {
660      * m.appendReplacement(sb, "dog");
661      * }
662      * m.appendTail(sb);
663      * System.out.println(sb.toString());</pre></blockquote>
664      *
665      * @param sb
666      * The target string buffer
667      *
668      * @param replacement
669      * The replacement string
670      *
671      * @return This matcher
672      *
673      * @throws IllegalStateException
674      * If no match has yet been attempted,
675      * or if the previous match operation failed
676      *
677      * @throws IndexOutOfBoundsException
678      * If the replacement string refers to a capturing group
679      * that does not exist in the pattern
680      */

681     public Matcher JavaDoc appendReplacement(StringBuffer JavaDoc sb, String JavaDoc replacement) {
682
683         // If no match, return error
684
if (first < 0)
685             throw new IllegalStateException JavaDoc("No match available");
686
687         // Process substitution string to replace group references with groups
688
int cursor = 0;
689         String JavaDoc s = replacement;
690         StringBuffer JavaDoc result = new StringBuffer JavaDoc();
691
692         while (cursor < replacement.length()) {
693             char nextChar = replacement.charAt(cursor);
694             if (nextChar == '\\') {
695                 cursor++;
696                 nextChar = replacement.charAt(cursor);
697                 result.append(nextChar);
698                 cursor++;
699             } else if (nextChar == '$') {
700                 // Skip past $
701
cursor++;
702
703                 // The first number is always a group
704
int refNum = (int)replacement.charAt(cursor) - '0';
705                 if ((refNum < 0)||(refNum > 9))
706                     throw new IllegalArgumentException JavaDoc(
707                         "Illegal group reference");
708                 cursor++;
709
710                 // Capture the largest legal group string
711
boolean done = false;
712                 while (!done) {
713                     if (cursor >= replacement.length()) {
714                         break;
715                     }
716                     int nextDigit = replacement.charAt(cursor) - '0';
717                     if ((nextDigit < 0)||(nextDigit > 9)) { // not a number
718
break;
719                     }
720                     int newRefNum = (refNum * 10) + nextDigit;
721                     if (groupCount() < newRefNum) {
722                         done = true;
723                     } else {
724                         refNum = newRefNum;
725                         cursor++;
726                     }
727                 }
728
729                 // Append group
730
if (group(refNum) != null)
731                     result.append(group(refNum));
732             } else {
733                 result.append(nextChar);
734                 cursor++;
735             }
736         }
737
738         // Append the intervening text
739
sb.append(getSubSequence(lastAppendPosition, first));
740         // Append the match substitution
741
sb.append(result.toString());
742
743         lastAppendPosition = last;
744     return this;
745     }
746
747     /**
748      * Implements a terminal append-and-replace step.
749      *
750      * <p> This method reads characters from the input sequence, starting at
751      * the append position, and appends them to the given string buffer. It is
752      * intended to be invoked after one or more invocations of the {@link
753      * #appendReplacement appendReplacement} method in order to copy the
754      * remainder of the input sequence. </p>
755      *
756      * @param sb
757      * The target string buffer
758      *
759      * @return The target string buffer
760      */

761     public StringBuffer JavaDoc appendTail(StringBuffer JavaDoc sb) {
762         sb.append(getSubSequence(lastAppendPosition, getTextLength()).toString());
763     return sb;
764     }
765
766     /**
767      * Replaces every subsequence of the input sequence that matches the
768      * pattern with the given replacement string.
769      *
770      * <p> This method first resets this matcher. It then scans the input
771      * sequence looking for matches of the pattern. Characters that are not
772      * part of any match are appended directly to the result string; each match
773      * is replaced in the result by the replacement string. The replacement
774      * string may contain references to captured subsequences as in the {@link
775      * #appendReplacement appendReplacement} method.
776      *
777      * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
778      * the replacement string may cause the results to be different than if it
779      * were being treated as a literal replacement string. Dollar signs may be
780      * treated as references to captured subsequences as described above, and
781      * backslashes are used to escape literal characters in the replacement
782      * string.
783      *
784      * <p> Given the regular expression <tt>a*b</tt>, the input
785      * <tt>"aabfooaabfooabfoob"</tt>, and the replacement string
786      * <tt>"-"</tt>, an invocation of this method on a matcher for that
787      * expression would yield the string <tt>"-foo-foo-foo-"</tt>.
788      *
789      * <p> Invoking this method changes this matcher's state. If the matcher
790      * is to be used in further matching operations then it should first be
791      * reset. </p>
792      *
793      * @param replacement
794      * The replacement string
795      *
796      * @return The string constructed by replacing each matching subsequence
797      * by the replacement string, substituting captured subsequences
798      * as needed
799      */

800     public String JavaDoc replaceAll(String JavaDoc replacement) {
801         reset();
802         boolean result = find();
803         if (result) {
804             StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
805             do {
806                 appendReplacement(sb, replacement);
807                 result = find();
808             } while (result);
809             appendTail(sb);
810             return sb.toString();
811         }
812         return text.toString();
813     }
814
815     /**
816      * Replaces the first subsequence of the input sequence that matches the
817      * pattern with the given replacement string.
818      *
819      * <p> This method first resets this matcher. It then scans the input
820      * sequence looking for a match of the pattern. Characters that are not
821      * part of the match are appended directly to the result string; the match
822      * is replaced in the result by the replacement string. The replacement
823      * string may contain references to captured subsequences as in the {@link
824      * #appendReplacement appendReplacement} method.
825      *
826      * <p> Given the regular expression <tt>dog</tt>, the input
827      * <tt>"zzzdogzzzdogzzz"</tt>, and the replacement string
828      * <tt>"cat"</tt>, an invocation of this method on a matcher for that
829      * expression would yield the string <tt>"zzzcatzzzdogzzz"</tt>. </p>
830      *
831      * <p> Invoking this method changes this matcher's state. If the matcher
832      * is to be used in further matching operations then it should first be
833      * reset. </p>
834      *
835      * @param replacement
836      * The replacement string
837      * @return The string constructed by replacing the first matching
838      * subsequence by the replacement string, substituting captured
839      * @throws NullPointerException if <code>replacement</code> is null.
840      * subsequences as needed
841      */

842     public String JavaDoc replaceFirst(String JavaDoc replacement) {
843         if (replacement == null)
844             throw new NullPointerException JavaDoc("replacement");
845         StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
846         reset();
847         if (find())
848             appendReplacement(sb, replacement);
849         appendTail(sb);
850         return sb.toString();
851     }
852
853     /**
854      * Sets the limits of this matcher's region. The region is the part of the
855      * input sequence that will be searched to find a match. Invoking this
856      * method resets the matcher, and then sets the region to start at the
857      * index specified by the <code>start</code> parameter and end at the
858      * index specified by the <code>end</code> parameter.
859      *
860      * <p>Depending on the transparency and anchoring being used (see
861      * {@link #useTransparentBounds useTransparentBounds} and
862      * {@link #useAnchoringBounds useAnchoringBounds}), certain constructs such
863      * as anchors may behave differently at or around the boundaries of the
864      * region.
865      *
866      * @param start
867      * The index to start searching at (inclusive)
868      * @param end
869      * The index to end searching at (exclusive)
870      * @throws IndexOutOfBoundsException
871      * If start or end is less than zero, if
872      * start is greater than the length of the input sequence, if
873      * end is greater than the length of the input sequence, or if
874      * start is greater than end.
875      * @return this matcher
876      * @since 1.5
877      */

878     public Matcher JavaDoc region(int start, int end) {
879         if ((start < 0) || (start > getTextLength()))
880             throw new IndexOutOfBoundsException JavaDoc("start");
881         if ((end < 0) || (end > getTextLength()))
882             throw new IndexOutOfBoundsException JavaDoc("end");
883         if (start > end)
884             throw new IndexOutOfBoundsException JavaDoc("start > end");
885         reset();
886         from = start;
887         to = end;
888         return this;
889     }
890
891     /**
892      * Reports the start index of this matcher's region. The
893      * searches this matcher conducts are limited to finding matches
894      * within {@link #regionStart regionStart} (inclusive) and
895      * {@link #regionEnd regionEnd} (exclusive).
896      *
897      * @return The starting point of this matcher's region
898      * @since 1.5
899      */

900     public int regionStart() {
901         return from;
902     }
903
904     /**
905      * Reports the end index (exclusive) of this matcher's region.
906      * The searches this matcher conducts are limited to finding matches
907      * within {@link #regionStart regionStart} (inclusive) and
908      * {@link #regionEnd regionEnd} (exclusive).
909      *
910      * @return the ending point of this matcher's region
911      * @since 1.5
912      */

913     public int regionEnd() {
914         return to;
915     }
916
917     /**
918      * Queries the transparency of region bounds for this matcher.
919      *
920      * <p> This method returns <tt>true</tt> if this matcher uses
921      * <i>transparent</i> bounds, <tt>false</tt> if it uses <i>opaque</i>
922      * bounds.
923      *
924      * <p> See {@link #useTransparentBounds useTransparentBounds} for a
925      * description of transparent and opaque bounds.
926      *
927      * <p> By default, a matcher uses opaque region boundaries.
928      *
929      * @return <tt>true</tt> iff this matcher is using transparent bounds,
930      * <tt>false</tt> otherwise.
931      * @see java.util.regex.Matcher#useTransparentBounds(boolean)
932      * @since 1.5
933      */

934     public boolean hasTransparentBounds() {
935         return transparentBounds;
936     }
937
938     /**
939      * Sets the transparency of region bounds for this matcher.
940      *
941      * <p> Invoking this method with an argument of <tt>true</tt> will set this
942      * matcher to use <i>transparent</i> bounds. If the boolean
943      * argument is <tt>false</tt>, then <i>opaque</i> bounds will be used.
944      *
945      * <p> Using transparent bounds, the boundaries of this
946      * matcher's region are transparent to lookahead, lookbehind,
947      * and boundary matching constructs. Those constructs can see beyond the
948      * boundaries of the region to see if a match is appropriate.
949      *
950      * <p> Using opaque bounds, the boundaries of this matcher's
951      * region are opaque to lookahead, lookbehind, and boundary matching
952      * constructs that may try to see beyond them. Those constructs cannot
953      * look past the boundaries so they will fail to match anything outside
954      * of the region.
955      *
956      * <p> By default, a matcher uses opaque bounds.
957      *
958      * @param b a boolean indicating whether to use opaque or transparent
959      * regions
960      * @return this matcher
961      * @see java.util.regex.Matcher#hasTransparentBounds
962      * @since 1.5
963      */

964     public Matcher JavaDoc useTransparentBounds(boolean b) {
965         transparentBounds = b;
966         return this;
967     }
968  
969     /**
970      * Queries the anchoring of region bounds for this matcher.
971      *
972      * <p> This method returns <tt>true</tt> if this matcher uses
973      * <i>anchoring</i> bounds, <tt>false</tt> otherwise.
974      *
975      * <p> See {@link #useAnchoringBounds useAnchoringBounds} for a
976      * description of anchoring bounds.
977      *
978      * <p> By default, a matcher uses anchoring region boundaries.
979      *
980      * @return <tt>true</tt> iff this matcher is using anchoring bounds,
981      * <tt>false</tt> otherwise.
982      * @see java.util.regex.Matcher#useAnchoringBounds(boolean)
983      * @since 1.5
984      */

985     public boolean hasAnchoringBounds() {
986         return anchoringBounds;
987     }
988
989     /**
990      * Sets the anchoring of region bounds for this matcher.
991      *
992      * <p> Invoking this method with an argument of <tt>true</tt> will set this
993      * matcher to use <i>anchoring</i> bounds. If the boolean
994      * argument is <tt>false</tt>, then <i>non-anchoring</i> bounds will be
995      * used.
996      *
997      * <p> Using anchoring bounds, the boundaries of this
998      * matcher's region match anchors such as ^ and $.
999      *
1000     * <p> Without anchoring bounds, the boundaries of this
1001     * matcher's region will not match anchors such as ^ and $.
1002     *
1003     * <p> By default, a matcher uses anchoring region boundaries.
1004     *
1005     * @param b a boolean indicating whether or not to use anchoring bounds.
1006     * @return this matcher
1007     * @see java.util.regex.Matcher#hasAnchoringBounds
1008     * @since 1.5
1009     */

1010    public Matcher JavaDoc useAnchoringBounds(boolean b) {
1011        anchoringBounds = b;
1012        return this;
1013    }
1014
1015    /**
1016     * <p>Returns the string representation of this matcher. The
1017     * string representation of a <code>Matcher</code> contains information
1018     * that may be useful for debugging. The exact format is unspecified.
1019     *
1020     * @return The string representation of this matcher
1021     * @since 1.5
1022     */

1023    public String JavaDoc toString() {
1024        StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
1025    sb.append("java.util.regex.Matcher");
1026    sb.append("[pattern=" + pattern());
1027    sb.append(" region=");
1028    sb.append(regionStart() + "," + regionEnd());
1029        sb.append(" lastmatch=");
1030        if ((first >= 0) && (group() != null)) {
1031            sb.append(group());
1032        }
1033    sb.append("]");
1034    return sb.toString();
1035    }
1036
1037    /**
1038     * <p>Returns true if the end of input was hit by the search engine in
1039     * the last match operation performed by this matcher.
1040     *
1041     * <p>When this method returns true, then it is possible that more input
1042     * would have changed the result of the last search.
1043     *
1044     * @return true iff the end of input was hit in the last match; false
1045     * otherwise
1046     * @since 1.5
1047     */

1048    public boolean hitEnd() {
1049        return hitEnd;
1050    }
1051
1052    /**
1053     * <p>Returns true if more input could change a positive match into a
1054     * negative one.
1055     *
1056     * <p>If this method returns true, and a match was found, then more
1057     * input could cause the match to be lost. If this method returns false
1058     * and a match was found, then more input might change the match but the
1059     * match won't be lost. If a match was not found, then requireEnd has no
1060     * meaning.
1061     *
1062     * @return true iff more input could change a positive match into a
1063     * negative one.
1064     * @since 1.5
1065     */

1066    public boolean requireEnd() {
1067        return requireEnd;
1068    }
1069
1070    /**
1071     * Initiates a search to find a Pattern within the given bounds.
1072     * The groups are filled with default values and the match of the root
1073     * of the state machine is called. The state machine will hold the state
1074     * of the match as it proceeds in this matcher.
1075     *
1076     * Matcher.from is not set here, because it is the "hard" boundary
1077     * of the start of the search which anchors will set to. The from param
1078     * is the "soft" boundary of the start of the search, meaning that the
1079     * regex tries to match at that index but ^ won't match there. Subsequent
1080     * calls to the search methods start at a new "soft" boundary which is
1081     * the end of the previous match.
1082     */

1083    boolean search(int from) {
1084        this.hitEnd = false;
1085        this.requireEnd = false;
1086        from = from < 0 ? 0 : from;
1087        this.first = from;
1088        this.oldLast = oldLast < 0 ? from : oldLast;
1089        for (int i = 0; i < groups.length; i++)
1090            groups[i] = -1;
1091        acceptMode = NOANCHOR;
1092        boolean result = parentPattern.root.match(this, from, text);
1093        if (!result)
1094            this.first = -1;
1095        this.oldLast = this.last;
1096        return result;
1097    }
1098
1099    /**
1100     * Initiates a search for an anchored match to a Pattern within the given
1101     * bounds. The groups are filled with default values and the match of the
1102     * root of the state machine is called. The state machine will hold the
1103     * state of the match as it proceeds in this matcher.
1104     */

1105    boolean match(int from, int anchor) {
1106        this.hitEnd = false;
1107        this.requireEnd = false;
1108        from = from < 0 ? 0 : from;
1109        this.first = from;
1110        this.oldLast = oldLast < 0 ? from : oldLast;
1111        for (int i = 0; i < groups.length; i++)
1112            groups[i] = -1;
1113        acceptMode = anchor;
1114        boolean result = parentPattern.matchRoot.match(this, from, text);
1115        if (!result)
1116            this.first = -1;
1117        this.oldLast = this.last;
1118        return result;
1119    }
1120
1121    /**
1122     * Returns the end index of the text.
1123     *
1124     * @return the index after the last character in the text
1125     */

1126    int getTextLength() {
1127        return text.length();
1128    }
1129
1130    /**
1131     * Generates a String from this Matcher's input in the specified range.
1132     *
1133     * @param beginIndex the beginning index, inclusive
1134     * @param endIndex the ending index, exclusive
1135     * @return A String generated from this Matcher's input
1136     */

1137    CharSequence JavaDoc getSubSequence(int beginIndex, int endIndex) {
1138        return text.subSequence(beginIndex, endIndex);
1139    }
1140
1141    /**
1142     * Returns this Matcher's input character at index i.
1143     *
1144     * @return A char from the specified index
1145     */

1146    char charAt(int i) {
1147        return text.charAt(i);
1148    }
1149
1150}
1151
Popular Tags