KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > sf > saxon > instruct > RegexIterator


package net.sf.saxon.instruct;
import net.sf.saxon.om.ArrayIterator;
import net.sf.saxon.om.Item;
import net.sf.saxon.om.SequenceIterator;
import net.sf.saxon.om.EmptyIterator;
import net.sf.saxon.value.StringValue;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

11 /**
12 * Class RegexIterator - provides an iterator over matched and unmatched substrings
13 */

14
15 public class RegexIterator implements SequenceIterator {
16
17     private String JavaDoc theString; // the input string being matched
18
private Pattern JavaDoc pattern; // the regex against which the string is matched
19
private Matcher JavaDoc matcher; // the Matcher object that does the matching, and holds the state
20
private String JavaDoc current; // the string most recently returned by the iterator
21
private String JavaDoc next; // if the last string was a matching string, null; otherwise the next substring
22
// matched by the regex
23
private int position = 0; // the value of XPath position()
24
private int prevEnd = 0; // the position in the input string of the end of the last match or non-match
25

26     /**
27     * Construct a RegexIterator. Note that the underlying matcher.find() method is called once
28     * to obtain each matching substring. But the iterator also returns non-matching substrings
29     * if these appear between the matching substrings.
30     * @param string the string to be analysed
31     * @param pattern the regular expression
32     */

33
34     public RegexIterator (String JavaDoc string, Pattern JavaDoc pattern) {
35         theString = string;
36         this.pattern = pattern;
37         matcher = pattern.matcher(string);
38         next = null;
39     }
40
41     /**
42     * Get the next item in the sequence
43     * @return the next item in the sequence
44     */

45
46     public Item next() {
47         if (next == null && prevEnd >= 0) {
48             // we've returned a match (or we're at the start), so find the next match
49
if (matcher.find()) {
50                 int start = matcher.start();
51                 int end = matcher.end();
52                 if (prevEnd == start) {
53                     // there's no intervening non-matching string to return
54
next = null;
55                     current = theString.substring(start, end);
56                     prevEnd = end;
57                 } else {
58                     // return the non-matching substring first
59
current = theString.substring(prevEnd, start);
60                     next = theString.substring(start, end);
61                 }
62             } else {
63                 // there are no more regex matches, we must return the final non-matching text if any
64
if (prevEnd < theString.length()) {
65                     current = theString.substring(prevEnd);
66                     next = null;
67                 } else {
68                     // this really is the end...
69
current = null;
70                     position = -1;
71                     prevEnd = -1;
72                     return null;
73                 }
74                 prevEnd = -1;
75             }
76         } else {
77             // we've returned a non-match, so now return the match that follows it, if there is one
78
if (prevEnd >= 0) {
79                 current = next;
80                 next = null;
81                 prevEnd = matcher.end();
82             } else {
83                 current = null;
84                 position = -1;
85                 return null;
86             }
87         }
88         position++;
89         return StringValue.makeStringValue(current);
90     }
91
92     /**
93     * Get the current item in the sequence
94     * @return the item most recently returned by next()
95     */

96
97     public Item current() {
98         return StringValue.makeStringValue(current);
99     }
100
101     /**
102     * Get the position of the current item in the sequence
103     * @return the position of the item most recently returned by next(), starting at 1
104     */

105
106     public int position() {
107         return position;
108     }
109
110     /**
111     * Get another iterator over the same items
112     * @return a new iterator, positioned before the first item
113     */

114
115     public SequenceIterator getAnother() {
116         return new RegexIterator(theString, pattern);
117     }
118
119     /**
120      * Get properties of this iterator, as a bit-significant integer.
121      *
122      * @return the properties of this iterator. This will be some combination of
123      * properties such as {@link GROUNDED}, {@link LAST_POSITION_FINDER},
124      * and {@link LOOKAHEAD}. It is always
125      * acceptable to return the value zero, indicating that there are no known special properties.
126      * It is acceptable for the properties of the iterator to change depending on its state.
127      */

128
129     public int getProperties() {
130         return 0;
131     }
132
133     /**
134     * Determine whether the current item is a matching item or a non-matching item
135     * @return true if the current item (the one most recently returned by next()) is
136     * an item that matches the regular expression, or false if it is an item that
137     * does not match
138     */

139
140     public boolean isMatching() {
141         return next == null && prevEnd >= 0;
142     }
143
144     /**
145     * Get a substring that matches a parenthesised group within the regular expression
146     * @param number the number of the group to be obtained
147     * @return the substring of the current item that matches the n'th parenthesized group
148     * within the regular expression
149     */

150
151     public String JavaDoc getRegexGroup(int number) {
152         if (!isMatching()) return null;
153         if (number > matcher.groupCount() || number < 0) return "";
154         String JavaDoc s = matcher.group(number);
155         if (s==null) return "";
156         return s;
157     }
158
159     /**
160      * Get a sequence containing all the regex groups (except group 0, because we want to use indexing from 1).
161      * This is used by the saxon:analyze-string() higher-order extension function.
162      */

163
164     public SequenceIterator getRegexGroupIterator() {
165         int c = matcher.groupCount();
166         if (c == 0) {
167             return EmptyIterator.getInstance();
168         } else {
169             StringValue[] groups = new StringValue[c];
170             for (int i=1; i<=groups.length; i++) {
171                 groups[i-1] = StringValue.makeStringValue(matcher.group(i));
172             }
173             return new ArrayIterator(groups);
174         }
175     }
176
177 }
//
// The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
// you may not use this file except in compliance with the License. You may obtain a copy of the
// License at http://www.mozilla.org/MPL/
//
// Software distributed under the License is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the License for the specific language governing rights and limitations under the License.
//
// The Original Code is: all this file.
//
// The Initial Developer of the Original Code is Michael H. Kay
//
// Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
//
// Contributor(s):
// Portions marked "e.g." are from Edwin Glaser (edwin@pannenleiter.de)
//
Popular Tags