KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > sf > saxon > functions > Tokenize


package net.sf.saxon.functions;

import net.sf.saxon.expr.Expression;
import net.sf.saxon.expr.StaticContext;
import net.sf.saxon.expr.XPathContext;
import net.sf.saxon.om.EmptyIterator;
import net.sf.saxon.om.Item;
import net.sf.saxon.om.SequenceIterator;
import net.sf.saxon.trans.DynamicError;
import net.sf.saxon.trans.StaticError;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.type.RegexTranslator;
import net.sf.saxon.value.AtomicValue;
import net.sf.saxon.value.StringValue;
import net.sf.saxon.value.Value;

import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

21 /**
22 * This class implements the tokenize() function for regular expression matching. This returns a
23 * sequence of strings representing the unmatched substrings: the separators which match the
24 * regular expression are not returned.
25 */

26
27 public class Tokenize extends SystemFunction {
28
29     private Pattern JavaDoc regexp;
30
31     /**
32     * Simplify and validate.
33     * This is a pure function so it can be simplified in advance if the arguments are known
34     */

35
36      public Expression simplify(StaticContext env) throws XPathException {
37         Expression e = simplifyArguments(env);
38
39         // compile the regular expression once if possible
40
if (!(e instanceof Value)) {
41             regexp = Matches.tryToCompile(argument, 1, 2);
42             // check that it's not a pattern that matches ""
43
if (regexp != null && regexp.matcher("").matches()) {
44                 StaticError err = new StaticError(
45                         "The regular expression must not be one that matches a zero-length string");
46                 err.setErrorCode("FORX0003");
47                 throw err;
48             }
49         }
50
51         return e;
52     }
53
54     /**
55     * Iterate over the results of the function
56     */

57
58     public SequenceIterator iterate(XPathContext c) throws XPathException {
59         AtomicValue sv = (AtomicValue)argument[0].evaluateItem(c);
60         if (sv==null) {
61             return EmptyIterator.getInstance();
62         };
63         CharSequence JavaDoc input = sv.getStringValueCS();
64         if (input.length() == 0) {
65             return EmptyIterator.getInstance();
66         }
67
68         Pattern JavaDoc re = regexp;
69         if (re == null) {
70
71             sv = (AtomicValue)argument[1].evaluateItem(c);
72             CharSequence JavaDoc pattern = sv.getStringValueCS();
73
74             CharSequence JavaDoc flags;
75             if (argument.length==2) {
76                 flags = "";
77             } else {
78                 sv = (AtomicValue)argument[2].evaluateItem(c);
79                 flags = sv.getStringValueCS();
80             }
81
82             try {
83                 String JavaDoc javaRegex = RegexTranslator.translate(pattern, true);
84                 re = Pattern.compile(javaRegex, Matches.setFlags(flags));
85             } catch (RegexTranslator.RegexSyntaxException err) {
86                 throw new DynamicError(err);
87             } catch (PatternSyntaxException JavaDoc err) {
88                 throw new DynamicError(err);
89             }
90
91             // check that it's not a pattern that matches ""
92
if (re.matcher("").matches()) {
93                 throw new StaticError(
94                         "The regular expression must not be one that matches a zero-length string");
95             }
96
97         }
98         return new TokenIterator(input, re);
99     }
100
101
102     /**
103     * Inner class TokenIterator
104     */

105
106     public static class TokenIterator implements SequenceIterator {
107
108         private CharSequence JavaDoc input;
109         private Pattern JavaDoc pattern;
110         private Matcher JavaDoc matcher;
111         private CharSequence JavaDoc current;
112         private int position = 0;
113         private int prevEnd = 0;
114
115
116         /**
117         * Construct a TokenIterator.
118         */

119
120         public TokenIterator (CharSequence JavaDoc input, Pattern JavaDoc pattern) {
121             this.input = input;
122             this.pattern = pattern;
123             matcher = pattern.matcher(input);
124             prevEnd = 0;
125         }
126
127         public Item next() {
128             if (prevEnd < 0) {
129                 current = null;
130                 position = -1;
131                 return null;
132             }
133
134             if (matcher.find()) {
135                 current = input.subSequence(prevEnd, matcher.start());
136                 prevEnd = matcher.end();
137             } else {
138                 current = input.subSequence(prevEnd, input.length());
139                 prevEnd = -1;
140             }
141             position++;
142             return StringValue.makeStringValue(current);
143         }
144
145         public Item current() {
146             return (current==null ? null : StringValue.makeStringValue(current));
147         }
148
149         public int position() {
150             return position;
151         }
152
153         public SequenceIterator getAnother() {
154             return new TokenIterator(input, pattern);
155         }
156
157         /**
158          * Get properties of this iterator, as a bit-significant integer.
159          *
160          * @return the properties of this iterator. This will be some combination of
161          * properties such as {@link GROUNDED}, {@link LAST_POSITION_FINDER},
162          * and {@link LOOKAHEAD}. It is always
163          * acceptable to return the value zero, indicating that there are no known special properties.
164          * It is acceptable for the properties of the iterator to change depending on its state.
165          */

166
167         public int getProperties() {
168             return 0;
169         }
170
171     }
172
173     /**
174      * Simple command-line interface for testing.
175      * @param args (1) the string to be tokenized (2) the regular expression
176      * @throws Exception
177      */

178
179     public static void main(String JavaDoc[] args) throws Exception JavaDoc {
180         String JavaDoc in = args[0];
181         String JavaDoc[] out = Pattern.compile(args[1]).split(in, 0);
182         System.out.println("results");
183         for (int i=0; i<out.length; i++) {
184             System.out.println('[' + out[i] + ']');
185         }
186         System.out.println("end results");
187     }
188
189
190
191 }

//
// The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
// you may not use this file except in compliance with the License. You may obtain a copy of the
// License at http://www.mozilla.org/MPL/
//
// Software distributed under the License is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the License for the specific language governing rights and limitations under the License.
//
// The Original Code is: all this file.
//
// The Initial Developer of the Original Code is Michael H. Kay
//
// Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
//
// Contributor(s): none.
//
Popular Tags