KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > cocoon > util > Tokenizer


1 /*
2  * Copyright 1999-2004 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package org.apache.cocoon.util;
17
18 import java.util.Enumeration JavaDoc;
19 import java.util.NoSuchElementException JavaDoc;
20
/**
 * Replacement for StringTokenizer in java.util, because of bug in the
 * Sun's implementation.
 *
 * <p>Unlike <code>java.util.StringTokenizer</code>, this tokenizer reports
 * empty tokens: every delimiter separates two tokens, even if one or both
 * of them are empty. Without <code>returnTokens</code> a string containing
 * <i>n</i> delimiters therefore yields <i>n + 1</i> tokens; with
 * <code>returnTokens</code> it yields <i>2n + 1</i> tokens (the delimiters
 * themselves are interleaved as one-character tokens).</p>
 *
 * <p>Instances keep a mutable cursor and perform no synchronization.</p>
 *
 * @author <A HREF="mailto:moravek@pobox.sk">Peter Moravek</A>
 * @version CVS $Id: Tokenizer.java 123826 2004-12-31 21:40:50Z antonio $
 */
public class Tokenizer implements Enumeration {

  /**
   * Default delimiters " \t\n\r\f":
   * the space character, the tab character, the newline character,
   * the carriage-return character, and the form-feed character.
   */
  public static final String DEFAULT_DELIMITERS = " \t\n\r\f";

  /**
   * String to tokenize.
   */
  private final String str;

  /**
   * Delimiters. May be replaced by {@link #nextToken(String)}.
   */
  private String delim;

  /**
   * Flag indicating whether to return the delimiters as tokens.
   */
  private final boolean returnTokens;

  /**
   * Start position of the previously returned token, or -1 if no token
   * has been returned since construction or the last {@link #reset()}.
   */
  private int previous = -1;

  /**
   * Current position in str string.
   */
  private int current = 0;

  /**
   * Maximal position in str string (its length).
   */
  private final int max;

  /**
   * Constructs a string tokenizer for the specified string. All characters
   * in the delim argument are the delimiters for separating tokens.
   * If the returnTokens flag is true, then the delimiter characters are
   * also returned as tokens. Each delimiter is returned as a string of
   * length one. If the flag is false, the delimiter characters are skipped
   * and only serve as separators between tokens.
   *
   * @param str a string to be parsed
   * @param delim the delimiters
   * @param returnTokens flag indicating whether to return the delimiters
   * as tokens
   *
   * @exception NullPointerException if str is null
   */
  public Tokenizer(String str, String delim, boolean returnTokens) {
    if (str == null) {
      throw new NullPointerException("str must not be null");
    }

    this.str = str;
    this.delim = delim;
    this.returnTokens = returnTokens;
    this.max = str.length();
  }

  /**
   * Constructs a string tokenizer for the specified string. The characters
   * in the delim argument are the delimiters for separating tokens.
   * Delimiter characters themselves will not be treated as tokens.
   *
   * @param str a string to be parsed
   * @param delim the delimiters
   */
  public Tokenizer(String str, String delim) {
    this(str, delim, false);
  }

  /**
   * Constructs a string tokenizer for the specified string. The character
   * in the delim argument is the delimiter for separating tokens.
   * The delimiter character itself will not be treated as a token.
   *
   * @param str a string to be parsed
   * @param delim the delimiter
   */
  public Tokenizer(String str, char delim) {
    this(str, String.valueOf(delim), false);
  }

  /**
   * Constructs a string tokenizer for the specified string. The tokenizer
   * uses the default delimiter set, which is " \t\n\r\f": the space
   * character, the tab character, the newline character, the carriage-return
   * character, and the form-feed character. Delimiter characters themselves
   * will not be treated as tokens.
   *
   * @param str a string to be parsed
   */
  public Tokenizer(String str) {
    this(str, DEFAULT_DELIMITERS, false);
  }

  /**
   * Tests if there are more tokens available from this tokenizer's string.
   * If this method returns true, then a subsequent call to nextToken with
   * no argument will successfully return a token.
   *
   * @return true if and only if there is at least one token in the string
   * after the current position; false otherwise.
   */
  public boolean hasMoreTokens() {
    return current < max || atTrailingEmptyToken();
  }

  /**
   * Returns the next token from this string tokenizer.
   *
   * @return the next token from this string tokenizer
   *
   * @exception NoSuchElementException if there are no more tokens in this
   * tokenizer's string
   */
  public String nextToken() throws NoSuchElementException {
    if (atTrailingEmptyToken()) {
      // Step past max so that this final empty token is reported only once.
      current++;
      return "";
    }

    if (current >= max) {
      throw new NoSuchElementException();
    }

    int start = current;
    String result = null;

    if (isDelimiter(str.charAt(start))) {
      if (previous == -1
        || (returnTokens && previous != current
          && isDelimiter(str.charAt(previous)))) {

        // Empty token in front of this delimiter (string starts with a
        // delimiter, or two delimiters are adjacent in returnTokens mode).
        // The delimiter itself is deliberately NOT consumed here: the next
        // call handles it, so the token following it is never lost.
        result = "";
      } else if (returnTokens) {
        // Hand out the delimiter itself as a one-character token.
        result = str.substring(start, ++current);
      } else {
        // Plain separator: skip it and read the token that follows below.
        current++;
      }
    }

    previous = start;
    start = current;

    if (result != null) {
      return result;
    }

    // Regular token: everything up to the next delimiter or end of string.
    while (current < max && !isDelimiter(str.charAt(current))) {
      current++;
    }

    return str.substring(start, current);
  }

  /**
   * Returns the next token in this string tokenizer's string. First, the
   * set of characters considered to be delimiters by this Tokenizer
   * object is changed to be the characters in the string delim.
   * Then the next token in the string after the current position is
   * returned. The current position is advanced beyond the recognized token.
   * The new delimiter set remains the default after this call.
   *
   * @param delim the new delimiters
   *
   * @return the next token, after switching to the new delimiter set
   *
   * @exception NoSuchElementException if there are no more tokens in this
   * tokenizer's string.
   */
  public String nextToken(String delim) throws NoSuchElementException {
    this.delim = delim;
    return nextToken();
  }

  /**
   * Returns the same value as the hasMoreTokens method. It exists so that
   * this class can implement the Enumeration interface.
   *
   * @return true if there are more tokens; false otherwise.
   */
  public boolean hasMoreElements() {
    return hasMoreTokens();
  }

  /**
   * Returns the same value as the nextToken method, except that its
   * declared return value is Object rather than String. It exists so that
   * this class can implement the Enumeration interface.
   *
   * @return the next token in the string
   *
   * @exception NoSuchElementException if there are no more tokens in this
   * tokenizer's string
   */
  public Object nextElement() {
    return nextToken();
  }

  /**
   * Calculates the number of times that this tokenizer's nextToken method
   * can be called before it generates an exception. The current position
   * is not advanced.
   *
   * <p>The count is obtained by a dry run over the remaining tokens, so it
   * is exact from any position, not only from the start of the string.</p>
   *
   * @return the number of tokens remaining in the string using the
   * current delimiter set
   */
  public int countTokens() {
    final int savedCurrent = current;
    final int savedPrevious = previous;
    int count = 0;

    try {
      while (hasMoreTokens()) {
        nextToken();
        count++;
      }
    } finally {
      // Put the cursor back exactly where the caller left it.
      current = savedCurrent;
      previous = savedPrevious;
    }

    return count;
  }

  /**
   * Resets this tokenizer's state so the tokenizing starts from the
   * beginning. The current delimiter set is kept.
   */
  public void reset() {
    previous = -1;
    current = 0;
  }

  /**
   * Constructs a string tokenizer for the specified string. All characters
   * in the delim argument are the delimiters for separating tokens.
   * If the returnTokens flag is true, then the delimiter characters are
   * also returned as tokens. Each delimiter is returned as a string of
   * length one. If the flag is false, the delimiter characters are skipped
   * and only serve as separators between tokens. Then tokenizes the str
   * and returns a String[] array with the tokens.
   *
   * @param str a string to be parsed
   * @param delim the delimiters
   * @param returnTokens flag indicating whether to return the delimiters
   * as tokens
   *
   * @return array with tokens; it never contains null elements
   */
  public static String[] tokenize(String str, String delim,
    boolean returnTokens) {

    Tokenizer tokenizer = new Tokenizer(str, delim, returnTokens);
    String[] tokens = new String[tokenizer.countTokens()];

    for (int i = 0; i < tokens.length; i++) {
      tokens[i] = tokenizer.nextToken();
    }

    return tokens;
  }

  /**
   * Tells whether the given character belongs to the current delimiter set.
   */
  private boolean isDelimiter(char c) {
    return delim.indexOf(c) >= 0;
  }

  /**
   * Tells whether the cursor sits exactly at the end of the string with one
   * last empty token still to be handed out: either the string is empty, or
   * delimiters are returned as tokens and the previous token was a delimiter.
   */
  private boolean atTrailingEmptyToken() {
    if (current != max) {
      return false;
    }
    if (max == 0) {
      return true;
    }
    return returnTokens && previous >= 0 && isDelimiter(str.charAt(previous));
  }
}
290
Popular Tags