KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > Ostermiller > util > StringTokenizer


1 /*
2  * A replacement for java.util.StringTokenizer
3  * Copyright (C) 2001 Stephen Ostermiller
4  * http://ostermiller.org/contact.pl?regarding=Java+Utilities
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * See COPYING.TXT for details.
17  */

18
19  package com.Ostermiller.util;
20
21 /**
22  * The string tokenizer class allows an application to break a string into
23  * tokens.
24  * More information about this class is available from <a target="_top" HREF=
25  * "http://ostermiller.org/utils/StringTokenizer.html">ostermiller.org</a>.
26  * <p>
27  * The tokenization method is much simpler than the one used by the
28  * <code>StreamTokenizer</code> class. The <code>StringTokenizer</code> methods
29  * do not distinguish among identifiers, numbers, and quoted strings, nor do
30  * they recognize and skip comments.
31  * <p>
32  * The set of delimiters (the characters that separate tokens) may be specified
33  * either at creation time or on a per-token basis.
34  * <p>
35  * There are two kinds of delimiters: token delimiters and nontoken delimiters.
36  * A token is either one token delimiter character, or a maximal sequence of
37  * consecutive characters that are not delimiters.
38  * <p>
39  * A <code>StringTokenizer</code> object internally maintains a current
40  * position within the string to be tokenized. Some operations advance this
41  * current position past the characters processed.
42  * <p>
43  * The implementation is not thread safe; if a <code>StringTokenizer</code>
44  * object is intended to be used in multiple threads, an appropriate wrapper
45  * must be provided.
46  * <p>
47  * The following is one example of the use of the tokenizer. It also
48  * demonstrates the usefulness of having both token and nontoken delimiters in
49  * one <code>StringTokenizer</code>.
50  * <p>
51  * The code:
52  * <blockquote><code>
53  * String s = " &nbsp;( &nbsp; aaa \t &nbsp;* (b+c1 ))";<br>
54  * StringTokenizer st = new StringTokenizer(s, " \t\n\r\f", "()+*");<br>
55  * while (st.hasMoreTokens()) {<br>
56  * &nbsp;&nbsp;&nbsp;&nbsp;System.out.println(st.nextToken());<br>
57  * };
58  * </code></blockquote>
59  * <p>
60  * prints the following output:
61  * <blockquote>
62  * (<br>
63  * aaa<br>
64  * *<br>
65  * (<br>
66  * b<br>
67  * +<br>
68  * c1<br>
69  * )<br>
70  * )
71  * </blockquote>
72  * <p>
73  * <b>Compatibility with <code>java.util.StringTokenizer</code></b>
74  * <p>
75  * In the original version of <code>java.util.StringTokenizer</code>, the method
76  * <code>nextToken()</code> left the current position after the returned token,
77  * and the method <code>hasMoreTokens()</code> moved (as a side effect) the
78  * current position before the beginning of the next token. Thus, the code:
79  * <blockquote><code>
80  * String s = "x=a,b,c";<br>
81  * java.util.StringTokenizer st = new java.util.StringTokenizer(s,"=");<br>
82  * System.out.println(st.nextToken());<br>
83  * while (st.hasMoreTokens()) {<br>
84  * &nbsp;&nbsp;&nbsp;&nbsp;System.out.println(st.nextToken(","));<br>
85  * };
86  * </code></blockquote>
87  * <p>
88  * prints the following output:
89  * <blockquote>
90  * x<br>
91  * a<br>
92  * b<br>
93  * c
94  * </blockquote>
95  * <p>
96  * The Java SDK 1.3 implementation removed the undesired side effect of
97  * <code>hasMoreTokens</code> method: now, it does not advance current position.
98  * However, after these changes the output of the above code was:
99  * <blockquote>
100  * x<br>
101  * =a<br>
102  * b<br>
103  * c
104  * </blockquote>
105  * <p>
106  * and there was no good way to produce a second token without "=".
107  * <p>
108  * To solve the problem, this implementation introduces a new method
109  * <code>skipDelimiters()</code>. To produce the original output, the above code
110  * should be modified as:
111  * <blockquote><code>
112  * String s = "x=a,b,c";<br>
113  * StringTokenizer st = new StringTokenizer(s,"=");<br>
114  * System.out.println(st.nextToken());<br>
115  * st.skipDelimiters();<br>
116  * while (st.hasMoreTokens()) {<br>
117  * &nbsp;&nbsp;&nbsp;&nbsp;System.out.println(st.nextToken(","));<br>
118  * };
119  * </code></blockquote>
120  *
121  * @author Stephen Ostermiller http://ostermiller.org/contact.pl?regarding=Java+Utilities
122  * @since ostermillerutils 1.00.00
123  */

124 public class StringTokenizer implements java.util.Enumeration JavaDoc, java.util.Iterator JavaDoc {
125
126     /**
127      * The string to be tokenized.
128      * The code relies on this to never be null.
129      *
130      * @since ostermillerutils 1.00.00
131      */

132     protected String JavaDoc text;
133
134     /**
135      * The length of the text.
136      * Cached for performance. This should be set whenever the
137      * string we are working with is changed.
138      *
139      * @since ostermillerutils 1.00.00
140      */

141     protected int strLength;
142
143     /**
144      * The set of nontoken delimiters.
145      *
146      * @since ostermillerutils 1.00.00
147      */

148     protected String JavaDoc nontokenDelims;
149
150     /**
151      * The set of token delimiters.
152      *
153      * @since ostermillerutils 1.00.00
154      */

155     protected String JavaDoc tokenDelims;
156
157     /**
158      * One of two variables used to maintain state through
159      * the tokenizing process.
160      * <P>
161      * Represents the position at which we should start looking for
162      * the next token (the position of the character immediately
163      * following the end of the last token, or 0 to start), or
164      * -1 if the entire string has been examined.
165      *
166      * @since ostermillerutils 1.00.00
167      */

168     protected int position;
169
170     /**
171      * One of two variables used to maintain state through
172      * the tokenizing process.
173      * <p>
174      * true if and only if it is found that an empty token should
175      * be returned or if an empty token was the last thing returned.
176      * <p>
177      * If returnEmptyTokens is false, then this variable will
178      * always be false.
179      *
180      * @since ostermillerutils 1.00.00
181      */

182     protected boolean emptyReturned;
183
184     /**
185      * Stores the value of the delimiter character with the
186      * highest value. It is used to optimize the detection of delimiter
187      * characters. The common case will be that the int values of delimiters
188      * will be less than that of most characters in the string (a comma or space is
189      * less than any letter, for example). Given this, we can easily check
190      * to see if a character is not a delimiter by comparing it to the max
191      * delimiter. If it is greater than the max delimiter, then it is not
192      * a delimiter; otherwise we have to do some more in-depth analysis. (i.e.
193      * search the delimiter string.) This will reduce the running time of
194      * the algorithm not to depend on the length of the delimiter string
195      * for the common case.
196      *
197      * @since ostermillerutils 1.00.00
198      */

199     protected char maxDelimChar;
200
201     /**
202      * Whether empty tokens should be returned.
203      * ie if "" should be returned when text starts with
204      * a delim, has two delims next to each other, or
205      * ends with a delim.
206      *
207      * @since ostermillerutils 1.00.00
208      */

209     protected boolean returnEmptyTokens;
210
211     /**
212      * Indicates at which position the delimiters last changed. This
213      * will affect how null tokens are returned. Any
214      * time that delimiters are changed, the string will be treated as if
215      * it is being parsed from position zero, ie, null strings are possible
216      * at the very beginning.
217      *
218      * @since ostermillerutils 1.00.00
219      */

220     protected int delimsChangedPosition;
221
222     /**
223      * A cache of the token count. This variable should be -1 if the tokens
224      * have not yet been counted. It should be greater than or equal to zero
225      * if the tokens have been counted.
226      *
227      * @since ostermillerutils 1.00.00
228      */

229     protected int tokenCount;
230
231     /**
232      * Constructs a string tokenizer for the specified string. Both token and
233      * nontoken delimiters are specified.
234      * <p>
235      * The current position is set at the beginning of the string.
236      *
237      * @param text a string to be parsed.
238      * @param nontokenDelims the nontoken delimiters, i.e. the delimiters that only separate
239      * tokens and are not returned as separate tokens.
240      * @param tokenDelims the token delimiters, i.e. delimiters that both separate tokens,
241      * and are themselves returned as tokens.
242      * @throws NullPointerException if text is null.
243      *
244      * @since ostermillerutils 1.00.00
245      */

246     public StringTokenizer(String JavaDoc text, String JavaDoc nontokenDelims, String JavaDoc tokenDelims){
247         this(text, nontokenDelims, tokenDelims, false);
248     }
249
250     /**
251      * Constructs a string tokenizer for the specified string. Both token and
252      * nontoken delimiters are specified and whether or not empty tokens are returned
253      * is specified.
254      * <p>
255      * Empty tokens are tokens that are between consecutive delimiters.
256      * <p>
257      * It is a primary constructor (i.e. all other constructors are defined in terms
258      * of it.)
259      * <p>
260      * The current position is set at the beginning of the string.
261      *
262      * @param text a string to be parsed.
263      * @param nontokenDelims the nontoken delimiters, i.e. the delimiters that only separate
264      * tokens and are not returned as separate tokens.
265      * @param tokenDelims the token delimiters, i.e. delimiters that both separate tokens,
266      * and are themselves returned as tokens.
267      * @param returnEmptyTokens true if empty tokens may be returned; false otherwise.
268      * @throws NullPointerException if text is null.
269      *
270      * @since ostermillerutils 1.00.00
271      */

272     public StringTokenizer(String JavaDoc text, String JavaDoc nontokenDelims, String JavaDoc tokenDelims, boolean returnEmptyTokens){
273         setDelims(nontokenDelims, tokenDelims);
274         setText(text);
275         setReturnEmptyTokens(returnEmptyTokens);
276     }
277
278     /**
279      * Constructs a string tokenizer for the specified string. Either token or
280      * nontoken delimiters are specified.
281      * <p>
282      * Is equivalent to:
283      * <ul>
284      * <li> If the third parameter is <code>false</code> --
285      * <code>StringTokenizer(text,delims, null)</code>
286      * <li> If the third parameter is <code>true</code> --
287      * <code>StringTokenizer(text, null ,delims)</code>
288      * </ul>
289      *
290      * @param text a string to be parsed.
291      * @param delims the delimiters.
292      * @param delimsAreTokens
293      * flag indicating whether the second parameter specifies token or
294      * nontoken delimiters: <code>false</code> -- the second parameter
295      * specifies nontoken delimiters, the set of token delimiters is
296      * empty; <code>true</code> -- the second parameter specifies token
297      * delimiters, the set of nontoken delimiters is empty.
298      * @throws NullPointerException if text is null.
299      *
300      * @since ostermillerutils 1.00.00
301      */

302     public StringTokenizer(String JavaDoc text, String JavaDoc delims, boolean delimsAreTokens){
303         this(text, (delimsAreTokens ? null : delims), (delimsAreTokens ? delims : null));
304     }
305
306     /**
307      * Constructs a string tokenizer for the specified string. The characters in the
308      * <code>nontokenDelims</code> argument are the delimiters for separating
309      * tokens. Delimiter characters themselves will not be treated as tokens.
310      * <p>
311      * Is equivalent to <code>StringTokenizer(text,nontokenDelims, null)</code>.
312      *
313      * @param text a string to be parsed.
314      * @param nontokenDelims the nontoken delimiters.
315      * @throws NullPointerException if text is null.
316      *
317      * @since ostermillerutils 1.00.00
318      */

319     public StringTokenizer(String JavaDoc text, String JavaDoc nontokenDelims){
320         this(text, nontokenDelims, null);
321     }
322
323     /**
324      * Constructs a string tokenizer for the specified string. The tokenizer uses
325      * " \t\n\r\f" as a delimiter set of nontoken delimiters, and an empty token
326      * delimiter set.
327      * <p>
328      * Is equivalent to <code>StringTokenizer(text, " \t\n\r\f", null)</code>.
329      *
330      * @param text a string to be parsed.
331      * @throws NullPointerException if text is null.
332      *
333      * @since ostermillerutils 1.00.00
334      */

335     public StringTokenizer(String JavaDoc text){
336         this(text, " \t\n\r\f", null);
337     }
338
339     /**
340      * Set the text to be tokenized in this StringTokenizer.
341      * <p>
342      * This is useful for StringTokenizer re-use, so that new string tokenizers do not
343      * have to be created for each string you want to tokenize.
344      * <p>
345      * The string will be tokenized from the beginning of the string.
346      *
347      * @param text a string to be parsed.
348      * @throws NullPointerException if text is null.
349      *
350      * @since ostermillerutils 1.00.00
351      */

352     public void setText(String JavaDoc text){
353         if (text == null){
354             throw new NullPointerException JavaDoc();
355         }
356         this.text = text;
357         strLength = text.length();
358         emptyReturned = false;
359         // set the position to start evaluation to zero
360
// unless the string has no length, in which case
361
// the entire string has already been examined.
362
position = (strLength > 0 ? 0: -1);
363         // because the text was changed since the last time the delimiters
364
// were changed we need to set the delimiter changed position
365
delimsChangedPosition = 0;
366         // The token count changes when the text changes
367
tokenCount = -1;
368     }
369
370     /**
371      * Set the delimiters for this StringTokenizer.
372      * The position must be initialized before this method is used.
373      * (setText does this and it is called from the constructor)
374      *
375      * @param nontokenDelims delimiters that should not be returned as tokens.
376      * @param tokenDelims delimiters that should be returned as tokens.
377      *
378      * @since ostermillerutils 1.00.00
379      */

380     private void setDelims(String JavaDoc nontokenDelims, String JavaDoc tokenDelims){
381         this.nontokenDelims = nontokenDelims;
382         this.tokenDelims = tokenDelims;
383         // If we change delimiters, we do not want to start fresh,
384
// without returning empty tokens.
385
// the delimiter changed position can never be less than
386
// zero, unlike position.
387
delimsChangedPosition = (position != -1 ? position : strLength);
388         // set the max delimiter
389
maxDelimChar = 0;
390         for (int i=0; nontokenDelims != null && i < nontokenDelims.length(); i++){
391             if (maxDelimChar < nontokenDelims.charAt(i)){
392                 maxDelimChar = nontokenDelims.charAt(i);
393             }
394         }
395         for (int i=0; tokenDelims != null && i < tokenDelims.length(); i++){
396             if (maxDelimChar < tokenDelims.charAt(i)){
397                 maxDelimChar = tokenDelims.charAt(i);
398             }
399         }
400         // Changing the delimiters may change the number of tokens
401
tokenCount = -1;
402     }
403
404
405     /**
406      * Tests if there are more tokens available from this tokenizer's string.
407      * If this method returns <tt>true</tt>, then a subsequent call to
408      * <tt>nextToken</tt> with no argument will successfully return a token.
409      * <p>
410      * The current position is not changed.
411      *
412      * @return <code>true</code> if and only if there is at least one token in the
413      * string after the current position; <code>false</code> otherwise.
414      *
415      * @since ostermillerutils 1.00.00
416      */

417     public boolean hasMoreTokens(){
418
419         // handle the easy case in which the number
420
// of tokens has been counted.
421
if (tokenCount == 0){
422             return false;
423         } else if (tokenCount > 0){
424             return true;
425         }
426
427         // copy over state variables from the class to local
428
// variables so that the state of this object can be
429
// restored to the state that it was in before this
430
// method was called.
431
int savedPosition = position;
432         boolean savedEmptyReturned = emptyReturned;
433
434         int workingPosition = position;
435         boolean workingEmptyReturned = emptyReturned;
436         boolean onToken = advancePosition();
437         while(position != workingPosition ||
438             emptyReturned != workingEmptyReturned){
439             if (onToken){
440                 // restore object state
441
position = savedPosition;
442                 emptyReturned = savedEmptyReturned;
443                 return true;
444             }
445             workingPosition = position;
446             workingEmptyReturned = emptyReturned;
447             onToken = advancePosition();
448         }
449
450         // restore object state
451
position = savedPosition;
452         emptyReturned = savedEmptyReturned;
453         return false;
454     }
455
456     /**
457      * Returns the next token from this string tokenizer.
458      * <p>
459      * The current position is set after the token returned.
460      *
461      * @return the next token from this string tokenizer.
462      * @throws NoSuchElementException if there are no more tokens in this tokenizer's string.
463      *
464      * @since ostermillerutils 1.00.00
465      */

466     public String JavaDoc nextToken(){
467         int workingPosition = position;
468         boolean workingEmptyReturned = emptyReturned;
469         boolean onToken = advancePosition();
470         while(position != workingPosition ||
471             emptyReturned != workingEmptyReturned){
472             if (onToken){
473                 // returning a token decreases the token count
474
tokenCount--;
475                 return (emptyReturned ? "" : text.substring(workingPosition, (position != -1) ? position : strLength));
476             }
477             workingPosition = position;
478             workingEmptyReturned = emptyReturned;
479             onToken = advancePosition();
480         }
481         throw new java.util.NoSuchElementException JavaDoc();
482     }
483
484     /**
485      * Advances the current position so it is before the next token.
486      * <p>
487      * This method skips nontoken delimiters but does not skip
488      * token delimiters.
489      * <p>
490      * This method is useful when switching to the new delimiter sets (see the
491      * second example in the class comment.)
492      *
493      * @return <code>true</code> if there are more tokens, <code>false</code> otherwise.
494      *
495      * @since ostermillerutils 1.00.00
496      */

497     public boolean skipDelimiters(){
498         int workingPosition = position;
499         boolean workingEmptyReturned = emptyReturned;
500         boolean onToken = advancePosition();
501
502         // skipping delimiters may cause the number of tokens to change
503
tokenCount = -1;
504
505         while(position != workingPosition ||
506             emptyReturned != workingEmptyReturned){
507             if (onToken){
508                 // restore the state to just as it was before we found
509
// this token and return
510
position = workingPosition;
511                 emptyReturned = workingEmptyReturned;
512                 return true;
513             }
514             workingPosition = position;
515             workingEmptyReturned = emptyReturned;
516             onToken = advancePosition();
517         }
518
519         // the end of the string was reached
520
// without finding any tokens
521
return false;
522     }
523
524     /**
525      * Calculates the number of times that this tokenizer's <code>nextToken</code>
526      * method can be called before it generates an exception. The current position
527      * is not advanced.
528      *
529      * @return the number of tokens remaining in the string using the current
530      * delimiter set.
531      *
532      * @see #nextToken()
533      * @since ostermillerutils 1.00.00
534      */

535     public int countTokens(){
536
537         // return the cached token count if a cache
538
// is available.
539
if (this.tokenCount >=0){
540             return this.tokenCount;
541         }
542
543         int tokenCount = 0;
544
545         // copy over state variables from the class to local
546
// variables so that the state of this object can be
547
// restored to the state that it was in before this
548
// method was called.
549
int savedPosition = position;
550         boolean savedEmptyReturned = emptyReturned;
551
552         int workingPosition = position;
553         boolean workingEmptyReturned = emptyReturned;
554         boolean onToken = advancePosition();
555         while(position != workingPosition ||
556             emptyReturned != workingEmptyReturned){
557             if (onToken){
558                 tokenCount++;
559             }
560             workingPosition = position;
561             workingEmptyReturned = emptyReturned;
562             onToken = advancePosition();
563         }
564
565         // restore object state
566
position = savedPosition;
567         emptyReturned = savedEmptyReturned;
568
569         // Save the token count in case this is called again
570
// so we wouldn't have to do so much work.
571
this.tokenCount = tokenCount;
572
573         return tokenCount;
574     }
575
576     /**
577      * Set the delimiters used to this set of (nontoken) delimiters.
578      *
579      * @param delims the new set of nontoken delimiters (the set of token delimiters will be empty).
580      *
581      * @since ostermillerutils 1.00.00
582      */

583     public void setDelimiters(String JavaDoc delims){
584         setDelims(delims, null);
585     }
586
587     /**
588      * Set the delimiters used to this set of delimiters.
589      *
590      * @param delims the new set of delimiters.
591      * @param delimsAreTokens flag indicating whether the first parameter specifies
592      * token or nontoken delimiters: false -- the first parameter specifies nontoken
593      * delimiters, the set of token delimiters is empty; true -- the first parameter
594      * specifies token delimiters, the set of nontoken delimiters is empty.
595      *
596      * @since ostermillerutils 1.00.00
597      */

598     public void setDelimiters(String JavaDoc delims, boolean delimsAreTokens){
599         setDelims((delimsAreTokens ? null : delims), (delimsAreTokens ? delims : null));
600     }
601
602     /**
603      * Set the delimiters used to this set of delimiters.
604      *
605      * @param nontokenDelims the new set of nontoken delimiters.
606      * @param tokenDelims the new set of token delimiters.
607      *
608      * @since ostermillerutils 1.00.00
609      */

610     public void setDelimiters(String JavaDoc nontokenDelims, String JavaDoc tokenDelims){
611         setDelims(nontokenDelims, tokenDelims);
612     }
613
614     /**
615      * Set the delimiters used to this set of delimiters.
616      *
617      * @param nontokenDelims the new set of nontoken delimiters.
618      * @param tokenDelims the new set of token delimiters.
619      * @param returnEmptyTokens true if empty tokens may be returned; false otherwise.
620      *
621      * @since ostermillerutils 1.00.00
622      */

623     public void setDelimiters(String JavaDoc nontokenDelims, String JavaDoc tokenDelims, boolean returnEmptyTokens){
624         setDelims(nontokenDelims, tokenDelims);
625         setReturnEmptyTokens(returnEmptyTokens);
626     }
627
628     /**
629      * Calculates the number of times that this tokenizer's <code>nextToken</code>
630      * method can be called before it generates an exception using the given set of
631      * (nontoken) delimiters. The delimiters given will be used for future calls to
632      * nextToken() unless new delimiters are given. The current position
633      * is not advanced.
634      *
635      * @param delims the new set of nontoken delimiters (the set of token delimiters will be empty).
636      * @return the number of tokens remaining in the string using the new
637      * delimiter set.
638      *
639      * @see #countTokens()
640      * @since ostermillerutils 1.00.00
641      */

642     public int countTokens(String JavaDoc delims){
643         setDelims(delims, null);
644         return countTokens();
645     }
646
647     /**
648      * Calculates the number of times that this tokenizer's <code>nextToken</code>
649      * method can be called before it generates an exception using the given set of
650      * delimiters. The delimiters given will be used for future calls to
651      * nextToken() unless new delimiters are given. The current position
652      * is not advanced.
653      *
654      * @param delims the new set of delimiters.
655      * @param delimsAreTokens flag indicating whether the first parameter specifies
656      * token or nontoken delimiters: false -- the first parameter specifies nontoken
657      * delimiters, the set of token delimiters is empty; true -- the first parameter
658      * specifies token delimiters, the set of nontoken delimiters is empty.
659      * @return the number of tokens remaining in the string using the new
660      * delimiter set.
661      *
662      * @see #countTokens()
663      * @since ostermillerutils 1.00.00
664      */

665     public int countTokens(String JavaDoc delims, boolean delimsAreTokens){
666         setDelims((delimsAreTokens ? null : delims), (delimsAreTokens ? delims : null));
667         return countTokens();
668     }
669
670     /**
671      * Calculates the number of times that this tokenizer's <code>nextToken</code>
672      * method can be called before it generates an exception using the given set of
673      * delimiters. The delimiters given will be used for future calls to
674      * nextToken() unless new delimiters are given. The current position
675      * is not advanced.
676      *
677      * @param nontokenDelims the new set of nontoken delimiters.
678      * @param tokenDelims the new set of token delimiters.
679      * @return the number of tokens remaining in the string using the new
680      * delimiter set.
681      *
682      * @see #countTokens()
683      * @since ostermillerutils 1.00.00
684      */

685     public int countTokens(String JavaDoc nontokenDelims, String JavaDoc tokenDelims){
686         setDelims(nontokenDelims, tokenDelims);
687         return countTokens();
688     }
689
690     /**
691      * Calculates the number of times that this tokenizer's <code>nextToken</code>
692      * method can be called before it generates an exception using the given set of
693      * delimiters. The delimiters given will be used for future calls to
694      * nextToken() unless new delimiters are given. The current position
695      * is not advanced.
696      *
697      * @param nontokenDelims the new set of nontoken delimiters.
698      * @param tokenDelims the new set of token delimiters.
699      * @param returnEmptyTokens true if empty tokens may be returned; false otherwise.
700      * @return the number of tokens remaining in the string using the new
701      * delimiter set.
702      *
703      * @see #countTokens()
704      * @since ostermillerutils 1.00.00
705      */

706     public int countTokens(String JavaDoc nontokenDelims, String JavaDoc tokenDelims, boolean returnEmptyTokens){
707         setDelims(nontokenDelims, tokenDelims);
708         setReturnEmptyTokens(returnEmptyTokens);
709         return countTokens();
710     }
711
712     /**
713      * Advances the state of the tokenizer to the next token or delimiter. This method only
714      * modifies the class variables position, and emptyReturned. The type of token that
715      * should be emitted can be deduced by examining the changes to these two variables.
716      * If there are no more tokens, the state of these variables does not change at all.
717      *
718      * @return true if we are at a juncture at which a token may be emitted, false otherwise.
719      *
720      * @since ostermillerutils 1.00.00
721      */

722     private boolean advancePosition(){
723         // if we are returning empty tokens, we are just starting to tokenizer
724
// and there is a delimiter at the beginning of the string or the string
725
// is empty we need to indicate that there is an empty token at the beginning.
726
// The beginning is defined as where the delimiters were last changed.
727
if (returnEmptyTokens && !emptyReturned &&
728             (delimsChangedPosition == position ||
729             (position == -1 && strLength == delimsChangedPosition))){
730             if (strLength == delimsChangedPosition){
731                 // Case in which the string (since delim change)
732
// is empty, but because we are returning empty
733
// tokens, a single empty token should be returned.
734
emptyReturned = true;
735                 /*System.out.println("Empty token for empty string.");*/
736                 return true;
737             } else {
738                 char c = text.charAt(position);
739                 if (c <= maxDelimChar &&
740                     (nontokenDelims != null && nontokenDelims.indexOf(c) != -1) ||
741                     (tokenDelims != null && tokenDelims.indexOf(c) != -1)){
742                     // There is delimiter at the very start of the string
743
// so we must return an empty token at the beginning.
744
emptyReturned = true;
745                     /*System.out.println("Empty token at beginning.");*/
746                     return true;
747                 }
748             }
749         }
750         // The main loop
751
// Do this as long as parts of the string have yet to be examined
752
while (position != -1){
753             char c = text.charAt(position);
754             if (returnEmptyTokens && !emptyReturned && position > delimsChangedPosition){
755                 char c1 = text.charAt(position - 1);
756                 // Examine the current character and the one before it.
757
// If both of them are delimiters, then we need to return
758
// an empty delimiter. Note that characters that were examined
759
// before the delimiters changed should not be reexamined.
760
if (c <= maxDelimChar && c1 <= maxDelimChar &&
761                     ((nontokenDelims != null && nontokenDelims.indexOf(c) != -1) ||
762                     (tokenDelims != null && tokenDelims.indexOf(c) != -1)) &&
763                     ((nontokenDelims != null && nontokenDelims.indexOf(c1) != -1) ||
764                     (tokenDelims != null && tokenDelims.indexOf(c1) != -1))){
765                     emptyReturned = true;
766                     /*System.out.println("Empty token.");*/
767                     return true;
768                 }
769             }
770
771             int nextDelimiter = (position < strLength - 1 ? indexOfNextDelimiter(position + 1) : -1);
772             if (c > maxDelimChar ||
773                 ((nontokenDelims == null || nontokenDelims.indexOf(c) == -1) &&
774                 (tokenDelims == null || tokenDelims.indexOf(c) == -1))){
775                 // token found
776
/*System.out.println("Token: '" +
777                     text.substring(position, (nextDelimiter == -1 ? strLength : nextDelimiter)) +
778                     "' at " + position + ".");*/

779                 position = nextDelimiter;
780                 emptyReturned = false;
781                 return true;
782             } else if (tokenDelims != null && tokenDelims.indexOf(c) != -1) {
783                 // delimiter that can be returned as a token found
784
emptyReturned = false;
785                 /*System.out.println("Delimiter: '" + c + "' at " + position + ".");*/
786                 position = (position < strLength -1 ? position +1 : -1);
787                 return true;
788             } else {
789                 // delimiter that is not a token found.
790
emptyReturned = false;
791                 position = (position < strLength -1 ? position +1 : -1);
792                 return false;
793             }
794         }
795         // handle the case that a token is at the end of the string and we should
796
// return empty tokens.
797
if (returnEmptyTokens && !emptyReturned && strLength > 0){
798             char c = text.charAt(strLength - 1);
799             if (c <= maxDelimChar &&
800                 (nontokenDelims != null && nontokenDelims.indexOf(c) != -1) ||
801                 (tokenDelims != null && tokenDelims.indexOf(c) != -1)){
802                 // empty token at the end of the string found.
803
emptyReturned = true;
804                 /*System.out.println("Empty token at end.");*/
805                 return true;
806             }
807         }
808         return false;
809     }
810
811     /**
812      * Returns the next token in this string tokenizer's string.
813      * <p>
814      * First, the sets of token and nontoken delimiters are changed to be the
815      * <code>tokenDelims</code> and <code>nontokenDelims</code>, respectively.
816      * Then the next token (with respect to new delimiters) in the string after the
817      * current position is returned.
818      * <p>
819      * The current position is set after the token returned.
820      * <p>
821      * The new delimiter sets remains the used ones after this call.
822      *
823      * @param nontokenDelims the new set of nontoken delimiters.
824      * @param tokenDelims the new set of token delimiters.
825      * @return the next token, after switching to the new delimiter set.
826      * @throws NoSuchElementException if there are no more tokens in this tokenizer's string.
827      * @see #nextToken()
828      *
829      * @since ostermillerutils 1.00.00
830      */

831     public String JavaDoc nextToken(String JavaDoc nontokenDelims, String JavaDoc tokenDelims){
832         setDelims(nontokenDelims, tokenDelims);
833         return nextToken();
834     }
835
836     /**
837      * Returns the next token in this string tokenizer's string.
838      * <p>
839      * First, the sets of token and nontoken delimiters are changed to be the
840      * <code>tokenDelims</code> and <code>nontokenDelims</code>, respectively;
841      * and whether or not to return empty tokens is set.
842      * Then the next token (with respect to new delimiters) in the string after the
843      * current position is returned.
844      * <p>
845      * The current position is set after the token returned.
846      * <p>
847      * The new delimiter set remains the one used for this call and empty tokens are
848      * returned in the future as they are in this call.
849      *
850      * @param nontokenDelims the new set of nontoken delimiters.
851      * @param tokenDelims the new set of token delimiters.
852      * @param returnEmptyTokens true if empty tokens may be returned; false otherwise.
853      * @return the next token, after switching to the new delimiter set.
854      * @throws NoSuchElementException if there are no more tokens in this tokenizer's string.
855      * @see #nextToken()
856      *
857      * @since ostermillerutils 1.00.00
858      */

859     public String JavaDoc nextToken(String JavaDoc nontokenDelims, String JavaDoc tokenDelims, boolean returnEmptyTokens){
860         setDelims(nontokenDelims, tokenDelims);
861         setReturnEmptyTokens(returnEmptyTokens);
862         return nextToken();
863     }
864
865     /**
866      * Returns the next token in this string tokenizer's string.
867      * <p>
868      * Is equivalent to:
869      * <ul>
870      * <li> If the second parameter is <code>false</code> --
871      * <code>nextToken(delims, null)</code>
872      * <li> If the second parameter is <code>true</code> --
873      * <code>nextToken(null ,delims)</code>
874      * </ul>
875      * <p>
876      * @param delims the new set of token or nontoken delimiters.
877      * @param delimsAreTokens
878      * flag indicating whether the first parameter specifies token or
879      * nontoken delimiters: <code>false</code> -- the first parameter
880      * specifies nontoken delimiters, the set of token delimiters is
881      * empty; <code>true</code> -- the first parameter specifies token
882      * delimiters, the set of nontoken delimiters is empty.
883      * @return the next token, after switching to the new delimiter set.
884      * @throws NoSuchElementException if there are no more tokens in this tokenizer's string.
885      *
886      * @see #nextToken(String,String)
887      * @since ostermillerutils 1.00.00
888      */

889     public String JavaDoc nextToken(String JavaDoc delims, boolean delimsAreTokens){
890         return (delimsAreTokens ? nextToken(null, delims) : nextToken(delims, null));
891     }
892
893     /**
894      * Returns the next token in this string tokenizer's string.
895      * <p>
896      * Is equivalent to <code>nextToken(delims, null)</code>.
897      *
898      * @param nontokenDelims the new set of nontoken delimiters (the set of
899      * token delimiters will be empty).
900      * @return the next token, after switching to the new delimiter set.
901      * @throws NoSuchElementException if there are no more tokens in this
902      * tokenizer's string.
903      *
904      * @see #nextToken(String,String)
905      * @since ostermillerutils 1.00.00
906      */

907     public String JavaDoc nextToken(String JavaDoc nontokenDelims){
908         return nextToken(nontokenDelims, null);
909     }
910
911     /**
912      * Similar to String.indexOf(int, String) but will look for
913      * any character from string rather than the entire string.
914      *
915      * @param start index in text at which to begin the search
916      * @return index of the first delimiter from the start index (inclusive), or -1
917      * if there are no more delimiters in the string
918      *
919      * @since ostermillerutils 1.00.00
920      */

921     private int indexOfNextDelimiter(int start){
922         char c;
923         int next;
924         for (next = start; (c = text.charAt(next)) > maxDelimChar ||
925             ((nontokenDelims == null || nontokenDelims.indexOf(c) == -1) &&
926             (tokenDelims == null || tokenDelims.indexOf(c) == -1)); next++){
927             if (next == strLength - 1){
928                 // we have reached the end of the string without
929
// finding a delimiter
930
return (-1);
931             }
932         }
933         return next;
934     }
935
936     /**
937      * Returns the same value as the <code>hasMoreTokens()</code> method. It exists
938      * so that this class can implement the <code>Enumeration</code> interface.
939      *
940      * @return <code>true</code> if there are more tokens;
941      * <code>false</code> otherwise.
942      *
943      * @see java.util.Enumeration
944      * @see #hasMoreTokens()
945      * @since ostermillerutils 1.00.00
946      */

947     public boolean hasMoreElements(){
948         return hasMoreTokens();
949     }
950
951     /**
952      * Returns the same value as the <code>nextToken()</code> method, except that
953      * its declared return value is <code>Object</code> rather than
954      * <code>String</code>. It exists so that this class can implement the
955      * <code>Enumeration</code> interface.
956      *
957      * @return the next token in the string.
958      * @throws NoSuchElementException if there are no more tokens in this tokenizer's string.
959      *
960      * @see java.util.Enumeration
961      * @see #nextToken()
962      * @since ostermillerutils 1.00.00
963      */

964     public Object JavaDoc nextElement(){
965         return nextToken();
966     }
967
968     /**
969      * Returns the same value as the <code>hasMoreTokens()</code> method. It exists
970      * so that this class can implement the <code>Iterator</code> interface.
971      *
972      * @return <code>true</code> if there are more tokens;
973      * <code>false</code> otherwise.
974      *
975      * @see java.util.Iterator
976      * @see #hasMoreTokens()
977      * @since ostermillerutils 1.00.00
978      */

979     public boolean hasNext(){
980         return hasMoreTokens();
981     }
982
983     /**
984      * Returns the same value as the <code>nextToken()</code> method, except that
985      * its declared return value is <code>Object</code> rather than
986      * <code>String</code>. It exists so that this class can implement the
987      * <code>Iterator</code> interface.
988      *
989      * @return the next token in the string.
990      * @throws NoSuchElementException if there are no more tokens in this tokenizer's string.
991      *
992      * @see java.util.Iterator
993      * @see #nextToken()
994      * @since ostermillerutils 1.00.00
995      */

996     public Object JavaDoc next(){
997         return nextToken();
998     }
999
1000    /**
1001     * This implementation always throws <code>UnsupportedOperationException</code>.
1002     * It exists so that this class can implement the <code>Iterator</code> interface.
1003     *
1004     * @throws UnsupportedOperationException always is thrown.
1005     *
1006     * @see java.util.Iterator
1007     * @since ostermillerutils 1.00.00
1008     */

1009    public void remove(){
1010        throw new UnsupportedOperationException JavaDoc();
1011    }
1012
1013    /**
1014     * Set whether empty tokens should be returned from this point in
1015     * in the tokenizing process onward.
1016     * <P>
1017     * Empty tokens occur when two delimiters are next to each other
1018     * or a delimiter occurs at the beginning or end of a string. If
1019     * empty tokens are set to be returned, and a comma is the non token
1020     * delimiter, the following table shows how many tokens are in each
1021     * string.<br>
1022     * <table><tr><th>String<th><th>Number of tokens<th></tr>
1023     * <tr><td align=right>"one,two"<td><td>2 - normal case with no empty tokens.<td></tr>
1024     * <tr><td align=right>"one,,three"<td><td>3 including the empty token in the middle.<td></tr>
1025     * <tr><td align=right>"one,"<td><td>2 including the empty token at the end.<td></tr>
1026     * <tr><td align=right>",two"<td><td>2 including the empty token at the beginning.<td></tr>
1027     * <tr><td align=right>","<td><td>2 including the empty tokens at the beginning and the ends.<td></tr>
1028     * <tr><td align=right>""<td><td>1 - all strings will have at least one token if empty tokens are returned.<td></tr></table>
1029     *
1030     * @param returnEmptyTokens true iff empty tokens should be returned.
1031     *
1032     * @since ostermillerutils 1.00.00
1033     */

1034    public void setReturnEmptyTokens(boolean returnEmptyTokens){
1035        // this could effect the number of tokens
1036
tokenCount = -1;
1037        this.returnEmptyTokens = returnEmptyTokens;
1038    }
1039
1040    /**
1041     * Get the the index of the character immediately
1042     * following the end of the last token. This is the position at which this tokenizer will begin looking
1043     * for the next token when a <code>nextToken()</code> method is invoked.
1044     *
1045     * @return the current position or -1 if the entire string has been tokenized.
1046     *
1047     * @since ostermillerutils 1.00.00
1048     */

1049    public int getCurrentPosition(){
1050        return this.position;
1051    }
1052
1053    /**
1054     * Retrieve all of the remaining tokens in a String array.
1055     * This method uses the options that are currently set for
1056     * the tokenizer and will advance the state of the tokenizer
1057     * such that <code>hasMoreTokens()</code> will return false.
1058     *
1059     * @return an array of tokens from this tokenizer.
1060     *
1061     * @since ostermillerutils 1.00.00
1062     */

1063    public String JavaDoc[] toArray(){
1064        String JavaDoc[] tokenArray = new String JavaDoc[countTokens()];
1065        for(int i=0; hasMoreTokens(); i++) {
1066            tokenArray[i] = nextToken();
1067        }
1068        return tokenArray;
1069    }
1070
1071    /**
1072     * Retrieves the rest of the text as a single token.
1073     * After calling this method hasMoreTokens() will always return false.
1074     *
1075     * @return any part of the text that has not yet been tokenized.
1076     *
1077     * @since ostermillerutils 1.00.00
1078     */

1079    public String JavaDoc restOfText(){
1080        return nextToken(null, null);
1081    }
1082
1083    /**
1084     * Returns the same value as nextToken() but does not alter
1085     * the internal state of the Tokenizer. Subsequent calls
1086     * to peek() or a call to nextToken() will return the same
1087     * token again.
1088     *
1089     * @return the next token from this string tokenizer.
1090     * @throws NoSuchElementException if there are no more tokens in this tokenizer's string.
1091     *
1092     * @since ostermillerutils 1.00.00
1093     */

1094    public String JavaDoc peek(){
1095        // copy over state variables from the class to local
1096
// variables so that the state of this object can be
1097
// restored to the state that it was in before this
1098
// method was called.
1099
int savedPosition = position;
1100        boolean savedEmptyReturned = emptyReturned;
1101        int savedtokenCount = tokenCount;
1102
1103        // get the next token
1104
String JavaDoc retval = nextToken();
1105
1106        // restore the state
1107
position = savedPosition;
1108        emptyReturned = savedEmptyReturned;
1109        tokenCount = savedtokenCount;
1110
1111        // return the nextToken;
1112
return(retval);
1113    }
1114}
1115
Popular Tags