// KickJava - Java API By Example, From Geeks To Geeks.
//
// Java > Open Source Codes > com > sun > cli > util > TokenizerImpl


/*
 * The contents of this file are subject to the terms
 * of the Common Development and Distribution License
 * (the License). You may not use this file except in
 * compliance with the License.
 *
 * You can obtain a copy of the license at
 * https://glassfish.dev.java.net/public/CDDLv1.0.html or
 * glassfish/bootstrap/legal/CDDLv1.0.txt.
 * See the License for the specific language governing
 * permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL
 * Header Notice in each file and include the License file
 * at glassfish/bootstrap/legal/CDDLv1.0.txt.
 * If applicable, add the following below the CDDL Header,
 * with the fields enclosed by brackets [] replaced by
 * you own identifying information:
 * "Portions Copyrighted [year] [name of copyright owner]"
 *
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 */

/*
 * $Header: /cvs/glassfish/admin-cli/cli-api/src/java/com/sun/cli/util/TokenizerImpl.java,v 1.3 2005/12/25 03:46:00 tcfujii Exp $
 * $Revision: 1.3 $
 * $Date: 2005/12/25 03:46:00 $
 */


package com.sun.cli.util;

import java.text.StringCharacterIterator;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;


38 public final class TokenizerImpl implements Tokenizer
39 {
40     final String JavaDoc [] mTokens;
41     
42         public
43     TokenizerImpl(
44         String JavaDoc input,
45         String JavaDoc delimiters,
46         char escapeChar,
47         String JavaDoc escapableChars)
48     {
49         this( input, delimiters, true, escapeChar, escapableChars );
50     }
51     
52         public
53     TokenizerImpl(
54         String JavaDoc input,
55         String JavaDoc delimiters,
56         boolean multipleDelimsCountAsOne,
57         char escapeChar,
58         String JavaDoc escapableChars)
59     {
60         final TokenizerInternal worker = new TokenizerInternal( input, delimiters,
61             multipleDelimsCountAsOne, escapeChar, escapableChars );
62         
63         mTokens = worker.parseTokens();
64     }
65     
66         public String JavaDoc []
67     getTokens()
68     {
69         return( mTokens );
70     }
71     
72         public Iterator JavaDoc
73     iterator()
74     {
75         return( Arrays.asList( getTokens() ).listIterator() );
76     }
77 }
78
79
80
81 final class TokenizerInternal
82 {
83     final String JavaDoc mInput;
84     final String JavaDoc mDelimiters;
85     final boolean mMultipleDelimsCountAsOne;
86     final char mEscapeChar;
87     final String JavaDoc mEscapableChars;
88     final StringCharacterIterator JavaDoc mIter;
89     
90     
91     
92         private static void
93     dm( Object JavaDoc msg )
94     {
95         System.out.println( msg.toString() );
96     }
97     
98         public
99     TokenizerInternal(
100         String JavaDoc input,
101         String JavaDoc delimiters,
102         char escapeChar,
103         String JavaDoc escapableChars)
104     {
105         this( input, delimiters, true, escapeChar, escapableChars );
106     }
107     
108         public
109     TokenizerInternal(
110         String JavaDoc input,
111         String JavaDoc delimiters,
112         boolean multipleDelimsCountAsOne,
113         char escapeChar,
114         String JavaDoc escapableChars)
115     {
116         checkEscapableChars( escapableChars );
117         
118         mInput = input;
119         mDelimiters = delimiters;
120         mMultipleDelimsCountAsOne = multipleDelimsCountAsOne;
121         mEscapeChar = escapeChar;
122         mEscapableChars = escapableChars;
123         mIter = new StringCharacterIterator JavaDoc( input );
124     }
125     
126     /*
127         Some characters may not be escaped, hex digits in particular
128         because they can also be used as character codes.
129      */

130         private static void
131     checkEscapableChars( final String JavaDoc escapableChars )
132     {
133         final int length = escapableChars.length();
134         
135         for( int i = 0; i < length; ++i )
136         {
137             if ( isHexDigit( escapableChars.charAt( i ) ) )
138             {
139                 throw new IllegalArgumentException JavaDoc();
140             }
141         }
142     }
143     
144         String JavaDoc []
145     parseTokens()
146     {
147         final ArrayList JavaDoc list = new ArrayList JavaDoc();
148         
149         while ( hasMoreChars() )
150         {
151             list.add( nextToken() );
152         }
153         
154         // if we had a trailing delimiter, there is an empty token following it
155
// that normal parsing will not produce, since the end of the input
156
// has been reached.
157
final int inputLength = mInput.length();
158         if ( inputLength != 0 &&
159             isDelim( mDelimiters, mInput.charAt( inputLength -1 ) ) )
160         {
161             list.add( "" );
162         }
163         
164         final String JavaDoc [] tokens = new String JavaDoc[ list.size() ];
165         
166         return( (String JavaDoc []) list.toArray( tokens ) );
167     }
168     
169         boolean
170     isSpecialEscapeChar( char theChar )
171     {
172         // carriage return or newline
173
return( theChar == 'n' || theChar == 'r' );
174     }
175     
176         boolean
177     isCallerProvidedEscapeChar( char theChar )
178     {
179         return( mEscapableChars.indexOf( theChar ) >= 0 || theChar == mEscapeChar );
180     }
181     
182         boolean
183     isEscapableChar( char theChar )
184     {
185         return( isCallerProvidedEscapeChar( theChar ) || isSpecialEscapeChar( theChar ) );
186     }
187     
188         boolean
189     isDelim( String JavaDoc delims, char theChar )
190     {
191         return( delims != null &&
192             delims.indexOf( theChar ) >= 0 || theChar == mIter.DONE );
193     }
194     
195         static boolean
196     isDigit( char theChar )
197     {
198         return( (theChar >= '0' && theChar <= '9') );
199     }
200     
201
202         static boolean
203     isHexDigit( char theChar )
204     {
205         return( isDigit( theChar ) || (theChar >= 'a' && theChar <= 'f') ||
206             (theChar >= 'A' && theChar <= 'F') );
207     }
208     
209         boolean
210     hasMoreChars()
211     {
212         return( mIter.current() != mIter.DONE );
213     }
214     
215         char
216     peekNextChar()
217     {
218         return( mIter.current() );
219     }
220
221         char
222     nextChar()
223     {
224         final char theChar = mIter.current();
225         mIter.next();
226         
227         return( theChar );
228     }
229     
230         void
231     skipDelim( String JavaDoc delimiters )
232     {
233         while ( hasMoreChars() && isDelim( delimiters, peekNextChar() ) )
234         {
235             nextChar(); // skip it
236
}
237     }
238     
239         char
240     getEscapedChar( final char inputChar )
241     {
242         char outChar = 0;
243         
244         if ( isCallerProvidedEscapeChar( inputChar ) )
245         {
246             outChar = inputChar;
247         }
248         else
249         {
250             if ( inputChar == 'n' )
251             {
252                 outChar = '\n';
253             }
254             else if ( inputChar == 'r' )
255             {
256                 outChar = '\r';
257             }
258             else
259             {
260                 assert( false );
261             }
262         }
263         
264         return( outChar );
265     }
266     
267         char
268     handleEscapeChar()
269     {
270         char resultChar = mEscapeChar;
271         
272         // retain starting position in case it's not a real escaped char
273
final int curIndex = mIter.getIndex();
274         boolean valid = false;
275         
276         final char nextChar = nextChar();
277         if ( isEscapableChar( nextChar ) )
278         {
279             resultChar = getEscapedChar( nextChar );
280             valid = true;
281         }
282         else
283         {
284             // if valid hexadecimal, convert two hex digits to a number
285
if ( isHexDigit( nextChar ) )
286             {
287                 final char nextNextChar = nextChar();
288                 if ( isHexDigit( nextNextChar ) )
289                 {
290                     final int newChar = (((int)nextChar) << 4) + (int)nextNextChar;
291                     resultChar = (char)newChar;
292                     valid = true;
293                 }
294             }
295         }
296         
297         if ( ! valid )
298         {
299             assert( resultChar == mEscapeChar );
300             mIter.setIndex( curIndex );
301         }
302         
303         return( resultChar );
304     }
305     
306         String JavaDoc
307     parseLiteralString( String JavaDoc delimiters )
308     {
309         // must start with the string delimiter
310
assert( peekNextChar() == Tokenizer.LITERAL_STRING_DELIM );
311         nextChar(); // skip it
312

313         // did we find a trailing end-of-string delimiter?
314
boolean foundEndDelim = false;
315         
316         // escaping still in force, but delimiters are defeated until the string delim
317
// is reached.
318
StringBuffer JavaDoc tok = new StringBuffer JavaDoc();
319     
320         while ( hasMoreChars() )
321         {
322             final char theChar = nextChar();
323             
324             if ( theChar == mEscapeChar )
325             {
326                 final char escapedChar = handleEscapeChar();
327                 
328                 tok.append( escapedChar );
329             }
330             else if ( theChar == Tokenizer.LITERAL_STRING_DELIM )
331             {
332                 // end of the literal string if there are no more chars or the next char
333
// is a delimter
334
if ( ! hasMoreChars() )
335                 {
336                     foundEndDelim = true;
337                 }
338                 else if ( isDelim( delimiters, peekNextChar() ) )
339                 {
340                     foundEndDelim = true;
341                     nextChar();
342                 }
343                 break;
344             }
345             else
346             {
347                 tok.append( theChar );
348             }
349         }
350         
351         if ( ! foundEndDelim )
352         {
353             // if we didn't find an ending delimter, treat the start one as a literal
354
return( '\"' + tok.toString() );
355         }
356         
357         return( tok.toString() );
358     }
359
360         String JavaDoc
361     parseToken( String JavaDoc delimiters )
362     {
363         final char escapeChar = mEscapeChar;
364         StringBuffer JavaDoc tok = new StringBuffer JavaDoc();
365     
366         while ( hasMoreChars() )
367         {
368             final char theChar = nextChar();
369             
370             if ( isDelim( delimiters, theChar ) )
371             {
372                 break;
373             }
374             
375             if ( theChar == escapeChar )
376             {
377                 final char escapedChar = handleEscapeChar();
378                 
379                 tok.append( escapedChar );
380             }
381             else
382             {
383                 tok.append( theChar );
384             }
385         }
386         
387         return( tok.toString() );
388     }
389     
390         String JavaDoc
391     nextToken( )
392     {
393         if ( ! hasMoreChars() )
394         {
395             throw new IllegalArgumentException JavaDoc( "no more tokens available" );
396         }
397         
398         String JavaDoc tok = null;
399         
400         if ( peekNextChar() == Tokenizer.LITERAL_STRING_DELIM )
401         {
402             tok = parseLiteralString( mDelimiters );
403         }
404         else
405         {
406             tok = parseToken( mDelimiters );
407         }
408         // a single delimiter following the token as been consumed
409

410         if ( mMultipleDelimsCountAsOne )
411         {
412             skipDelim( mDelimiters );
413         }
414         
415         return( tok );
416     }
417 }
418
419
// Popular Tags