KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sun > appserv > management > util > misc > TokenizerImpl


1 /*
2  * The contents of this file are subject to the terms
3  * of the Common Development and Distribution License
4  * (the License). You may not use this file except in
5  * compliance with the License.
6  *
7  * You can obtain a copy of the license at
8  * https://glassfish.dev.java.net/public/CDDLv1.0.html or
9  * glassfish/bootstrap/legal/CDDLv1.0.txt.
10  * See the License for the specific language governing
11  * permissions and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL
14  * Header Notice in each file and include the License file
15  * at glassfish/bootstrap/legal/CDDLv1.0.txt.
16  * If applicable, add the following below the CDDL Header,
17  * with the fields enclosed by brackets [] replaced by
18  * you own identifying information:
19  * "Portions Copyrighted [year] [name of copyright owner]"
20  *
21  * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
22  */

23 package com.sun.appserv.management.util.misc;
24
25 import java.text.StringCharacterIterator JavaDoc;
26 import java.util.List JavaDoc;
27 import java.util.ArrayList JavaDoc;
28 import java.util.Iterator JavaDoc;
29 import java.util.Arrays JavaDoc;
30
31
32 class IllegalEscapeSequenceException extends TokenizerException
33 {
34     static final long serialVersionUID = 6579038898242625567L;
35     public IllegalEscapeSequenceException( String JavaDoc msg ) { super( msg ); }
36 }
37
38 final class UnterminatedLiteralStringException extends TokenizerException
39 {
40     static final long serialVersionUID = -1327166469948605347L;
41     public UnterminatedLiteralStringException( String JavaDoc msg ) { super( msg ); }
42 }
43
44 final class MalformedUnicodeSequenceException extends IllegalEscapeSequenceException
45 {
46     static final long serialVersionUID = 6604956430084180525L;
47     public MalformedUnicodeSequenceException( String JavaDoc msg ) { super( msg ); }
48 }
49
50 /**
51  */

52 public final class TokenizerImpl implements Tokenizer
53 {
54     final String JavaDoc[] mTokens;
55     
56         public
57     TokenizerImpl( String JavaDoc input )
58         throws TokenizerException
59     {
60         this( input, new TokenizerParams() );
61     }
62     
63     private static final char QUOTE_CHAR = '\"';
64     
65         public
66     TokenizerImpl(
67         String JavaDoc input,
68         TokenizerParams params )
69         throws TokenizerException
70     {
71         final TokenizerInternal worker = new TokenizerInternal( input, params );
72     
73         List JavaDoc<Object JavaDoc> allTokens = worker.parseTokens( );
74
75         if ( params.mMultipleDelimsCountAsOne )
76         {
77             allTokens = removeMultipleDelims( allTokens );
78         }
79         
80         mTokens = interpretTokenList( allTokens );
81     }
82     
83         final static List JavaDoc<Object JavaDoc>
84     removeMultipleDelims( List JavaDoc<Object JavaDoc> list )
85     {
86         final List JavaDoc<Object JavaDoc> resultList = new ArrayList JavaDoc<Object JavaDoc>();
87         
88         boolean lastWasDelim = false;
89         for( final Object JavaDoc value : list )
90         {
91             if ( value instanceof String JavaDoc )
92             {
93                 resultList.add( value );
94                 lastWasDelim = false;
95             }
96             else if ( ! lastWasDelim )
97             {
98                 // add the delimiter
99
resultList.add( value );
100                 lastWasDelim = true;
101             }
102         }
103         
104         return( resultList );
105     }
106     
107     /**
108         Interpret the parsed token list, which consists of a series of strings
109         and tokens. We need to handle the special cases where the list starts
110         with a delimiter and/or ends with a delimiter. Examples:
111         
112         "" => {}
113         "." => { "", "" }
114         "..." => { "", "", "", "" }
115         "x." => { "x", "" }
116         ".x" => { "", "x" }
117         "y.x" => { "y", "x" }
118      */

119         static String JavaDoc[]
120     interpretTokenList( List JavaDoc<Object JavaDoc> list )
121     {
122         final List JavaDoc<String JavaDoc> resultList = new ArrayList JavaDoc<String JavaDoc>();
123
124         boolean lastWasDelim = true;
125
126         for( final Object JavaDoc value : list )
127         {
128             if ( value instanceof String JavaDoc )
129             {
130                 resultList.add( (String JavaDoc)value );
131                 lastWasDelim = false;
132             }
133             else
134             {
135                 if ( lastWasDelim )
136                 {
137                     // this one's a delimiter, and so was the last one
138
// insert the implicit empty string
139
resultList.add( "" );
140                 }
141                 else
142                 {
143                     lastWasDelim = true;
144                 }
145             }
146         }
147         
148         // a trailing delimiter implies an empty string after it
149
if ( lastWasDelim && list.size() != 0 )
150         {
151             resultList.add( "" );
152         }
153         
154         return( (String JavaDoc[])resultList.toArray( new String JavaDoc[ resultList.size() ] ) );
155     }
156     
157         public String JavaDoc []
158     getTokens()
159     {
160         return( mTokens );
161     }
162 }
163
164
165
166 final class TokenizerInternal
167 {
168     final String JavaDoc mInput;
169     final TokenizerParams mParams;
170     final StringCharacterIterator JavaDoc mIter;
171     
172     // a distinct object used to denote a delimiter
173
private static final class Delim
174     {
175         private Delim() {}
176         public static Delim getInstance() { return( new Delim() ); }
177         public String JavaDoc toString() { return( "<DELIM>" ); }
178     }
179     final static Delim DELIM = Delim.getInstance();
180     
181         
182     TokenizerInternal(
183         String JavaDoc input,
184         TokenizerParams params )
185     {
186         mInput = input;
187         mParams = params;
188         mIter = new StringCharacterIterator JavaDoc( input );
189     }
190     
191         private static boolean
192     isSpecialEscapeChar( char theChar )
193     {
194         // carriage return or newline
195
return( theChar == 'n' || theChar == 'r' || theChar == 't' ||theChar == QUOTE_CHAR );
196     }
197     
198         private boolean
199     isCallerProvidedEscapableChar( char theChar )
200     {
201         return( mParams.mEscapableChars.indexOf( theChar ) >= 0 ||
202             theChar == mParams.mEscapeChar );
203     }
204     
205         private boolean
206     isEscapableChar( char theChar )
207     {
208         return( isCallerProvidedEscapableChar( theChar ) || isSpecialEscapeChar( theChar ) );
209     }
210     
211         private boolean
212     isDelim( String JavaDoc delims, char theChar )
213     {
214         return( delims.indexOf( theChar ) >= 0 || theChar == mIter.DONE );
215     }
216     
217         private static boolean
218     isDigit( char theChar )
219     {
220         return( (theChar >= '0' && theChar <= '9') );
221     }
222     
223
224         private static boolean
225     isHexDigit( char theChar )
226     {
227         return( isDigit( theChar ) || (theChar >= 'a' && theChar <= 'f') || isUpper( theChar ) );
228     }
229     
230         private static boolean
231     isUpper( char c )
232     {
233         return( (c >= 'A' && c <= 'F') );
234     }
235     
236         private boolean
237     hasMoreChars()
238     {
239         return( mIter.current() != mIter.DONE );
240     }
241     
242         private int
243     getIndex()
244     {
245         return( mIter.getIndex() );
246     }
247     
248         private char
249     setIndex( int index )
250     {
251         return( mIter.setIndex( index ) );
252     }
253
254         private char
255     nextChar()
256     {
257         final char theChar = mIter.current();
258         mIter.next();
259         
260         return( theChar );
261     }
262     
263     private static final char QUOTE_CHAR = '\"';
264     private static final char TAB_CHAR = '\t';
265     
266         private char
267     decodeUnicodeSequence()
268         throws MalformedUnicodeSequenceException
269     {
270         int value = 0;
271         
272         try
273         {
274             for( int i = 0; i < 4; ++i )
275             {
276                 value = (value << 4 ) | hexValue( nextChar() );
277             }
278         }
279         catch( Exception JavaDoc e )
280         {
281             throw new MalformedUnicodeSequenceException( "" );
282         }
283         
284         return( (char)value );
285     }
286     
287         private static int
288     hexValue( char c )
289     {
290         if ( ! isHexDigit( c ) )
291         {
292             throw new IllegalArgumentException JavaDoc();
293         }
294         
295         int value = 0;
296
297         if ( isDigit( c ) )
298         {
299             value = (int)c - (int)'0';
300         }
301         else if ( isUpper( c ) )
302         {
303             value = (int)c - (int)'A';
304         }
305         else
306         {
307             value = (int)c - (int)'a';
308         }
309         return( value );
310     }
311     
312         private char
313     getEscapedChar( final char inputChar )
314         throws MalformedUnicodeSequenceException,IllegalEscapeSequenceException
315     {
316         char outChar = 0;
317         
318         if ( isCallerProvidedEscapableChar( inputChar ) )
319         {
320             outChar = inputChar;
321         }
322         else
323         {
324             switch( inputChar )
325             {
326                 default: throw new IllegalEscapeSequenceException( "" + inputChar );
327                 case 'n': outChar = '\n'; break;
328                 case 'r': outChar = '\r'; break;
329                 case 't': outChar = '\t'; break;
330                 case QUOTE_CHAR: outChar = QUOTE_CHAR; break;
331                 case 'u': outChar = decodeUnicodeSequence(); break;
332             }
333         }
334         
335         return( outChar );
336     }
337     
338     
339         private String JavaDoc
340     processEscapeSequence()
341     {
342         // index of the character following the escape character
343
String JavaDoc s = null;
344         
345         final char theChar = nextChar();
346         final int continuePos = mIter.getIndex();
347         try
348         {
349             s = "" + getEscapedChar( theChar );
350         }
351         catch( TokenizerException e )
352         {
353             // emit the escape character and the character following it]
354
// literally, then proceed.
355
s = mParams.mEscapeChar + "" + theChar;
356             mIter.setIndex( continuePos );
357         }
358         
359         return( s );
360     }
361     
362         ArrayList JavaDoc<Object JavaDoc>
363     parseTokens( )
364         throws UnterminatedLiteralStringException,
365             MalformedUnicodeSequenceException, IllegalEscapeSequenceException
366     {
367         final StringBuffer JavaDoc tok = new StringBuffer JavaDoc();
368         final ArrayList JavaDoc<Object JavaDoc> tokens = new ArrayList JavaDoc<Object JavaDoc>();
369         boolean insideStringLiteral = false;
370         
371         /**
372             Escape sequences are always processed regardless of whether we're inside a
373             quoted string or not. A quote string really only alters whether delimiters
374             are treated as literal characters, or not.
375          */

376         while ( hasMoreChars() )
377         {
378             final char theChar = nextChar();
379             
380             if ( theChar == mParams.mEscapeChar )
381             {
382                 if ( mParams.mEmitInvalidEscapeSequencesLiterally )
383                 {
384                     tok.append( processEscapeSequence() );
385                 }
386                 else
387                 {
388                     tok.append( getEscapedChar( nextChar() ) );
389                 }
390             }
391             else if ( theChar == Tokenizer.LITERAL_STRING_DELIM )
392             {
393                 // special cases of "", """", """""", etc require forcing an empty string out
394
// these case have no delimiter or regular characters to cause a string to
395
// be emitted
396
if ( insideStringLiteral && tok.length() == 0 && tokens.size() == 0)
397                 {
398                     tokens.add( "" );
399                 }
400                 
401                 insideStringLiteral = ! insideStringLiteral;
402             }
403             else if ( insideStringLiteral )
404             {
405                 tok.append( theChar );
406             }
407             else if ( isDelim( mParams.mDelimiters, theChar ) )
408             {
409                 // we've hit a delimiter...if characters have accumulated, spit them out
410
// then spit out the delimiter token.
411
if ( tok.length() != 0 )
412                 {
413                     tokens.add( tok.toString() );
414                     tok.setLength( 0 );
415                 }
416                 tokens.add( DELIM );
417             }
418             else
419             {
420                 tok.append( theChar );
421             }
422         }
423         
424         if ( tok.length() != 0 )
425         {
426             tokens.add( tok.toString() );
427         }
428         
429         if ( insideStringLiteral )
430         {
431             throw new UnterminatedLiteralStringException( tok.toString() );
432         }
433         
434         return( tokens );
435     }
436 }
437
438
Popular Tags