KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sun > enterprise > admin > util > TokenizerImpl


1 /*
2  * The contents of this file are subject to the terms
3  * of the Common Development and Distribution License
4  * (the License). You may not use this file except in
5  * compliance with the License.
6  *
7  * You can obtain a copy of the license at
8  * https://glassfish.dev.java.net/public/CDDLv1.0.html or
9  * glassfish/bootstrap/legal/CDDLv1.0.txt.
10  * See the License for the specific language governing
11  * permissions and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL
14  * Header Notice in each file and include the License file
15  * at glassfish/bootstrap/legal/CDDLv1.0.txt.
16  * If applicable, add the following below the CDDL Header,
17  * with the fields enclosed by brackets [] replaced by
18  * you own identifying information:
19  * "Portions Copyrighted [year] [name of copyright owner]"
20  *
21  * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
22  */

23
24 /*
25  * Copyright 2004-2005 Sun Microsystems, Inc. All rights reserved.
26  * Use is subject to license terms.
27  */

28  
29 /*
30  * $Header: /cvs/glassfish/admin-core/util/src/java/com/sun/enterprise/admin/util/TokenizerImpl.java,v 1.2 2005/12/25 03:53:15 tcfujii Exp $
31  * $Revision: 1.2 $
32  * $Date: 2005/12/25 03:53:15 $
33  */

34
35 package com.sun.enterprise.admin.util;
36
37 import java.text.StringCharacterIterator JavaDoc;
38 import java.util.ArrayList JavaDoc;
39 import java.util.Iterator JavaDoc;
40 import java.util.Arrays JavaDoc;
41
42
43 class IllegalEscapeSequenceException extends TokenizerException
44 {
45     public IllegalEscapeSequenceException( String JavaDoc msg ) { super( msg ); }
46 }
47
48 class UnterminatedLiteralStringException extends TokenizerException
49 {
50     public UnterminatedLiteralStringException( String JavaDoc msg ) { super( msg ); }
51 }
52
53 class MalformedUnicodeSequenceException extends IllegalEscapeSequenceException
54 {
55     public MalformedUnicodeSequenceException( String JavaDoc msg ) { super( msg ); }
56 }
57
58 /**
59  */

60 public final class TokenizerImpl implements Tokenizer
61 {
62     final String JavaDoc[] mTokens;
63     
64         public
65     TokenizerImpl(
66         String JavaDoc input,
67         String JavaDoc delimiters,
68         char escapeChar,
69         String JavaDoc escapableChars)
70         throws TokenizerException
71     {
72         this( input, delimiters, true, escapeChar, escapableChars );
73     }
74     
75     private static final char QUOTE_CHAR = '\"';
76     
77         public
78     TokenizerImpl(
79         String JavaDoc input,
80         String JavaDoc delimiters,
81         boolean multipleDelimsCountAsOne,
82         char escapeChar,
83         String JavaDoc escapableChars)
84         throws TokenizerException
85     {
86         final TokenizerInternal worker =
87             new TokenizerInternal( input, delimiters, escapeChar, escapableChars );
88     
89         ArrayList JavaDoc allTokens = worker.parseTokens( );
90
91         if ( multipleDelimsCountAsOne )
92         {
93             allTokens = removeMultipleDelims( allTokens );
94         }
95         
96         mTokens = interpretTokenList( allTokens );
97     }
98     
99         final static ArrayList JavaDoc
100     removeMultipleDelims( ArrayList JavaDoc list )
101     {
102         final ArrayList JavaDoc resultList = new ArrayList JavaDoc();
103         
104         boolean lastWasDelim = false;
105         final Iterator JavaDoc iter = list.iterator();
106         while ( iter.hasNext() )
107         {
108             final Object JavaDoc value = iter.next();
109             
110             if ( value instanceof String JavaDoc )
111             {
112                 resultList.add( value );
113                 lastWasDelim = false;
114             }
115             else if ( ! lastWasDelim )
116             {
117                 // add the delimiter
118
resultList.add( value );
119                 lastWasDelim = true;
120             }
121         }
122         
123         return( resultList );
124     }
125     
126     /**
127         Interpret the parsed token list, which consists of a series of strings
128         and tokens. We need to handle the special cases where the list starts
129         with a delimiter and/or ends with a delimiter. Examples:
130         
131         "" => {}
132         "." => { "", "" }
133         "..." => { "", "", "", "" }
134         "x." => { "x", "" }
135         ".x" => { "", "x" }
136         "y.x" => { "y", "x" }
137      */

138         static String JavaDoc[]
139     interpretTokenList( ArrayList JavaDoc list )
140     {
141         final ArrayList JavaDoc resultList = new ArrayList JavaDoc();
142
143         boolean lastWasDelim = true;
144
145         final Iterator JavaDoc iter = list.iterator();
146         while ( iter.hasNext() )
147         {
148             final Object JavaDoc value = iter.next();
149             if ( value instanceof String JavaDoc )
150             {
151                 resultList.add( value );
152                 lastWasDelim = false;
153             }
154             else
155             {
156                 if ( lastWasDelim )
157                 {
158                     // this one's a delimiter, and so was the last one
159
// insert the implicit empty string
160
resultList.add( "" );
161                 }
162                 else
163                 {
164                     lastWasDelim = true;
165                 }
166             }
167         }
168         
169         // a trailing delimiter implies an empty string after it
170
if ( lastWasDelim && list.size() != 0 )
171         {
172             resultList.add( "" );
173         }
174         
175         return( (String JavaDoc[])resultList.toArray( new String JavaDoc[ resultList.size() ] ) );
176     }
177     
178         public String JavaDoc []
179     getTokens()
180     {
181         return( mTokens );
182     }
183 }
184
185
186
187 final class TokenizerInternal
188 {
189     final String JavaDoc mInput;
190     final String JavaDoc mDelimiters;
191     final char mEscapeChar;
192     final String JavaDoc mEscapableChars;
193     final StringCharacterIterator JavaDoc mIter;
194     
195     // a distinct object used to denote a delimiter
196
private static class Delim
197     {
198         private Delim() {}
199         public static Delim getInstance() { return( new Delim() ); }
200         public String JavaDoc toString() { return( "<DELIM>" ); }
201     }
202     final static Delim DELIM = Delim.getInstance();
203     
204         public
205     TokenizerInternal(
206         String JavaDoc input,
207         String JavaDoc delimiters,
208         char escapeChar,
209         String JavaDoc escapableChars)
210     {
211         mInput = input;
212         mDelimiters = delimiters;
213         mEscapeChar = escapeChar;
214         mEscapableChars = escapableChars;
215         mIter = new StringCharacterIterator JavaDoc( input );
216     }
217     
218         static boolean
219     isSpecialEscapeChar( char theChar )
220     {
221         // carriage return or newline
222
return( theChar == 'n' || theChar == 'r' || theChar == 't' ||theChar == QUOTE_CHAR );
223     }
224     
225         boolean
226     isCallerProvidedEscapableChar( char theChar )
227     {
228         return( mEscapableChars.indexOf( theChar ) >= 0 || theChar == mEscapeChar );
229     }
230     
231         boolean
232     isEscapableChar( char theChar )
233     {
234         return( isCallerProvidedEscapableChar( theChar ) || isSpecialEscapeChar( theChar ) );
235     }
236     
237         boolean
238     isDelim( String JavaDoc delims, char theChar )
239     {
240         return( delims.indexOf( theChar ) >= 0 || theChar == mIter.DONE );
241     }
242     
243         static boolean
244     isDigit( char theChar )
245     {
246         return( (theChar >= '0' && theChar <= '9') );
247     }
248     
249
250         static boolean
251     isHexDigit( char theChar )
252     {
253         return( isDigit( theChar ) || (theChar >= 'a' && theChar <= 'f') || isUpper( theChar ) );
254     }
255     
256         static boolean
257     isUpper( char c )
258     {
259         return( (c >= 'A' && c <= 'F') );
260     }
261     
262         boolean
263     hasMoreChars()
264     {
265         return( mIter.current() != mIter.DONE );
266     }
267
268         char
269     nextChar()
270     {
271         final char theChar = mIter.current();
272         mIter.next();
273         
274         return( theChar );
275     }
276     
277     private static final char QUOTE_CHAR = '\"';
278     private static final char TAB_CHAR = '\t';
279     
280         char
281     decodeUnicodeSequence()
282         throws MalformedUnicodeSequenceException
283     {
284         int value = 0;
285         
286         try
287         {
288             for( int i = 0; i < 4; ++i )
289             {
290                 value = (value << 4 ) | hexValue( nextChar() );
291             }
292         }
293         catch( Exception JavaDoc e )
294         {
295             throw new MalformedUnicodeSequenceException( "" );
296         }
297         
298         return( (char)value );
299     }
300     
301         static int
302     hexValue( char c )
303     {
304         if ( ! isHexDigit( c ) )
305         {
306             throw new IllegalArgumentException JavaDoc();
307         }
308         
309         int value = 0;
310
311         if ( isDigit( c ) )
312         {
313             value = (int)c - (int)'0';
314         }
315         else if ( isUpper( c ) )
316         {
317             value = (int)c - (int)'A';
318         }
319         else
320         {
321             value = (int)c - (int)'a';
322         }
323         return( value );
324     }
325     
326         char
327     getEscapedChar( final char inputChar )
328         throws MalformedUnicodeSequenceException,IllegalEscapeSequenceException
329     {
330         char outChar = 0;
331         
332         if ( isCallerProvidedEscapableChar( inputChar ) )
333         {
334             outChar = inputChar;
335         }
336         else
337         {
338             switch( inputChar )
339             {
340                 default: throw new IllegalEscapeSequenceException( "" + inputChar );
341                 case 'n': outChar = '\n'; break;
342                 case 'r': outChar = '\r'; break;
343                 case 't': outChar = '\t'; break;
344                 case QUOTE_CHAR: outChar = QUOTE_CHAR; break;
345                 case 'u': outChar = decodeUnicodeSequence(); break;
346             }
347         }
348         
349         return( outChar );
350     }
351     
352
353     
354         ArrayList JavaDoc
355     parseTokens( )
356         throws UnterminatedLiteralStringException,
357             MalformedUnicodeSequenceException, IllegalEscapeSequenceException
358     {
359         final StringBuffer JavaDoc tok = new StringBuffer JavaDoc();
360         final ArrayList JavaDoc tokens = new ArrayList JavaDoc();
361         boolean insideStringLiteral = false;
362         
363         /**
364             Escape sequences are always processed regardless of whether we're inside a
365             quoted string or not. A quote string really only alters whether delimiters
366             are treated as literal characters, or not.
367          */

368         while ( hasMoreChars() )
369         {
370             final char theChar = nextChar();
371             
372             if ( theChar == mEscapeChar )
373             {
374                 tok.append( getEscapedChar( nextChar() ) );
375             }
376             else if ( theChar == Tokenizer.LITERAL_STRING_DELIM )
377             {
378                 // special cases of "", """", """""", etc require forcing an empty string out
379
// these case have no delimiter or regular characters to cause a string to
380
// be emitted
381
if ( insideStringLiteral && tok.length() == 0 && tokens.size() == 0)
382                 {
383                     tokens.add( "" );
384                 }
385                 
386                 insideStringLiteral = ! insideStringLiteral;
387             }
388             else if ( insideStringLiteral )
389             {
390                 tok.append( theChar );
391             }
392             else if ( isDelim( mDelimiters, theChar ) )
393             {
394                 // we've hit a delimiter...if characters have accumulated, spit them out
395
// then spit out the delimiter token.
396
if ( tok.length() != 0 )
397                 {
398                     tokens.add( tok.toString() );
399                     tok.setLength( 0 );
400                 }
401                 tokens.add( DELIM );
402             }
403             else
404             {
405                 tok.append( theChar );
406             }
407         }
408         
409         if ( tok.length() != 0 )
410         {
411             tokens.add( tok.toString() );
412         }
413         
414         if ( insideStringLiteral )
415         {
416             throw new UnterminatedLiteralStringException( tok.toString() );
417         }
418         
419         return( tokens );
420     }
421 }
422
423
Popular Tags