TokenizerImpl


1   /*
2    * The contents of this file are subject to the terms 
3    * of the Common Development and Distribution License 
4    * (the License).  You may not use this file except in
5    * compliance with the License.
6    * 
7    * You can obtain a copy of the license at 
8    * https://glassfish.dev.java.net/public/CDDLv1.0.html or
9    * glassfish/bootstrap/legal/CDDLv1.0.txt.
10   * See the License for the specific language governing 
11   * permissions and limitations under the License.
12   * 
13   * When distributing Covered Code, include this CDDL 
14   * Header Notice in each file and include the License file 
15   * at glassfish/bootstrap/legal/CDDLv1.0.txt.  
16   * If applicable, add the following below the CDDL Header, 
17   * with the fields enclosed by brackets [] replaced by
18   * you own identifying information: 
19   * "Portions Copyrighted [year] [name of copyright owner]"
20   * 
21   * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
22   */
23  
24  /*
25   * Copyright 2004-2005 Sun Microsystems, Inc.  All rights reserved.
26   * Use is subject to license terms.
27   */
28   
29  /*
30   * $Header: /cvs/glassfish/admin-core/util/src/java/com/sun/enterprise/admin/util/TokenizerImpl.java,v 1.2 2005/12/25 03:53:15 tcfujii Exp $
31   * $Revision: 1.2 $
32   * $Date: 2005/12/25 03:53:15 $
33   */
34  
35  package com.sun.enterprise.admin.util;
36  
37  import java.text.StringCharacterIterator  ;
38  import java.util.ArrayList  ;
39  import java.util.Iterator  ;
40  import java.util.Arrays  ;
41  
42  
43  class IllegalEscapeSequenceException extends TokenizerException
44  {
45      public  IllegalEscapeSequenceException( String   msg )    { super( msg ); }
46  }
47  
48  class UnterminatedLiteralStringException extends TokenizerException
49  {
50      public  UnterminatedLiteralStringException( String   msg )    { super( msg ); }
51  }
52  
53  class MalformedUnicodeSequenceException extends IllegalEscapeSequenceException
54  {
55      public  MalformedUnicodeSequenceException( String   msg ) { super( msg ); }
56  }
57  
58  /**
59   */
60  public final class TokenizerImpl implements Tokenizer
61  {
62      final String  []      mTokens;
63      
64          public
65      TokenizerImpl(
66          String        input,
67          String        delimiters,
68          char        escapeChar,
69          String        escapableChars)
70          throws TokenizerException
71      {
72          this( input, delimiters, true, escapeChar, escapableChars );
73      }
74      
75      private static final char   QUOTE_CHAR  = '\"';
76      
77          public
78      TokenizerImpl(
79          String        input,
80          String        delimiters,
81          boolean     multipleDelimsCountAsOne,
82          char        escapeChar,
83          String        escapableChars)
84          throws TokenizerException
85      {
86          final TokenizerInternal worker =
87              new TokenizerInternal( input, delimiters, escapeChar, escapableChars );
88      
89          ArrayList     allTokens   = worker.parseTokens( );
90  
91          if ( multipleDelimsCountAsOne )
92          {
93              allTokens   = removeMultipleDelims( allTokens );
94          }
95          
96          mTokens = interpretTokenList( allTokens );
97      }
98      
99          final static ArrayList  
100     removeMultipleDelims( ArrayList   list )
101     {
102         final ArrayList       resultList  = new ArrayList  ();
103         
104         boolean lastWasDelim    = false;
105         final Iterator    iter    = list.iterator();
106         while ( iter.hasNext() )
107         {
108             final Object      value   = iter.next();
109             
110             if ( value instanceof String   )
111             {
112                 resultList.add( value );
113                 lastWasDelim    = false;
114             }
115             else if ( ! lastWasDelim )
116             {
117                 // add the delimiter
118                 resultList.add( value );
119                 lastWasDelim    = true;
120             }
121         }
122         
123         return( resultList );
124     }
125     
126     /**
127         Interpret the parsed token list, which consists of a series of strings
128         and tokens.  We need to handle the special cases where the list starts
129         with a delimiter and/or ends with a delimiter.  Examples:
130         
131         ""  => {}
132         "." => { "", "" }
133         "..."   => { "", "", "", "" }
134         "x."    => { "x", "" }
135         ".x"    => { "", "x" }
136         "y.x"   => { "y", "x" }
137      */
138         static String  []
139     interpretTokenList( ArrayList   list )
140     {
141         final ArrayList       resultList  = new ArrayList  ();
142 
143         boolean lastWasDelim    = true;
144 
145         final Iterator    iter    = list.iterator();
146         while ( iter.hasNext() )
147         {
148             final Object      value   = iter.next();
149             if ( value instanceof String   )
150             {
151                 resultList.add( value );
152                 lastWasDelim    = false;
153             }
154             else
155             {
156                 if ( lastWasDelim )
157                 {
158                     // this one's a delimiter, and so was the last one
159                     // insert the implicit empty string
160                     resultList.add( "" );
161                 }
162                 else
163                 {
164                     lastWasDelim    = true;
165                 }
166             }
167         }
168         
169         // a trailing delimiter implies an empty string after it
170         if ( lastWasDelim && list.size() != 0 )
171         {
172             resultList.add( "" );
173         }
174         
175         return( (String  [])resultList.toArray( new String  [ resultList.size() ] ) );
176     }
177     
178         public String   []
179     getTokens()
180     {
181         return( mTokens );
182     }
183 }
184 
185 
186 
187 final class TokenizerInternal
188 {
189     final String              mInput;
190     final String              mDelimiters;
191     final char              mEscapeChar;
192     final String              mEscapableChars;
193     final StringCharacterIterator     mIter;
194     
195     // a distinct object used to denote a delimiter
196     private static class Delim
197     {
198         private Delim() {}
199         public static Delim getInstance()   { return( new Delim() ); }
200         public String     toString() { return( "<DELIM>" ); }
201     }
202     final static Delim  DELIM   = Delim.getInstance();
203     
204         public
205     TokenizerInternal(
206         String        input,
207         String        delimiters,
208         char        escapeChar,
209         String        escapableChars)
210     {
211         mInput          = input;
212         mDelimiters     = delimiters;
213         mEscapeChar     = escapeChar;
214         mEscapableChars = escapableChars;
215         mIter       = new StringCharacterIterator  ( input );
216     }
217     
218         static boolean
219     isSpecialEscapeChar( char theChar )
220     {
221         // carriage return or newline
222         return( theChar == 'n' || theChar == 'r' || theChar == 't' ||theChar == QUOTE_CHAR );
223     }
224     
225         boolean
226     isCallerProvidedEscapableChar( char theChar )
227     {
228         return( mEscapableChars.indexOf( theChar ) >= 0 || theChar == mEscapeChar );
229     }
230     
231         boolean
232     isEscapableChar( char theChar )
233     {
234         return( isCallerProvidedEscapableChar( theChar ) || isSpecialEscapeChar( theChar ) );
235     }
236     
237         boolean
238     isDelim( String   delims, char theChar )
239     {
240         return( delims.indexOf( theChar ) >= 0 || theChar == mIter.DONE );
241     }
242     
243         static boolean
244     isDigit( char theChar )
245     {
246         return( (theChar >= '0' && theChar <= '9') );
247     }
248     
249 
250         static boolean
251     isHexDigit( char theChar )
252     {
253         return( isDigit( theChar ) || (theChar >= 'a' && theChar <= 'f') || isUpper( theChar ) );
254     }
255     
256         static boolean
257     isUpper( char c )
258     {
259         return( (c >= 'A' && c <= 'F') );
260     }
261     
262         boolean
263     hasMoreChars()
264     {
265         return( mIter.current() != mIter.DONE );
266     }
267 
268         char
269     nextChar()
270     {
271         final char  theChar = mIter.current();
272         mIter.next();
273         
274         return( theChar );
275     }
276     
277     private static final char   QUOTE_CHAR  = '\"';
278     private static final char   TAB_CHAR    = '\t';
279     
280         char
281     decodeUnicodeSequence()
282         throws MalformedUnicodeSequenceException
283     {
284         int     value   = 0;
285         
286         try
287         {
288             for( int i = 0; i < 4; ++i )
289             {
290                 value   = (value << 4 ) | hexValue( nextChar() );
291             }
292         }
293         catch( Exception   e )
294         {
295             throw new MalformedUnicodeSequenceException( "" );
296         }
297         
298         return( (char)value );
299     }
300     
301         static int
302     hexValue( char c )
303     {
304         if ( ! isHexDigit( c ) )
305         {
306             throw new IllegalArgumentException  ();
307         }
308         
309         int value   = 0;
310 
311         if ( isDigit( c ) )
312         {
313             value   = (int)c - (int)'0';
314         }
315         else if ( isUpper( c ) )
316         {
317             value   = (int)c - (int)'A';
318         }
319         else
320         {
321             value   = (int)c - (int)'a';
322         }
323         return( value );
324     }
325     
326         char
327     getEscapedChar( final char inputChar )
328         throws MalformedUnicodeSequenceException,IllegalEscapeSequenceException
329     {
330         char    outChar = 0;
331         
332         if ( isCallerProvidedEscapableChar( inputChar ) )
333         {
334             outChar = inputChar;
335         }
336         else
337         {
338             switch( inputChar )
339             {
340                 default:    throw new IllegalEscapeSequenceException( "" + inputChar );
341                 case 'n':   outChar = '\n';     break;
342                 case 'r':   outChar = '\r';     break;
343                 case 't':   outChar = '\t';     break;
344                 case QUOTE_CHAR:    outChar = QUOTE_CHAR;   break;
345                 case 'u':   outChar = decodeUnicodeSequence();  break;
346             }
347         }
348         
349         return( outChar );
350     }
351     
352 
353     
354         ArrayList  
355     parseTokens(  )
356         throws UnterminatedLiteralStringException,
357             MalformedUnicodeSequenceException, IllegalEscapeSequenceException
358     {
359         final StringBuffer    tok = new StringBuffer  ();
360         final ArrayList       tokens  = new ArrayList  ();
361         boolean             insideStringLiteral = false;
362         
363         /**
364             Escape sequences are always processed regardless of whether we're inside a
365             quoted string or not.  A quote string really only alters whether delimiters
366             are treated as literal characters, or not.
367          */
368         while ( hasMoreChars()  )
369         {
370             final char  theChar = nextChar();
371             
372             if ( theChar == mEscapeChar )
373             {
374                 tok.append( getEscapedChar( nextChar() ) );
375             }
376             else if ( theChar == Tokenizer.LITERAL_STRING_DELIM )
377             {
378                 // special cases of "", """", """""", etc require forcing an empty string out
379                 // these case have no delimiter or regular characters to cause a string to
380                 // be emitted
381                 if ( insideStringLiteral && tok.length() == 0 && tokens.size() == 0)
382                 {
383                     tokens.add( "" );
384                 }
385                 
386                 insideStringLiteral = ! insideStringLiteral;
387             }
388             else if ( insideStringLiteral )
389             {
390                 tok.append( theChar );
391             }
392             else if ( isDelim( mDelimiters, theChar ) )
393             {
394                 // we've hit a delimiter...if characters have accumulated, spit them out
395                 // then spit out the delimiter token.
396                 if ( tok.length() != 0 )
397                 {
398                     tokens.add( tok.toString() );
399                     tok.setLength( 0 );
400                 }
401                 tokens.add( DELIM );
402             }
403             else
404             {
405                 tok.append( theChar );
406             }
407         }
408         
409         if ( tok.length() != 0 )
410         {
411             tokens.add( tok.toString() );
412         }
413         
414         if ( insideStringLiteral )
415         {
416             throw new UnterminatedLiteralStringException( tok.toString() );
417         }
418         
419         return( tokens );
420     }
421 }
422 
423
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags