package com.sun.appserv.management.util.misc;

import java.text.StringCharacterIterator;
import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Arrays;


/**
 * Thrown when the escape character is followed by a character that cannot be escaped.
 */
class IllegalEscapeSequenceException extends TokenizerException
{
    static final long serialVersionUID = 6579038898242625567L;

    public IllegalEscapeSequenceException( String msg ) { super( msg ); }
}

/**
 * Thrown when the input ends while a literal string is still open.
 */
final class UnterminatedLiteralStringException extends TokenizerException
{
    static final long serialVersionUID = -1327166469948605347L;

    public UnterminatedLiteralStringException( String msg ) { super( msg ); }
}

/**
 * Thrown when a unicode escape is not followed by four hexadecimal digits.
 */
final class MalformedUnicodeSequenceException extends IllegalEscapeSequenceException
{
    static final long serialVersionUID = 6604956430084180525L;

    public MalformedUnicodeSequenceException( String msg ) { super( msg ); }
}


/**
 * Splits an input string into tokens. The raw scan is delegated to
 * {@link TokenizerInternal}; this class post-processes the scan result into
 * the final array of token strings.
 */
public final class TokenizerImpl implements Tokenizer
{
    final String [] mTokens;

    /**
     * Tokenizes <code>input</code> using a default-constructed TokenizerParams.
     *
     * @param input the string to tokenize
     * @throws TokenizerException if the input cannot be tokenized
     */
    public
    TokenizerImpl( String input )
        throws TokenizerException
    {
        this( input, new TokenizerParams() );
    }

    /**
     * Tokenizes <code>input</code> according to <code>params</code>.
     *
     * @param input  the string to tokenize
     * @param params delimiters, escape settings and delimiter-collapsing option
     * @throws TokenizerException if the input cannot be tokenized
     */
    public
    TokenizerImpl(
        String          input,
        TokenizerParams params )
        throws TokenizerException
    {
        final TokenizerInternal worker = new TokenizerInternal( input, params );

        List<Object> allTokens = worker.parseTokens();

        if ( params.mMultipleDelimsCountAsOne )
        {
            allTokens = removeMultipleDelims( allTokens );
        }

        mTokens = interpretTokenList( allTokens );
    }

    /**
     * Collapses every run of consecutive delimiter markers into a single marker.
     * Any element that is not a String is treated as a delimiter marker.
     *
     * @param list mixed list of String tokens and delimiter markers
     * @return a new list in which no two delimiter markers are adjacent
     */
    final static List<Object>
    removeMultipleDelims( List<Object> list )
    {
        final List<Object> resultList = new ArrayList<Object>();

        boolean lastWasDelim = false;
        for ( final Object value : list )
        {
            if ( value instanceof String )
            {
                resultList.add( value );
                lastWasDelim = false;
            }
            else if ( ! lastWasDelim )
            {
                // first delimiter of a run; later ones in the same run are dropped
                resultList.add( value );
                lastWasDelim = true;
            }
        }

        return( resultList );
    }

    /**
     * Converts the mixed list of String tokens and delimiter markers into the
     * final token array. Any element that is not a String is treated as a
     * delimiter marker.
     * <p>
     * A delimiter that is not preceded by a token (at the start of the list, or
     * directly after another delimiter) produces an empty token. If a non-empty
     * list ends with a delimiter, a trailing empty token is produced.
     *
     * @param list mixed list of String tokens and delimiter markers
     * @return the tokens, in order
     */
    static String []
    interpretTokenList( List<Object> list )
    {
        final List<String> resultList = new ArrayList<String>();

        // starts true so that a leading delimiter yields a leading empty token
        boolean lastWasDelim = true;

        for ( final Object value : list )
        {
            if ( value instanceof String )
            {
                resultList.add( (String)value );
                lastWasDelim = false;
            }
            else
            {
                if ( lastWasDelim )
                {
                    // nothing between this delimiter and the previous one
                    resultList.add( "" );
                }
                else
                {
                    lastWasDelim = true;
                }
            }
        }

        // a trailing delimiter implies an empty token after it
        if ( lastWasDelim && list.size() != 0 )
        {
            resultList.add( "" );
        }

        return( resultList.toArray( new String [ resultList.size() ] ) );
    }

    /**
     * Returns the tokens computed at construction time. The internal array
     * itself is returned, not a copy.
     *
     * @return the tokens
     */
    public String []
    getTokens()
    {
        return( mTokens );
    }
}



/**
 * Performs the character-level scan for {@link TokenizerImpl}: handles escape
 * sequences, literal strings and delimiters, and produces a list whose elements
 * are either String tokens or the {@link #DELIM} marker.
 */
final class TokenizerInternal
{
    final String                  mInput;
    final TokenizerParams         mParams;
    final StringCharacterIterator mIter;

    /** Marker type placed in the token list wherever a delimiter was seen. */
    private static final class Delim
    {
        private Delim() {}
        public static Delim getInstance() { return( new Delim() ); }
        public String toString() { return( "<DELIM>" ); }
    }
    final static Delim DELIM = Delim.getInstance();

    private static final char QUOTE_CHAR = '\"';

    TokenizerInternal(
        String          input,
        TokenizerParams params )
    {
        mInput  = input;
        mParams = params;
        mIter   = new StringCharacterIterator( input );
    }

    /**
     * @return true if <code>theChar</code> is the escape char itself or is listed
     *         in the caller-supplied escapable characters
     */
    private boolean
    isCallerProvidedEscapableChar( char theChar )
    {
        return( mParams.mEscapableChars.indexOf( theChar ) >= 0 ||
                theChar == mParams.mEscapeChar );
    }

    /**
     * @return true if <code>theChar</code> occurs in <code>delims</code> or is the
     *         iterator's end-of-input sentinel
     */
    private boolean
    isDelim( String delims, char theChar )
    {
        return( delims.indexOf( theChar ) >= 0 || theChar == StringCharacterIterator.DONE );
    }

    private static boolean
    isDigit( char theChar )
    {
        return( theChar >= '0' && theChar <= '9' );
    }

    /** @return true for ASCII 0-9, a-f and A-F only */
    private static boolean
    isHexDigit( char theChar )
    {
        return( isDigit( theChar ) || (theChar >= 'a' && theChar <= 'f') || isUpper( theChar ) );
    }

    /** @return true for the upper-case hex letters A-F */
    private static boolean
    isUpper( char c )
    {
        return( c >= 'A' && c <= 'F' );
    }

    /**
     * @return true while the iterator is not positioned on the end-of-input
     *         sentinel
     */
    private boolean
    hasMoreChars()
    {
        return( mIter.current() != StringCharacterIterator.DONE );
    }

    private int
    getIndex()
    {
        return( mIter.getIndex() );
    }

    private char
    setIndex( int index )
    {
        return( mIter.setIndex( index ) );
    }

    /**
     * Returns the current character and advances the iterator by one. At end of
     * input the iterator's sentinel value is returned.
     */
    private char
    nextChar()
    {
        final char theChar = mIter.current();
        mIter.next();

        return( theChar );
    }

    /**
     * Reads the next four characters as hexadecimal digits and combines them
     * into a single char.
     *
     * @return the decoded character
     * @throws MalformedUnicodeSequenceException if any of the four characters is
     *         not a hex digit (this includes running off the end of the input)
     */
    private char
    decodeUnicodeSequence()
        throws MalformedUnicodeSequenceException
    {
        int value = 0;

        try
        {
            for ( int i = 0; i < 4; ++i )
            {
                value = (value << 4) | hexValue( nextChar() );
            }
        }
        catch ( IllegalArgumentException e )
        {
            throw new MalformedUnicodeSequenceException(
                "malformed unicode escape sequence before index " + getIndex() );
        }

        return( (char)value );
    }

    /**
     * Returns the numeric value (0..15) of a hexadecimal digit.
     *
     * @param c an ASCII hex digit
     * @return its value
     * @throws IllegalArgumentException if <code>c</code> is not a hex digit
     */
    private static int
    hexValue( char c )
    {
        if ( ! isHexDigit( c ) )
        {
            throw new IllegalArgumentException( "not a hex digit: " + c );
        }

        final int value;

        if ( isDigit( c ) )
        {
            value = c - '0';
        }
        else if ( isUpper( c ) )
        {
            // letters represent 10..15, not 0..5
            value = 10 + (c - 'A');
        }
        else
        {
            value = 10 + (c - 'a');
        }
        return( value );
    }

    /**
     * Translates the character following an escape char into the character it
     * stands for. Caller-provided escapable characters (and the escape char
     * itself) map to themselves; otherwise n, r, t, the double quote and the
     * unicode escape introducer u are recognized.
     *
     * @param inputChar the character that followed the escape char
     * @return the character the escape sequence denotes
     * @throws MalformedUnicodeSequenceException if a unicode escape is malformed
     * @throws IllegalEscapeSequenceException if <code>inputChar</code> is not escapable
     */
    private char
    getEscapedChar( final char inputChar )
        throws MalformedUnicodeSequenceException, IllegalEscapeSequenceException
    {
        if ( isCallerProvidedEscapableChar( inputChar ) )
        {
            return( inputChar );
        }

        final char outChar;
        switch ( inputChar )
        {
            case 'n':        outChar = '\n'; break;
            case 'r':        outChar = '\r'; break;
            case 't':        outChar = '\t'; break;
            case QUOTE_CHAR: outChar = QUOTE_CHAR; break;
            case 'u':        outChar = decodeUnicodeSequence(); break;
            default:         throw new IllegalEscapeSequenceException( "" + inputChar );
        }

        return( outChar );
    }

    /**
     * Lenient escape handling: decodes the escape sequence at the current
     * position, or, if it is invalid, returns the escape char and the character
     * after it unchanged and resumes scanning right after that character.
     * An escape char that is the very last character of the input is returned
     * by itself.
     *
     * @return the text to append to the current token
     */
    private String
    processEscapeSequence()
    {
        if ( ! hasMoreChars() )
        {
            // dangling escape char: emit it alone rather than reading the
            // iterator's end-of-input sentinel as if it were a character
            return( mParams.mEscapeChar + "" );
        }

        final char theChar     = nextChar();
        final int  continuePos = getIndex();

        try
        {
            return( "" + getEscapedChar( theChar ) );
        }
        catch ( TokenizerException e )
        {
            // a failed unicode decode may have consumed characters; rewind so
            // they are scanned as ordinary input
            setIndex( continuePos );
            return( mParams.mEscapeChar + "" + theChar );
        }
    }

    /**
     * Scans the whole input and returns a list whose elements are either String
     * tokens or {@link #DELIM} markers, in input order.
     * <p>
     * Escape sequences are decoded wherever they occur. Characters between a
     * pair of literal-string delimiters are taken verbatim, so delimiters inside
     * a literal string do not split the token.
     *
     * @return the scanned tokens and delimiter markers
     * @throws UnterminatedLiteralStringException if the input ends inside a literal string
     * @throws MalformedUnicodeSequenceException if a unicode escape is malformed and
     *         invalid escapes are not being emitted literally
     * @throws IllegalEscapeSequenceException if an escape is invalid and invalid
     *         escapes are not being emitted literally
     */
    ArrayList<Object>
    parseTokens()
        throws UnterminatedLiteralStringException,
               MalformedUnicodeSequenceException, IllegalEscapeSequenceException
    {
        final StringBuilder     tok    = new StringBuilder();
        final ArrayList<Object> tokens = new ArrayList<Object>();
        boolean insideStringLiteral = false;

        while ( hasMoreChars() )
        {
            final char theChar = nextChar();

            if ( theChar == mParams.mEscapeChar )
            {
                if ( mParams.mEmitInvalidEscapeSequencesLiterally )
                {
                    tok.append( processEscapeSequence() );
                }
                else
                {
                    tok.append( getEscapedChar( nextChar() ) );
                }
            }
            else if ( theChar == Tokenizer.LITERAL_STRING_DELIM )
            {
                // closing delimiter of an empty literal that is the first thing
                // in the input: record the empty token explicitly
                if ( insideStringLiteral && tok.length() == 0 && tokens.size() == 0 )
                {
                    tokens.add( "" );
                }

                insideStringLiteral = ! insideStringLiteral;
            }
            else if ( insideStringLiteral )
            {
                tok.append( theChar );
            }
            else if ( isDelim( mParams.mDelimiters, theChar ) )
            {
                // flush the pending token, if any, before the delimiter marker
                if ( tok.length() != 0 )
                {
                    tokens.add( tok.toString() );
                    tok.setLength( 0 );
                }
                tokens.add( DELIM );
            }
            else
            {
                tok.append( theChar );
            }
        }

        if ( tok.length() != 0 )
        {
            tokens.add( tok.toString() );
        }

        if ( insideStringLiteral )
        {
            throw new UnterminatedLiteralStringException( tok.toString() );
        }

        return( tokens );
    }
}