| 1 3 package org.codehaus.groovy.antlr.parser; 4 import org.codehaus.groovy.antlr.*; 5 import java.util.*; 6 import java.io.InputStream ; 7 import java.io.Reader ; 8 import antlr.InputBuffer; 9 import antlr.LexerSharedInputState; 10 11 import java.io.InputStream ; 12 import antlr.TokenStreamException; 13 import antlr.TokenStreamIOException; 14 import antlr.TokenStreamRecognitionException; 15 import antlr.CharStreamException; 16 import antlr.CharStreamIOException; 17 import antlr.ANTLRException; 18 import java.io.Reader ; 19 import java.util.Hashtable ; 20 import antlr.CharScanner; 21 import antlr.InputBuffer; 22 import antlr.ByteBuffer; 23 import antlr.CharBuffer; 24 import antlr.Token; 25 import antlr.CommonToken; 26 import antlr.RecognitionException; 27 import antlr.NoViableAltForCharException; 28 import antlr.MismatchedCharException; 29 import antlr.TokenStream; 30 import antlr.ANTLRHashString; 31 import antlr.LexerSharedInputState; 32 import antlr.collections.impl.BitSet; 33 import antlr.SemanticException; 34 35 public class GroovyLexer extends antlr.CharScanner implements GroovyTokenTypes, TokenStream 36 { 37 38 39 private boolean assertEnabled = true; 40 41 private boolean enumEnabled = true; 42 43 private boolean whitespaceIncluded = false; 44 45 46 public void enableAssert(boolean shouldEnable) { assertEnabled = shouldEnable; } 47 48 public boolean isAssertEnabled() { return assertEnabled; } 49 50 public void enableEnum(boolean shouldEnable) { enumEnabled = shouldEnable; } 51 52 public boolean isEnumEnabled() { return enumEnabled; } 53 54 55 public void setWhitespaceIncluded(boolean z) { whitespaceIncluded = z; } 56 57 public boolean isWhitespaceIncluded() { return whitespaceIncluded; } 58 59 { 60 setTabSize(1); } 63 64 65 protected int parenLevel = 0; 66 protected int suppressNewline = 0; protected static final int SCS_TYPE = 3, SCS_VAL = 4, SCS_LIT = 8, SCS_LIMIT = 16; 68 protected static final int SCS_SQ_TYPE = 0, SCS_TQ_TYPE = 1, SCS_RE_TYPE = 2; 69 
// ---- string-constructor / paren-level state and lexer plumbing ----

    /**
     * Packed string-constructor state. Zero is treated as "inactive"
     * ({@link #restartStringCtor(boolean)} ignores it); otherwise it is
     * SCS_LIT or SCS_VAL plus a quote type in the SCS_TYPE bits.
     */
    protected int stringCtorState = 0;

    /** Stack of Integer values, each packing parenLevel*SCS_LIMIT + stringCtorState. */
    protected ArrayList parenLevelStack = new ArrayList();

    /**
     * Type of the last token handed out by the {@link #plumb()} stream;
     * whitespace/comment tokens are skipped when whitespaceIncluded is set.
     */
    protected int lastSigTokenType = EOF;

    /** Saves parenLevel and stringCtorState on the stack and resets both to 0. */
    protected void pushParenLevel() {
        parenLevelStack.add(new Integer(parenLevel * SCS_LIMIT + stringCtorState));
        parenLevel = 0;
        stringCtorState = 0;
    }

    /**
     * Restores parenLevel and stringCtorState from the top of the stack.
     * Does nothing when the stack is empty.
     */
    protected void popParenLevel() {
        int npl = parenLevelStack.size();
        if (npl == 0) return;
        int i = ((Integer) parenLevelStack.remove(--npl)).intValue();
        parenLevel = i / SCS_LIMIT;
        stringCtorState = i % SCS_LIMIT;
    }

    /**
     * Re-arms an active string-constructor state, keeping its quote type.
     *
     * @param expectLiteral true selects SCS_LIT, false selects SCS_VAL
     */
    protected void restartStringCtor(boolean expectLiteral) {
        if (stringCtorState != 0) {
            stringCtorState = (expectLiteral ? SCS_LIT : SCS_VAL) + (stringCtorState & SCS_TYPE);
        }
    }

    /**
     * @return true unless the last significant token is one that
     *         {@link #isExpressionEndingToken(int)} accepts
     */
    protected boolean allowRegexpLiteral() {
        return !isExpressionEndingToken(lastSigTokenType);
    }

    /**
     * Classifies a token type as "expression ending". The accepted set is the
     * closing brackets, INC/DEC, string and numeric literals, IDENT and the
     * listed keyword tokens.
     *
     * @param ttype token type to classify
     * @return true when {@code ttype} is in the accepted set
     */
    protected static boolean isExpressionEndingToken(int ttype) {
        switch (ttype) {
        case INC: case DEC:
        case RPAREN: case RBRACK: case RCURLY:
        case STRING_LITERAL: case STRING_CTOR_END:
        case NUM_INT: case NUM_FLOAT: case NUM_LONG: case NUM_DOUBLE:
        case NUM_BIG_INT: case NUM_BIG_DECIMAL:
        case IDENT:
        case LITERAL_any:
        case LITERAL_as:
        case LITERAL_assert:
        case LITERAL_boolean:
        case LITERAL_break:
        case LITERAL_byte:
        case LITERAL_case:
        case LITERAL_catch:
        case LITERAL_char:
        case LITERAL_class:
        case LITERAL_continue:
        case LITERAL_def:
        case LITERAL_default:
        case LITERAL_double:
        case LITERAL_else:
        case LITERAL_enum:
        case LITERAL_extends:
        case LITERAL_false:
        case LITERAL_finally:
        case LITERAL_float:
        case LITERAL_for:
        case LITERAL_if:
        case LITERAL_implements:
        case LITERAL_import:
        case LITERAL_in:
        case LITERAL_instanceof:
        case LITERAL_int:
        case LITERAL_interface:
        case LITERAL_long:
        case LITERAL_native:
        case LITERAL_new:
        case LITERAL_null:
        case LITERAL_package:
        case LITERAL_private:
        case LITERAL_protected:
        case LITERAL_public:
        case LITERAL_return:
        case LITERAL_short:
        case LITERAL_static:
        case LITERAL_super:
        case LITERAL_switch:
        case LITERAL_synchronized:
        case LITERAL_this:
        case LITERAL_threadsafe:
        case LITERAL_throw:
        case LITERAL_throws:
        case LITERAL_transient:
        case LITERAL_true:
        case LITERAL_try:
        case LITERAL_void:
        case LITERAL_volatile:
        case LITERAL_while:
        case LITERAL_with:
            return true;
        default:
            return false;
        }
    }

    /**
     * Records a line break via {@code newline()}. When {@code check} is set
     * and suppressNewline is positive, an error is reported first.
     *
     * @param check whether to enforce the suppressNewline restriction
     * @throws RecognitionException when the restriction is violated
     */
    protected void newlineCheck(boolean check) throws RecognitionException {
        if (check && suppressNewline > 0) {
            // The condition is necessarily false here; require() is used to raise the error.
            require(suppressNewline == 0,
                "end of line reached within a simple string 'x' or \"x\" or /x/",
                "for multi-line literals, use triple quotes '''x''' or \"\"\"x\"\"\"");
            suppressNewline = 0;
        }
        newline();
    }

    /**
     * Looks ahead without consuming input: true when the next character is
     * '$', optionally followed by '*', and then either '{' or a Java
     * identifier start character other than '$'.
     *
     * @throws CharStreamException if lookahead fails
     */
    protected boolean atValidDollarEscape() throws CharStreamException {
        int k = 1;
        char lc = LA(k++);
        if (lc != '$') return false;
        lc = LA(k++);
        if (lc == '*') lc = LA(k++);
        return (lc == '{' || (lc != '$' && Character.isJavaIdentifierStart(lc)));
    }

    /**
     * Wraps this lexer in a TokenStream that (a) lexes the tail of a string
     * constructor directly while stringCtorState is at or above SCS_LIT, and
     * (b) keeps {@link #lastSigTokenType} up to date.
     *
     * @return the wrapping token stream
     */
    public TokenStream plumb() {
        return new TokenStream() {
            public Token nextToken() throws TokenStreamException {
                if (stringCtorState >= SCS_LIT) {
                    // Resume inside a string constructor, after an embedded value.
                    int quoteType = (stringCtorState & SCS_TYPE);
                    stringCtorState = 0;  // the rule below sets the next state itself, if any
                    resetText();
                    try {
                        switch (quoteType) {
                        case SCS_SQ_TYPE:
                            mSTRING_CTOR_END(true, false, false); break;
                        case SCS_TQ_TYPE:
                            mSTRING_CTOR_END(true, false, true); break;
                        case SCS_RE_TYPE:
                            mREGEXP_CTOR_END(true, false); break;
                        default: assert(false);
                        }
                        lastSigTokenType = _returnToken.getType();
                        return _returnToken;
                    } catch (RecognitionException e) {
                        throw new TokenStreamRecognitionException(e);
                    } catch (CharStreamException cse) {
                        if (cse instanceof CharStreamIOException) {
                            throw new TokenStreamIOException(((CharStreamIOException) cse).io);
                        }
                        // Keep the original exception as the cause so its stack trace is not lost.
                        TokenStreamException translated = new TokenStreamException(cse.getMessage());
                        translated.initCause(cse);
                        throw translated;
                    }
                }
                Token token = GroovyLexer.this.nextToken();
                int lasttype = token.getType();
                if (whitespaceIncluded) {
                    switch (lasttype) {
                    case WS:
                    case ONE_NL:
                    case SL_COMMENT:
                    case ML_COMMENT:
                        // Not significant: keep the previous significant type.
                        lasttype = lastSigTokenType;
                    }
                }
                lastSigTokenType = lasttype;
                return token;
            }
        };
    }

    /** Global switch: traceIn/traceOut do nothing unless this is true. */
    public static boolean tracing = false;

    /** Delegates to the superclass trace hook only when {@link #tracing} is on. */
    public void traceIn(String rname) throws CharStreamException {
        if (!GroovyLexer.tracing) return;
        super.traceIn(rname);
    }

    /**
     * Delegates to the superclass trace hook only when {@link #tracing} is
     * on, appending a rendering of the current return token when there is one.
     */
    public void traceOut(String rname) throws CharStreamException {
        if (!GroovyLexer.tracing) return;
        if (_returnToken != null) rname += tokenStringOf(_returnToken);
        super.traceOut(rname);
    }

    /** Lazily built map from token type value (Integer) to constant name (String). */
    private static java.util.HashMap ttypes;

    /**
     * Renders a token as {@code [NAME,"text"]}, where NAME is the name of the
     * int constant in GroovyTokenTypes with the token's type, or
     * {@code <type>} when no constant matches.
     */
    private static String tokenStringOf(Token t) {
        if (ttypes == null) {
            java.util.HashMap map = new java.util.HashMap();
            java.lang.reflect.Field[] fields = GroovyTokenTypes.class.getDeclaredFields();
            for (int i = 0; i < fields.length; i++) {
                if (fields[i].getType() != int.class) continue;
                try {
                    map.put(fields[i].get(null), fields[i].getName());
                } catch (IllegalAccessException ignored) {
                    // Best effort, trace output only: an unreadable constant
                    // is simply rendered by number below.
                }
            }
            ttypes = map;
        }
        Integer tt = new Integer(t.getType());
        Object ttn = ttypes.get(tt);
        if (ttn == null) ttn = "<"+tt+">";
        return "["+ttn+",\""+t.getText()+"\"]";
    }

    /** Parser used for error reporting; may be left unset by users of the lexer. */
    protected GroovyRecognizer parser;

    /**
     * Reports a violated lexical requirement.
     *
     * @param z        the condition that must hold
     * @param problem  description of the problem, used when {@code z} is false
     * @param solution suggested remedy, used when {@code z} is false
     * @throws SemanticException when {@code z} is false and no parser is
     *         attached, or as raised by {@code parser.requireFailed}
     */
    private void require(boolean z, String problem, String solution) throws SemanticException {
        if (z) return;
        if (parser == null) {
            // No parser attached: raise the declared exception type at the
            // scanner's current position instead of a NullPointerException.
            throw new SemanticException(problem + "; solution: " + solution,
                                        getFilename(), getLine(), getColumn());
        }
        parser.requireFailed(problem, solution);
    }

    /** Creates a lexer reading bytes from {@code in}. */
    public GroovyLexer(InputStream in) {
        this(new ByteBuffer(in));
    }

    /** Creates a lexer reading characters from {@code in}. */
    public GroovyLexer(Reader in) {
        this(new CharBuffer(in));
    }

    /** Creates a lexer on top of an existing input buffer. */
    public GroovyLexer(InputBuffer ib) {
        this(new LexerSharedInputState(ib));
    }

    /**
     * Creates a lexer on shared input state, switches on case sensitivity and
     * fills the keyword table with the keyword text to token type mappings.
     */
    public GroovyLexer(LexerSharedInputState state) {
        super(state);
        caseSensitiveLiterals = true;
        setCaseSensitive(true);
        literals = new Hashtable();
        addLiteral("byte", 101);
        addLiteral("public", 112);
        addLiteral("case", 150);
        addLiteral("short", 103);
        addLiteral("break", 144);
        addLiteral("while", 138);
        addLiteral("new", 192);
        addLiteral("instanceof", 178);
        addLiteral("implements", 127);
        addLiteral("synchronized", 117);
        addLiteral("const", 40);
        addLiteral("float", 105);
        addLiteral("package", 78);
        addLiteral("return", 143);
        addLiteral("throw", 146);
        addLiteral("null", 195);
        addLiteral("def", 81);
        addLiteral("threadsafe", 116);
        addLiteral("protected", 113);
        addLiteral("class", 88);
        addLiteral("throws", 130);
        addLiteral("do", 41);
        addLiteral("strictfp", 42);
        addLiteral("super", 93);
        addLiteral("with", 139);
        addLiteral("transient", 114);
        addLiteral("native", 115);
        addLiteral("interface", 89);
        addLiteral("final", 37);
        addLiteral("any", 108);
        addLiteral("if", 136);
        addLiteral("double", 107);
        addLiteral("volatile", 118);
        addLiteral("as", 110);
        addLiteral("assert", 147);
        addLiteral("catch", 153);
        addLiteral("try", 151);
        addLiteral("goto", 39);
        addLiteral("enum", 90);
        addLiteral("int", 104);
        addLiteral("for", 141);
        addLiteral("extends", 92);
        addLiteral("boolean", 100);
        addLiteral("char", 102);
        addLiteral("private", 111);
        addLiteral("default", 126);
        addLiteral("false", 194);
        addLiteral("this", 128);
        addLiteral("static", 80);
        addLiteral("abstract", 38);
        addLiteral("continue", 145);
        addLiteral("finally", 152);
        addLiteral("else", 137);
        addLiteral("import", 79);
        addLiteral("in", 142);
        addLiteral("void", 99);
        addLiteral("switch", 140);
        addLiteral("true", 193);
        addLiteral("long", 106);
    }

    /** Adds one keyword text to token type entry to the {@code literals} table. */
    private void addLiteral(String text, int tokenType) {
        literals.put(new ANTLRHashString(text, this), new Integer(tokenType));
    }

    public Token nextToken() throws TokenStreamException {
        Token theRetToken=null;
tryAgain:
        for (;;) {
            Token _token = null;
            int _ttype = Token.INVALID_TYPE;
            resetText();
            try {
                try {
                    switch ( LA(1)) {
                    case '(':
                    {
                        mLPAREN(true);
                        theRetToken=_returnToken;
                        break;
                    }
                    case ')':
                    {
                        mRPAREN(true);
                        theRetToken=_returnToken;
                        break;
                    }
                    case '[':
                    {
                        mLBRACK(true);
                        theRetToken=_returnToken;
                        break;
                    }
                    case ']':
                    {
                        mRBRACK(true);
                        theRetToken=_returnToken;
                        break;
                    }
                    case '{':
                    {
                        mLCURLY(true);
                        theRetToken=_returnToken;
                        break;
                    }
                    case '}':
                    {
                        mRCURLY(true);
                        theRetToken=_returnToken;
                        break;
                    }
                    case ':':
                    {
                        mCOLON(true);
                        theRetToken=_returnToken;
                        break;
                    }
                    case ',':
                    {
                        mCOMMA(true);
                        theRetToken=_returnToken;
                        break;
                    }
                    case '~':
                    {
                        mBNOT(true);
                        theRetToken=_returnToken;
                        break;
                    }
                    case ';':
                    {
                        mSEMI(true);
                        theRetToken=_returnToken;
                        break;
                    }
                    case '$':
                    {
                        mDOLLAR(true);
                        theRetToken=_returnToken;
                        break;
                    }
                    case '\t': case '\u000c': case ' ': case '\\':
                    {
                        mWS(true);
                        theRetToken=_returnToken;
                        break;
                    }
                    case '\n': case '\r':
                    {
                        mNLS(true);
                        theRetToken=_returnToken;
                        break;
                    }
                    case '"': case '\'':
                    {
                        mSTRING_LITERAL(true);
                        theRetToken=_returnToken;
                        break;
                    }
                    case '0': case '1': case '2': case '3':
                    case '4': case '5': case '6': case '7':
                    case '8': case '9':
{ 461 mNUM_INT(true); 462 theRetToken=_returnToken; 463 break; 464 } 465 case '@': 466 { 467 mAT(true); 468 theRetToken=_returnToken; 469 break; 470 } 471 default: 472 if ((LA(1)=='>') && (LA(2)=='>') && (LA(3)=='>') && (LA(4)=='=')) { 473 mBSR_ASSIGN(true); 474 theRetToken=_returnToken; 475 } 476 else if ((LA(1)=='<') && (LA(2)=='=') && (LA(3)=='>')) { 477 mCOMPARE_TO(true); 478 theRetToken=_returnToken; 479 } 480 else if ((LA(1)=='>') && (LA(2)=='>') && (LA(3)=='=')) { 481 mSR_ASSIGN(true); 482 theRetToken=_returnToken; 483 } 484 else if ((LA(1)=='>') && (LA(2)=='>') && (LA(3)=='>') && (true)) { 485 mBSR(true); 486 theRetToken=_returnToken; 487 } 488 else if ((LA(1)=='<') && (LA(2)=='<') && (LA(3)=='=')) { 489 mSL_ASSIGN(true); 490 theRetToken=_returnToken; 491 } 492 else if ((LA(1)=='.') && (LA(2)=='.') && (LA(3)=='<')) { 493 mRANGE_EXCLUSIVE(true); 494 theRetToken=_returnToken; 495 } 496 else if ((LA(1)=='.') && (LA(2)=='.') && (LA(3)=='.')) { 497 mTRIPLE_DOT(true); 498 theRetToken=_returnToken; 499 } 500 else if ((LA(1)=='=') && (LA(2)=='=') && (LA(3)=='~')) { 501 mREGEX_MATCH(true); 502 theRetToken=_returnToken; 503 } 504 else if ((LA(1)=='*') && (LA(2)=='*') && (LA(3)=='=')) { 505 mSTAR_STAR_ASSIGN(true); 506 theRetToken=_returnToken; 507 } 508 else if ((LA(1)=='=') && (LA(2)=='=') && (true)) { 509 mEQUAL(true); 510 theRetToken=_returnToken; 511 } 512 else if ((LA(1)=='!') && (LA(2)=='=')) { 513 mNOT_EQUAL(true); 514 theRetToken=_returnToken; 515 } 516 else if ((LA(1)=='+') && (LA(2)=='=')) { 517 mPLUS_ASSIGN(true); 518 theRetToken=_returnToken; 519 } 520 else if ((LA(1)=='+') && (LA(2)=='+')) { 521 mINC(true); 522 theRetToken=_returnToken; 523 } 524 else if ((LA(1)=='-') && (LA(2)=='=')) { 525 mMINUS_ASSIGN(true); 526 theRetToken=_returnToken; 527 } 528 else if ((LA(1)=='-') && (LA(2)=='-')) { 529 mDEC(true); 530 theRetToken=_returnToken; 531 } 532 else if ((LA(1)=='*') && (LA(2)=='=')) { 533 mSTAR_ASSIGN(true); 534 theRetToken=_returnToken; 535 } 536 else 
if ((LA(1)=='%') && (LA(2)=='=')) { 537 mMOD_ASSIGN(true); 538 theRetToken=_returnToken; 539 } 540 else if ((LA(1)=='>') && (LA(2)=='>') && (true)) { 541 mSR(true); 542 theRetToken=_returnToken; 543 } 544 else if ((LA(1)=='>') && (LA(2)=='=')) { 545 mGE(true); 546 theRetToken=_returnToken; 547 } 548 else if ((LA(1)=='<') && (LA(2)=='<') && (true)) { 549 mSL(true); 550 theRetToken=_returnToken; 551 } 552 else if ((LA(1)=='<') && (LA(2)=='=') && (true)) { 553 mLE(true); 554 theRetToken=_returnToken; 555 } 556 else if ((LA(1)=='^') && (LA(2)=='=')) { 557 mBXOR_ASSIGN(true); 558 theRetToken=_returnToken; 559 } 560 else if ((LA(1)=='|') && (LA(2)=='=')) { 561 mBOR_ASSIGN(true); 562 theRetToken=_returnToken; 563 } 564 else if ((LA(1)=='|') && (LA(2)=='|')) { 565 mLOR(true); 566  
|