1 package org.codehaus.groovy.syntax.lexer; 2 3 import org.codehaus.groovy.syntax.ReadException; 4 import org.codehaus.groovy.syntax.Numbers; 5 import org.codehaus.groovy.syntax.Types; 6 import org.codehaus.groovy.syntax.Token; 7 8 16 17 public class GroovyLexerBase extends LexerBase 18 { 19 20 protected StringLexer stringLexer = new StringLexer(); protected GStringLexer gstringLexer = new GStringLexer(); 23 24 28 29 public Token nextToken() throws ReadException, LexerException 30 { 31 33 Token token = null; 34 OUTER_LOOP : while (token == null) 35 { 36 37 40 if( delegate != null ) 41 { 42 token = delegate.nextToken(); 43 44 if( token == null ) 45 { 46 undelegate(); 47 } 48 else 49 { 50 break OUTER_LOOP; 51 } 52 } 53 54 55 58 char c = la(); 59 60 ROOT_SWITCH : switch (c) 61 { 62 case (CharStream.EOS) : 63 { 64 break OUTER_LOOP; 65 } 66 case (' ') : 67 case ('\t') : 68 { 69 consume(); 70 token = null; 71 break ROOT_SWITCH; 72 } 73 case ('\r') : 74 case ('\n') : 75 { 76 mark(); 77 token = tokenizeEOL(); 78 break ROOT_SWITCH; 79 } 80 case ('{') : 81 { 82 mark(); 83 consume(); 84 token = symbol( Types.LEFT_CURLY_BRACE ); 85 break ROOT_SWITCH; 86 } 87 case ('}') : 88 { 89 mark(); 90 consume(); 91 token = symbol( Types.RIGHT_CURLY_BRACE ); 92 break ROOT_SWITCH; 93 } 94 case ('[') : 95 { 96 mark(); 97 consume(); 98 token = symbol( Types.LEFT_SQUARE_BRACKET ); 99 break ROOT_SWITCH; 100 } 101 case (']') : 102 { 103 mark(); 104 consume(); 105 token = symbol( Types.RIGHT_SQUARE_BRACKET ); 106 break ROOT_SWITCH; 107 } 108 case ('(') : 109 { 110 mark(); 111 consume(); 112 token = symbol( Types.LEFT_PARENTHESIS ); 113 break ROOT_SWITCH; 114 } 115 case (')') : 116 { 117 mark(); 118 consume(); 119 token = symbol( Types.RIGHT_PARENTHESIS ); 120 break ROOT_SWITCH; 121 } 122 case ('#') : 123 { 124 consume(); 125 126 token = symbol( Types.NEWLINE, -1 ); 127 128 CONSUME_LOOP : while( true ) 129 { 130 switch (c = la()) 131 { 132 case ('\r') : 133 case ('\n') : 134 { 135 readEOL(); 136 break CONSUME_LOOP; 137 } 138 case CharStream.EOS : 139 { 140 break CONSUME_LOOP; 141 } 142 default : 143 { 144 consume(); 145 } 146 } 147 } 148 break ROOT_SWITCH; 149 } 150 case ('/') : 151 { 152 mark(); 153 consume(); 154 155 c = la(); 156 157 MULTICHAR_SWITCH : switch (c) 158 { 159 case ('=') : 160 { 161 consume(); 162 token = symbol( Types.DIVIDE_EQUAL ); 163 break MULTICHAR_SWITCH; 164 } 165 case ('/') : 166 { 167 consume(); 168 token = symbol( Types.NEWLINE, -2 ); 169 170 CONSUME_LOOP : while (true) 171 { 172 switch (c = la()) 173 { 174 case ('\r') : 175 case ('\n') : 176 { 177 readEOL(); 178 break CONSUME_LOOP; 179 } 180 case CharStream.EOS : 181 { 182 break CONSUME_LOOP; 183 } 184 default : 185 { 186 consume(); 187 } 188 } 189 } 190 break MULTICHAR_SWITCH; 191 } 192 case ('*') : 193 { 194 CONSUME_LOOP : while (true) 195 { 196 CONSUME_SWITCH : switch (c = la()) 197 { 198 case ('*') : 199 { 200 consume(); 201 if (la() == '/') 202 { 203 consume(); 204 break CONSUME_LOOP; 205 } 206 break CONSUME_SWITCH; 207 } 208 case ('\r') : 209 case ('\n') : 210 { 211 readEOL(); 212 break CONSUME_SWITCH; 213 } 214 case CharStream.EOS : 215 { 216 break CONSUME_LOOP; 217 } 218 default : 219 { 220 consume(); 221 } 222 } 223 } 224 token = null; 225 break MULTICHAR_SWITCH; 226 } 227 default : 228 { 229 token = symbol( Types.DIVIDE ); 230 break MULTICHAR_SWITCH; 231 } 232 } 233 break ROOT_SWITCH; 234 } 235 case ('%') : 236 { 237 mark(); 238 consume(); 239 240 c = la(); 241 242 MULTICHAR_SWITCH : switch (c) 243 { 244 case ('=') : 245 { 246 consume(); 247 token = symbol( Types.MOD_EQUAL ); 248 break MULTICHAR_SWITCH; 249 } 250 default : 251 { 252 token = symbol( Types.MOD ); 253 break MULTICHAR_SWITCH; 254 } 255 } 256 break ROOT_SWITCH; 257 } 258 case ('\\') : 259 { 260 mark(); 261 consume(); 262 263 c = la(); 264 265 MULTICHAR_SWITCH : switch (c) 266 { 267 case ('=') : 268 { 269 consume(); 270 token = symbol( Types.INTDIV_EQUAL ); 271 break MULTICHAR_SWITCH; 272 } 273 default : 274 { 275 token = symbol( Types.INTDIV ); 276 break MULTICHAR_SWITCH; 277 } 278 } 279 break ROOT_SWITCH; 280 } 281 case ('~') : 282 { 283 mark(); 284 consume(); 285 286 token = symbol( Types.REGEX_PATTERN ); 287 break ROOT_SWITCH; 288 } 289 case ('!') : 290 { 291 mark(); 292 consume(); 293 294 c = la(); 295 296 MULTICHAR_SWITCH : switch (c) 297 { 298 case ('=') : 299 { 300 consume(); 301 if( la() == '=' ) 302 { 303 consume(); 304 token = symbol( Types.COMPARE_NOT_IDENTICAL ); 305 } 306 else 307 { 308 token = symbol( Types.COMPARE_NOT_EQUAL ); 309 } 310 break MULTICHAR_SWITCH; 311 } 312 default : 313 { 314 token = symbol( Types.NOT ); 315 break MULTICHAR_SWITCH; 316 } 317 } 318 break ROOT_SWITCH; 319 } 320 case ('=') : 321 { 322 mark(); 323 consume(); 324 325 c = la(); 326 327 MULTICHAR_SWITCH : switch (c) 328 { 329 case ('=') : 330 { 331 consume(); 332 c = la(); 333 334 switch (c) 335 { 336 case '=' : 337 { 338 consume(); 339 token = symbol( Types.COMPARE_IDENTICAL ); 340 break; 341 } 342 case '~' : 343 { 344 consume(); 345 token = symbol( Types.MATCH_REGEX ); 346 break; 347 } 348 default : 349 { 350 token = symbol( Types.COMPARE_EQUAL ); 351 } 352 } 353 break MULTICHAR_SWITCH; 354 } 355 case '~' : 356 { 357 consume(); 358 token = symbol( Types.FIND_REGEX ); 359 break MULTICHAR_SWITCH; 360 } 361 default : 362 { 363 token = symbol( Types.EQUAL ); 364 break MULTICHAR_SWITCH; 365 } 366 } 367 break ROOT_SWITCH; 368 } 369 case ('&') : 370 { 371 mark(); 372 consume(); 373 374 c = la(); 375 376 MULTICHAR_SWITCH : switch (c) 377 { 378 case ('&') : 379 { 380 consume(); 381 382 if( la() == '=' ) 383 { 384 consume(); 385 token = symbol( Types.LOGICAL_AND_EQUAL ); 386 } 387 else 388 { 389 token = symbol( Types.LOGICAL_AND ); 390 } 391 392 break MULTICHAR_SWITCH; 393 } 394 default : 395 { 396 unexpected( c, new char[] { '&' }, 1 ); 397 } 398 } 399 break ROOT_SWITCH; 400 } 401 case ('|') : 402 { 403 mark(); 404 consume(); 405 c = la(); 406 407 MULTICHAR_SWITCH : switch (c) 408 { 409 case ('|') : 410 { 411 consume(); 412 413 if( la() == '=' ) 414 { 415 consume(); 416 token = symbol( Types.LOGICAL_OR_EQUAL ); 417 } 418 else 419 { 420 token = symbol( Types.LOGICAL_OR ); 421 } 422 423 break MULTICHAR_SWITCH; 424 } 425 default : 426 { 427 token = symbol( Types.PIPE ); 428 break MULTICHAR_SWITCH; 429 } 430 } 431 break ROOT_SWITCH; 432 } 433 case ('+') : 434 { 435 mark(); 436 consume(); 437 438 c = la(); 439 440 MULTICHAR_SWITCH : switch (c) 441 { 442 case ('+') : 443 { 444 consume(); 445 token = symbol( Types.PLUS_PLUS ); 446 break MULTICHAR_SWITCH; 447 } 448 case ('=') : 449 { 450 consume(); 451 token = symbol( Types.PLUS_EQUAL ); 452 break MULTICHAR_SWITCH; 453 } 454 default : 455 { 456 token = symbol( Types.PLUS ); 457 break MULTICHAR_SWITCH; 458 } 459 } 460 break ROOT_SWITCH; 461 } 462 case ('-') : 463 { 464 mark(); 465 consume(); 466 467 c = la(); 468 469 MULTICHAR_SWITCH : switch (c) 470 { 471 case ('-') : 472 { 473 consume(); 474 token = symbol( Types.MINUS_MINUS ); 475 break MULTICHAR_SWITCH; 476 } 477 case ('=') : 478 { 479 consume(); 480 token = symbol( Types.MINUS_EQUAL ); 481 break MULTICHAR_SWITCH; 482 } 483 case ('>') : 484 { 485 consume(); 486 token = symbol( Types.NAVIGATE ); 487 break MULTICHAR_SWITCH; 488 } 489 default : 490 { 491 token = symbol( Types.MINUS ); 492 break MULTICHAR_SWITCH; 493 } 494 } 495 break ROOT_SWITCH; 496 } 497 case ('*') : 498 { 499 mark(); 500 consume(); 501 502 c = la(); 503 504 MULTICHAR_SWITCH : switch (c) 505 { 506 case ('=') : 507 { 508 consume(); 509 token = symbol( Types.MULTIPLY_EQUAL ); 510 break MULTICHAR_SWITCH; 511 } 512 default : 513 { 514 token = symbol( Types.MULTIPLY ); 515 break MULTICHAR_SWITCH; 516 } 517 } 518 break ROOT_SWITCH; 519 } 520 case (':') : 521 { 522 mark(); 523 consume(); 524 525 token = symbol( Types.COLON ); 526 break ROOT_SWITCH; 527 } 528 case (',') : 529 { 530 mark(); 531 consume(); 532 token = symbol( Types.COMMA ); 533 break ROOT_SWITCH; 534 } 535 case (';') : 536 { 537 mark(); 538 consume(); 539 token = symbol( Types.SEMICOLON ); 540 break ROOT_SWITCH; 541 } 542 case ('?') : 543 { 544 mark(); 545 consume(); 546 token = symbol( Types.QUESTION ); 547 break ROOT_SWITCH; 548 } 549 case ('<') : 550 { 551 mark(); 552 consume(); 553 554 c = la(); 555 556 MULTICHAR_SWITCH : switch (c) 557 { 558 case ('=') : 559 { 560 consume(); 561 c = la(); 562 if (c == '>') 563 { 564 consume(); 565 token = symbol( Types.COMPARE_TO ); 566 } 567 else 568 { 569 token = symbol( Types.COMPARE_LESS_THAN_EQUAL ); 570 } 571 break MULTICHAR_SWITCH; 572 } 573 case ('<') : 574 { 575 consume(); 576 c = la(); 577 578 584 if (c == '<') 585 { 586 consume(); 587 588 StringBuffer marker = new StringBuffer (); 589 while( (c = la()) != '\n' && c != '\r' && c != CharStream.EOS ) 590 { 591 marker.append( consume() ); 592 } 593 594 readEOL(); 595 596 Lexer child = new HereDocLexer( marker.toString() ); 597 delegate( child ); 598 599 gstringLexer.reset(); 600 child.delegate( gstringLexer ); 601 602 break ROOT_SWITCH; 603 } 604 else 605 { 606 token = symbol( Types.LEFT_SHIFT ); 607 break ROOT_SWITCH; 608 } 609 } 610 default : 611 { 612 token = symbol( Types.COMPARE_LESS_THAN ); 613 break MULTICHAR_SWITCH; 614 } 615 } 616 break ROOT_SWITCH; 617 } 618 case ('>') : 619 { 620 mark(); 621 consume(); 622 623 c = la(); 624 625 MULTICHAR_SWITCH : switch (c) 626 { 627 case ('=') : 628 { 629 consume(); 630 token = symbol( Types.COMPARE_GREATER_THAN_EQUAL ); 631 break MULTICHAR_SWITCH; 632 } 633 case ('>') : 634 { 635 consume(); 636 if( la() == '>' ) 637 { 638 consume(); 639 token = symbol( Types.RIGHT_SHIFT_UNSIGNED ); 640 } 641 else 642 { 643 token = symbol( Types.RIGHT_SHIFT ); 644 } 645 break MULTICHAR_SWITCH; 646 } 647 default : 648 { 649 token = symbol( Types.COMPARE_GREATER_THAN ); 650 break MULTICHAR_SWITCH; 651 } 652 } 653 break ROOT_SWITCH; 654 } 655 case ('\'') : 656 { 657 mark(); 658 659 stringLexer.reset(); 660 stringLexer.allowGStrings(false); 661 delegate( stringLexer ); 662 663 break ROOT_SWITCH; 664 } 665 case ('"') : 666 { 667 mark(); 668 669 stringLexer.reset(); 670 stringLexer.allowGStrings(true); 671 delegate( stringLexer ); 672 673 gstringLexer.reset(); 674 stringLexer.delegate( gstringLexer ); 675 676 break ROOT_SWITCH; 677 } 678 case ('0') : 679 case ('1') : 680 case ('2') : 681 case ('3') : 682 case ('4') : 683 case ('5') : 684 case ('6') : 685 case ('7') : 686 case ('8') : 687 case ('9') : 688 case ('.') : 689 { 690 mark(); 691 692 696 if( c == '.' && !Numbers.isDigit(la(2)) ) 697 { 698 consume(); 699 if( la() == '.' ) 700 { 701 consume(); 702 if( la() == '.' ) 703 { 704 consume(); 705 token = symbol( Types.DOT_DOT_DOT ); 706 } 707 else 708 { 709 token = symbol( Types.DOT_DOT ); 710 } 711 } 712 else 713 { 714 token = symbol( Types.DOT ); 715 } 716 break ROOT_SWITCH; 717 } 718 719 720 723 StringBuffer numericLiteral = new StringBuffer (); 724 boolean isDecimal = false; 725 726 727 731 char c2 = la(2); 732 if( c == '0' && (c2 == 'X' || c2 == 'x' || Numbers.isDigit(c2)) ) 733 { 734 numericLiteral.append( consume() ); 735 736 if( (c = la()) == 'X' || c == 'x' ) 737 { 738 numericLiteral.append( consume() ); 739 if( Numbers.isHexDigit(la()) ) 740 { 741 while( Numbers.isHexDigit(la()) ) 742 { 743 numericLiteral.append( consume() ); 744 } 745 } 746 else 747 { 748 unexpected( la(), numericLiteral.length(), "expected hexadecimal digit" ); 749 } 750 } 751 else 752 { 753 while( Numbers.isOctalDigit(la()) ) 754 { 755 numericLiteral.append( consume() ); 756 } 757 758 if( Numbers.isDigit(la()) ) 759 { 760 unexpected( la(), numericLiteral.length(), "expected octal digit" ); 761 } 762 } 763 } 764 765 766 769 else 770 { 771 while( Numbers.isDigit(la()) ) 772 { 773 numericLiteral.append( consume() ); 774 } 775 776 777 780 if( la() == '.' && Numbers.isDigit(la(2)) ) 781 { 782 isDecimal = true; 783 784 numericLiteral.append( consume() ); 785 while( Numbers.isDigit(la()) ) 786 { 787 numericLiteral.append( consume() ); 788 } 789 790 793 if( (c = la()) == 'e' || c == 'E' ) 794 { 795 numericLiteral.append( consume() ); 796 797 if (la() == '+' || la() == '-') 798 { 799 numericLiteral.append(consume()); 800 } 801 802 if( Numbers.isDigit(la()) ) 803 { 804 while( Numbers.isDigit(la()) ) 805 { 806 numericLiteral.append( consume() ); 807 } 808 } 809 else 810 { 811 unexpected( la(), numericLiteral.length(), "expected exponent" ); 812 } 813 } 814 } 815 } 816 817 818 821 if( Numbers.isNumericTypeSpecifier(la(), isDecimal) ) 822 { 823 numericLiteral.append( consume() ); 824 } 825 826 827 830 if( Character.isJavaIdentifierPart(c = la()) ) 831 { 832 unexpected( c, numericLiteral.length(), "expected end of numeric literal" ); 833 } 834 835 836 839 if( isDecimal ) 840 { 841 token = Token.newDecimal( numericLiteral.toString(), getStartLine(), getStartColumn() ); 842 } 843 else 844 { 845 token = Token.newInteger( numericLiteral.toString(), getStartLine(), getStartColumn() ); 846 } 847 848 break ROOT_SWITCH; 849 } 850 default : 851 { 852 mark(); 853 if (Character.isJavaIdentifierStart(c)) 854 { 855 StringBuffer identifier = new StringBuffer (); 856 857 IDENTIFIER_LOOP : while (true) 858 { 859 c = la(); 860 861 if (Character.isJavaIdentifierPart(c)) 862 { 863 identifier.append(consume()); 864 } 865 else 866 { 867 break IDENTIFIER_LOOP; 868 } 869 } 870 871 String text = identifier.toString(); 872 token = Token.newKeyword( text, getStartLine(), getStartColumn() ); 873 874 if (token == null) 875 { 876 token = Token.newIdentifier( text, getStartLine(), getStartColumn() ); 877 } 878 } 879 else 880 { 881 unexpected( c, 1 ); 882 } 883 884 break ROOT_SWITCH; 885 } 886 } 887 } 888 889 891 return token; 892 } 893 894 } 895 | Popular Tags |