package org.netbeans.modules.ruby.rhtml;

import java.util.logging.Level;
import java.util.logging.Logger;
import org.netbeans.api.lexer.Token;
import org.netbeans.spi.lexer.Lexer;
import org.netbeans.spi.lexer.LexerInput;
import org.netbeans.spi.lexer.LexerRestartInfo;
import org.netbeans.spi.lexer.TokenFactory;

/**
 * Hand-written state-machine lexer producing {@link RhtmlTokenId} tokens.
 *
 * <p>The lexer reads one character at a time from the {@link LexerInput} and
 * switches on the current automaton state. Besides plain text it recognises
 * <code>&lt;% ... %&gt;</code> scriptlets, <code>&lt;%-- ... --%&gt;</code>
 * comments, <code>&lt;%@ ... %&gt;</code> directives, tags whose name starts
 * with <code>jsp:</code>, and <code>${...}</code> / <code>#{...}</code>
 * expressions.
 *
 * <p>The restart state handed to the lexer framework is a single integer that
 * packs both the current state and the state to return to after an
 * expression; see {@link #state()}.
 */
public final class RhtmlLexer implements Lexer<RhtmlTokenId> {

    private static final Logger LOGGER = Logger.getLogger(RhtmlLexer.class.getName());

    private static final int EOF = LexerInput.EOF;

    // ---- automaton states ------------------------------------------------
    // The numeric values are part of the encoded restart state, so they must
    // stay below 1000 and must not be renumbered.

    /** Plain text; the initial state. */
    private static final int INIT = 0;
    /** Generic error state; no transition in this class enters it. */
    private static final int ISI_ERROR = 1;
    /** After a '<' read in INIT. */
    private static final int ISA_LT = 2;
    /** Inside the name of an opening tag. */
    private static final int ISI_TAGNAME = 3;
    /** Inside the name of a directive. */
    private static final int ISI_DIRNAME = 4;
    /** Between the parts of a tag (after the name, an attribute, a value...). */
    private static final int ISP_TAG = 5;
    /** Between the parts of a directive. */
    private static final int ISP_DIR = 6;
    /** Inside a run of blanks within a tag. */
    private static final int ISI_TAG_I_WS = 7;
    /** Inside a run of blanks within a directive. */
    private static final int ISI_DIR_I_WS = 8;
    /** Inside the name of a closing tag. */
    private static final int ISI_ENDTAG = 9;
    /** Inside an attribute name of a tag. */
    private static final int ISI_TAG_ATTR = 10;
    /** Inside an attribute name of a directive. */
    private static final int ISI_DIR_ATTR = 11;
    /** After '=' in a tag. */
    private static final int ISP_TAG_EQ = 12;
    /** After '=' in a directive. */
    private static final int ISP_DIR_EQ = 13;
    /** Inside a double-quoted value of a tag. */
    private static final int ISI_TAG_STRING = 14;
    /** Inside a double-quoted value of a directive. */
    private static final int ISI_DIR_STRING = 15;
    /** After a backslash inside a double-quoted tag value. */
    private static final int ISI_TAG_STRING_B = 16;
    /** After a backslash inside a double-quoted directive value. */
    private static final int ISI_DIR_STRING_B = 17;
    /** Inside a single-quoted value of a tag. */
    private static final int ISI_TAG_STRING2 = 18;
    /** Inside a single-quoted value of a directive. */
    private static final int ISI_DIR_STRING2 = 19;
    /** After a backslash inside a single-quoted tag value. */
    private static final int ISI_TAG_STRING2_B = 20;
    /** After a backslash inside a single-quoted directive value. */
    private static final int ISI_DIR_STRING2_B = 21;
    /** After '/' inside a tag. */
    private static final int ISA_ENDSLASH = 22;
    /** After '%' inside a directive. */
    private static final int ISA_ENDPC = 23;
    /** After "<%". */
    private static final int ISA_LT_PC = 24;
    /** Inside a "<%--" comment. */
    private static final int ISI_JSP_COMMENT = 25;
    /** Inside a comment, after one '-'. */
    private static final int ISI_JSP_COMMENT_M = 26;
    /** Inside a comment, after "--". */
    private static final int ISI_JSP_COMMENT_MM = 27;
    /** Inside a comment, after "--%". */
    private static final int ISI_JSP_COMMENT_MMP = 28;
    /** Error inside a tag. */
    private static final int ISI_TAG_ERROR = 30;
    /** Error inside a directive. */
    private static final int ISI_DIR_ERROR = 31;
    /** Directive error variant; no transition in this class enters it. */
    private static final int ISI_DIR_ERROR_P = 32;
    /** After "<%@". */
    private static final int ISA_LT_PC_AT = 33;
    /** After "</". */
    private static final int ISA_LT_SLASH = 34;
    /** After "<%-". */
    private static final int ISA_LT_PC_DASH = 35;
    /** Inside scriptlet code. */
    private static final int ISI_SCRIPTLET = 36;
    /** Inside scriptlet code, after a '%'. */
    private static final int ISP_SCRIPTLET_PC = 37;
    /** After '$' or '#', which may start an expression. */
    private static final int ISA_EL_DELIM = 38;
    /** Inside a "${" / "#{" expression. */
    private static final int ISI_EL = 39;

    private final LexerInput input;

    private final TokenFactory<RhtmlTokenId> tokenFactory;

    /** Current automaton state. */
    private int state = INIT;

    /** State to return to once a '$'/'#' sequence has been dealt with. */
    private int before_el_state = INIT;

    /**
     * Creates a lexer, restoring the automaton from the restart information.
     *
     * @param info supplies the input, the token factory and, optionally, a
     *             state previously returned by {@link #state()}
     */
    public RhtmlLexer(LexerRestartInfo<RhtmlTokenId> info) {
        this.input = info.input();
        this.tokenFactory = info.tokenFactory();
        if (info.state() == null) {
            this.state = INIT;
        } else {
            // Inverse of the packing done in state().
            int encoded = ((Integer) info.state()).intValue();
            before_el_state = encoded / 1000;
            state = encoded % 1000;
        }
    }

    /**
     * Returns the restart state as a boxed integer:
     * {@code state + before_el_state * 1000}.
     */
    public Object state() {
        return state + before_el_state * 1000;
    }

    /** Returns whether {@code ch} may be part of a Java identifier. */
    public boolean isIdentifierPart(char ch) {
        return Character.isJavaIdentifierPart(ch);
    }

    /**
     * Creates a token of the given id spanning the characters read so far.
     * A zero-length token is reported through the logger (with a stack trace)
     * but the token is still requested from the factory.
     */
    private Token<RhtmlTokenId> token(RhtmlTokenId id) {
        if (input.readLength() == 0) {
            String message = "Error - token length is zero!; state = " + state;
            LOGGER.log(Level.WARNING, message, new Exception(message));
        }
        return tokenFactory.createToken(id);
    }

    /** Returns whether {@code tagName} starts with the {@code jsp:} prefix. */
    protected boolean isJspTag(String tagName) {
        return tagName.startsWith("jsp:");
    }

    /**
     * Looks ahead over the run of name characters at the current position and
     * reports whether it forms a {@code jsp:} tag name. Everything read here
     * is backed up again, so the input position is unchanged on return.
     */
    private boolean followsJspTag() {
        int alreadyRead = input.readLength();
        int lookahead = 0;
        while (true) {
            int actChar = input.read();
            lookahead++;
            boolean nameChar = Character.isLetter(actChar)
                    || Character.isDigit(actChar)
                    || (actChar == '_')
                    || (actChar == '-')
                    || (actChar == ':')
                    || (actChar == '.');
            if (!nameChar || (actChar == EOF)) {
                // The candidate may include the terminating character; that
                // does not matter for the prefix test.
                String tagName = input.readText().toString().substring(alreadyRead);
                input.backup(lookahead);
                return isJspTag(tagName);
            }
        }
    }

    /**
     * Returns the next token, or {@code null} when the input is exhausted.
     *
     * <p>Characters are consumed until some state decides a token is complete.
     * When the end of input is hit with characters still pending, the token
     * kind is chosen from the state the automaton stopped in.
     */
    public Token<RhtmlTokenId> nextToken() {
        int actChar;
        while (true) {
            actChar = input.read();

            if (actChar == EOF) {
                if (input.readLengthEOF() == 1) {
                    // Only the EOF itself was read: nothing left to tokenize.
                    return null;
                } else {
                    // Un-read the EOF and flush the pending text below.
                    input.backup(1);
                    break;
                }
            }

            switch (state) {
                case INIT:
                    switch (actChar) {
                        case '<':
                            state = ISA_LT;
                            break;
                        case '$':
                        case '#':
                            before_el_state = state;
                            state = ISA_EL_DELIM;
                            break;
                    }
                    break;

                case ISA_EL_DELIM:
                    switch (actChar) {
                        case '{':
                            if (input.readLength() > 2) {
                                // Text precedes the delimiter: emit it first and
                                // read the two delimiter characters again.
                                input.backup(2);
                                state = before_el_state;
                                before_el_state = INIT;
                                return token(RhtmlTokenId.TEXT);
                            }
                            state = ISI_EL;
                            break;
                        default:
                            // A lone '$' / '#'. Re-lex the current character in
                            // the restored state; otherwise e.g. the closing
                            // quote of "#" or the '<' of "$<%" would be lost.
                            input.backup(1);
                            state = before_el_state;
                            before_el_state = INIT;
                    }
                    break;

                case ISI_EL:
                    if (actChar == '}') {
                        state = before_el_state;
                        before_el_state = INIT;
                        return token(RhtmlTokenId.EL);
                    }
                    break;

                case ISA_LT:
                    if (Character.isLetter(actChar) || (actChar == '_')) {
                        input.backup(1); // un-read the first name character
                        if (followsJspTag()) {
                            if (input.readLength() > 1) {
                                // Text precedes '<': emit it, '<' is read again.
                                input.backup(1);
                                state = INIT;
                                return token(RhtmlTokenId.TEXT);
                            }
                            state = ISI_TAGNAME;
                            break;
                        } else {
                            // Not a jsp: tag - the markup is ordinary text.
                            state = INIT;
                            break;
                        }
                    }

                    switch (actChar) {
                        case '/':
                            state = ISA_LT_SLASH;
                            break;
                        case '%':
                            state = ISA_LT_PC;
                            break;
                        default:
                            // A lone '<'. Re-lex the current character as text
                            // so that a following '<', '$' or '#' is not lost.
                            input.backup(1);
                            state = INIT;
                    }
                    break;

                case ISA_LT_SLASH:
                    if (Character.isLetter(actChar) || (actChar == '_')) {
                        input.backup(1); // un-read the first name character
                        if (followsJspTag()) {
                            if (input.readLength() > 2) {
                                // Text precedes "</": emit it first.
                                input.backup(2);
                                state = INIT;
                                return token(RhtmlTokenId.TEXT);
                            } else {
                                state = ISI_ENDTAG;
                            }
                            break;
                        } else {
                            state = INIT;
                            break;
                        }
                    }

                    state = ISI_TAG_ERROR;
                    break;

                case ISI_TAGNAME:
                case ISI_DIRNAME:
                    if (!(Character.isLetter(actChar)
                            || Character.isDigit(actChar)
                            || (actChar == '_')
                            || (actChar == '-')
                            || (actChar == ':')
                            || (actChar == '.'))) {
                        // End of the name.
                        switch (actChar) {
                            case '<':
                                state = INIT;
                                input.backup(1);
                                break;
                            case '/':
                                input.backup(1);
                                state = ((state == ISI_TAGNAME) ? ISP_TAG : ISP_DIR);
                                break;
                            case '>':
                                state = INIT;
                                break;
                            case ' ':
                                input.backup(1);
                                state = ((state == ISI_TAGNAME) ? ISP_TAG : ISP_DIR);
                                break;
                            default:
                                state = ((state == ISI_TAGNAME) ? ISP_TAG : ISP_DIR);
                        }
                        return token(RhtmlTokenId.TAG);
                    }
                    break;

                case ISP_TAG:
                case ISP_DIR:
                    if (Character.isLetter(actChar) || (actChar == '_')) {
                        state = ((state == ISP_TAG) ? ISI_TAG_ATTR : ISI_DIR_ATTR);
                        break;
                    }
                    switch (actChar) {
                        case '\n':
                            return token(RhtmlTokenId.EOL);
                        case '>':
                            if (state == ISP_TAG) {
                                state = INIT;
                                return token(RhtmlTokenId.SYMBOL);
                            } else {
                                break;
                            }
                        case '/':
                            if (state == ISP_TAG) {
                                state = ISA_ENDSLASH;
                                break;
                            } else {
                                break;
                            }
                        case '%':
                            if (state == ISP_DIR) {
                                state = ISA_ENDPC;
                                break;
                            } else {
                                state = ISI_TAG_ERROR;
                                break;
                            }
                        case '=':
                            state = ((state == ISP_TAG) ? ISP_TAG_EQ : ISP_DIR_EQ);
                            return token(RhtmlTokenId.SYMBOL);
                        case ' ':
                        case '\t':
                            state = ((state == ISP_TAG) ? ISI_TAG_I_WS : ISI_DIR_I_WS);
                            break;
                        case '<':
                            state = INIT;
                            input.backup(1);
                            if (input.readLength() > 0) {
                                return token(RhtmlTokenId.TAG);
                            }
                            // Nothing pending: lex '<' from INIT instead of
                            // emitting an empty token.
                            break;
                        default:
                            state = ((state == ISP_TAG) ? ISI_TAG_ERROR : ISI_DIR_ERROR);
                            break;
                    }
                    break;

                case ISI_TAG_I_WS:
                case ISI_DIR_I_WS:
                    switch (actChar) {
                        case ' ':
                        case '\t':
                            break;
                        case '<':
                            state = INIT;
                            input.backup(1);
                            if (input.readLength() > 0) {
                                return token(RhtmlTokenId.TAG);
                            }
                            // This state can be entered with nothing pending
                            // (after an EOL token); avoid an empty token.
                            break;
                        default:
                            state = ((state == ISI_TAG_I_WS) ? ISP_TAG : ISP_DIR);
                            input.backup(1);
                            if (input.readLength() > 0) {
                                return token(RhtmlTokenId.WHITESPACE);
                            }
                            break;
                    }
                    break;

                case ISI_ENDTAG:
                    if (!(Character.isLetter(actChar)
                            || Character.isDigit(actChar)
                            || (actChar == '_')
                            || (actChar == '-')
                            || (actChar == ':'))) {
                        state = ISP_TAG;
                        input.backup(1);
                        return token(RhtmlTokenId.TAG);
                    }
                    break;

                case ISI_TAG_ATTR:
                case ISI_DIR_ATTR:
                    if (!(Character.isLetter(actChar)
                            || Character.isDigit(actChar)
                            || (actChar == '_')
                            || (actChar == ':')
                            || (actChar == '-'))) {
                        state = ((state == ISI_TAG_ATTR) ? ISP_TAG : ISP_DIR);
                        input.backup(1);
                        return token(RhtmlTokenId.ATTRIBUTE);
                    }
                    break;

                case ISP_TAG_EQ:
                case ISP_DIR_EQ:
                    switch (actChar) {
                        case '\n':
                            return token(RhtmlTokenId.EOL);
                        case '"':
                            state = ((state == ISP_TAG_EQ) ? ISI_TAG_STRING : ISI_DIR_STRING);
                            break;
                        case '\'':
                            state = ((state == ISP_TAG_EQ) ? ISI_TAG_STRING2 : ISI_DIR_STRING2);
                            break;
                        case ' ':
                        case '\t':
                            break;
                        default:
                            // Not a value: let the in-tag state handle it.
                            state = ((state == ISP_TAG_EQ) ? ISP_TAG : ISP_DIR);
                            input.backup(1);
                            break;
                    }
                    break;

                case ISI_TAG_STRING:
                case ISI_DIR_STRING:
                case ISI_TAG_STRING2:
                case ISI_DIR_STRING2:
                    if ((actChar == '"') && ((state == ISI_TAG_STRING) || (state == ISI_DIR_STRING))) {
                        state = ((state == ISI_TAG_STRING) ? ISP_TAG : ISP_DIR);
                        return token(RhtmlTokenId.ATTR_VALUE);
                    }

                    if ((actChar == '\'') && ((state == ISI_TAG_STRING2) || (state == ISI_DIR_STRING2))) {
                        state = ((state == ISI_TAG_STRING2) ? ISP_TAG : ISP_DIR);
                        return token(RhtmlTokenId.ATTR_VALUE);
                    }

                    switch (actChar) {
                        case '\\':
                            switch (state) {
                                case ISI_TAG_STRING:
                                    state = ISI_TAG_STRING_B;
                                    break;
                                case ISI_DIR_STRING:
                                    state = ISI_DIR_STRING_B;
                                    break;
                                case ISI_TAG_STRING2:
                                    state = ISI_TAG_STRING2_B;
                                    break;
                                case ISI_DIR_STRING2:
                                    state = ISI_DIR_STRING2_B;
                                    break;
                            }
                            break;
                        case '\n':
                            return token(RhtmlTokenId.EOL);
                        case '$':
                        case '#':
                            before_el_state = state;
                            state = ISA_EL_DELIM;
                            break;
                        default:
                    }
                    break;

                case ISI_TAG_STRING_B:
                case ISI_DIR_STRING_B:
                case ISI_TAG_STRING2_B:
                case ISI_DIR_STRING2_B:
                    switch (actChar) {
                        case '"':
                        case '\'':
                        case '\\':
                            // Escaped character: keep it as part of the value.
                            break;
                        default:
                            input.backup(1);
                            break;
                    }
                    switch (state) {
                        case ISI_TAG_STRING_B:
                            state = ISI_TAG_STRING;
                            break;
                        case ISI_DIR_STRING_B:
                            state = ISI_DIR_STRING;
                            break;
                        case ISI_TAG_STRING2_B:
                            state = ISI_TAG_STRING2;
                            break;
                        case ISI_DIR_STRING2_B:
                            state = ISI_DIR_STRING2;
                            break;
                    }
                    break;

                case ISA_ENDSLASH:
                    switch (actChar) {
                        case '>':
                            state = INIT;
                            return token(RhtmlTokenId.SYMBOL);
                        case '\n':
                            state = ISI_TAG_ERROR;
                            input.backup(1);
                            return token(RhtmlTokenId.SYMBOL);
                        default:
                            state = ISP_TAG;
                            input.backup(1);
                            return token(RhtmlTokenId.SYMBOL);
                    }

                case ISA_ENDPC:
                    switch (actChar) {
                        case '>':
                            state = INIT;
                            return token(RhtmlTokenId.SYMBOL);
                        case '\n':
                            state = ISI_DIR_ERROR;
                            input.backup(1);
                            return token(RhtmlTokenId.SYMBOL);
                        default:
                            state = ISP_DIR;
                            input.backup(1);
                            return token(RhtmlTokenId.SYMBOL);
                    }

                case ISA_LT_PC:
                    switch (actChar) {
                        case '@':
                            if (input.readLength() == 3) {
                                // Exactly "<%@" has been read.
                                state = ISA_LT_PC_AT;
                                return token(RhtmlTokenId.SYMBOL);
                            } else {
                                // Emit the preceding text; the delimiter is read again.
                                input.backup(3);
                                state = INIT;
                                return token(RhtmlTokenId.TEXT);
                            }
                        case '-':
                            state = ISA_LT_PC_DASH;
                            break;
                        case '!':
                        case '=':
                            if (input.readLength() == 3) {
                                state = ISI_SCRIPTLET;
                                return token(RhtmlTokenId.SYMBOL2);
                            } else {
                                input.backup(3);
                                state = INIT;
                                return token(RhtmlTokenId.TEXT);
                            }
                        default:
                            if (input.readLength() == 3) {
                                // Plain "<%": the third character is scriptlet code.
                                state = ISI_SCRIPTLET;
                                input.backup(1);
                                return token(RhtmlTokenId.SYMBOL2);
                            } else {
                                input.backup(3);
                                state = INIT;
                                return token(RhtmlTokenId.TEXT);
                            }
                    }
                    break;

                case ISI_SCRIPTLET:
                    switch (actChar) {
                        case '%':
                            state = ISP_SCRIPTLET_PC;
                            break;
                    }
                    break;

                case ISP_SCRIPTLET_PC:
                    switch (actChar) {
                        case '>':
                            if (input.readLength() == 2) {
                                // Exactly "%>" has been read.
                                state = INIT;
                                return token(RhtmlTokenId.SYMBOL2);
                            } else {
                                // Emit the code; "%>" is read again.
                                input.backup(2);
                                state = ISI_SCRIPTLET;
                                return token(RhtmlTokenId.SCRIPTLET);
                            }
                        default:
                            state = ISI_SCRIPTLET;
                            break;
                    }
                    break;

                case ISA_LT_PC_DASH:
                    switch (actChar) {
                        case '-':
                            if (input.readLength() == 4) {
                                // Exactly "<%--" has been read.
                                state = ISI_JSP_COMMENT;
                            } else {
                                input.backup(4);
                                state = INIT;
                                return token(RhtmlTokenId.TEXT);
                            }
                            break;
                        default:
                            state = INIT;
                            return token(RhtmlTokenId.TEXT);
                    }
                    // NOTE(review): there is no break here, so the second '-'
                    // of "<%--" is also processed by the comment state below
                    // and moves it straight to ISI_JSP_COMMENT_M. Preserved
                    // as found; confirm whether this is intended.
                    // falls through

                case ISI_JSP_COMMENT:
                    switch (actChar) {
                        case '\n':
                            if (input.readLength() == 1) {
                                return token(RhtmlTokenId.EOL);
                            } else {
                                input.backup(1);
                                return token(RhtmlTokenId.COMMENT);
                            }
                        case '-':
                            state = ISI_JSP_COMMENT_M;
                            break;
                    }
                    break;

                case ISI_JSP_COMMENT_M:
                    switch (actChar) {
                        case '\n':
                            state = ISI_JSP_COMMENT;
                            if (input.readLength() == 1) {
                                return token(RhtmlTokenId.EOL);
                            } else {
                                input.backup(1);
                                return token(RhtmlTokenId.COMMENT);
                            }
                        case '-':
                            state = ISI_JSP_COMMENT_MM;
                            break;
                        default:
                            state = ISI_JSP_COMMENT;
                            break;
                    }
                    break;

                case ISI_JSP_COMMENT_MM:
                    switch (actChar) {
                        case '\n':
                            state = ISI_JSP_COMMENT;
                            if (input.readLength() == 1) {
                                return token(RhtmlTokenId.EOL);
                            } else {
                                input.backup(1);
                                return token(RhtmlTokenId.COMMENT);
                            }
                        case '%':
                            state = ISI_JSP_COMMENT_MMP;
                            break;
                        case '-':
                            state = ISI_JSP_COMMENT_MM;
                            break;
                        default:
                            state = ISI_JSP_COMMENT;
                            break;
                    }
                    break;

                case ISI_JSP_COMMENT_MMP:
                    switch (actChar) {
                        case '\n':
                            state = ISI_JSP_COMMENT;
                            if (input.readLength() == 1) {
                                return token(RhtmlTokenId.EOL);
                            } else {
                                input.backup(1);
                                return token(RhtmlTokenId.COMMENT);
                            }
                        case '>':
                            state = INIT;
                            return token(RhtmlTokenId.COMMENT);
                        default:
                            state = ISI_JSP_COMMENT;
                            break;
                    }
                    break;

                case ISI_ERROR:
                    switch (actChar) {
                        case '\n':
                            state = INIT;
                            input.backup(1);
                            return token(RhtmlTokenId.ERROR);
                        case '<':
                            state = ISA_LT;
                            input.backup(1);
                            return token(RhtmlTokenId.ERROR);
                    }
                    break;

                case ISI_TAG_ERROR:
                    switch (actChar) {
                        case '\n':
                            if (input.readLength() == 1) {
                                state = ISI_TAG_I_WS;
                                return token(RhtmlTokenId.EOL);
                            } else {
                                return token(RhtmlTokenId.ERROR);
                            }
                        case '>':
                        case ' ':
                        case '\t':
                            state = ISP_TAG;
                            input.backup(1);
                            if (input.readLength() > 0) {
                                return token(RhtmlTokenId.ERROR);
                            }
                            // Nothing pending (an ERROR token ending in '\n'
                            // was just emitted): continue in ISP_TAG.
                            break;
                    }
                    break;

                case ISI_DIR_ERROR:
                    switch (actChar) {
                        case '\n':
                            if (input.readLength() == 1) {
                                state = ISI_DIR_I_WS;
                                return token(RhtmlTokenId.EOL);
                            } else {
                                return token(RhtmlTokenId.ERROR);
                            }
                        case '%':
                        case '\t':
                        case ' ':
                            state = ISP_DIR;
                            input.backup(1);
                            if (input.readLength() > 0) {
                                return token(RhtmlTokenId.ERROR);
                            }
                            // Nothing pending: continue in ISP_DIR.
                            break;
                    }
                    break;

                case ISI_DIR_ERROR_P:
                    switch (actChar) {
                        case '\n':
                            if (input.readLength() == 1) {
                                state = ISI_DIR_I_WS;
                                return token(RhtmlTokenId.EOL);
                            } else {
                                input.backup(1);
                                return token(RhtmlTokenId.ERROR);
                            }
                        case '>':
                            input.backup(2);
                            state = ISI_DIR_I_WS;
                            return token(RhtmlTokenId.ERROR);
                    }
                    break;

                case ISA_LT_PC_AT:
                    if (Character.isLetter(actChar) || (actChar == '_')) {
                        state = ISI_DIRNAME;
                    }

                    switch (actChar) {
                        case '\n':
                            if (input.readLength() == 1) {
                                return token(RhtmlTokenId.EOL);
                            } else {
                                input.backup(1);
                                return token(RhtmlTokenId.TAG);
                            }
                    }
                    break;

            }

        }

        // End of input reached with characters still pending: the token kind
        // depends on the state the automaton stopped in.
        switch (state) {
            case INIT:
                if (input.readLength() == 0) {
                    return null;
                } else {
                    return token(RhtmlTokenId.TEXT);
                }
            case ISI_ERROR:
            case ISI_TAG_ERROR:
                state = INIT;
                return token(RhtmlTokenId.ERROR);
            case ISI_DIR_ERROR:
            case ISI_DIR_ERROR_P:
                state = INIT;
                return token(RhtmlTokenId.ERROR);
            case ISA_LT:
            case ISA_LT_SLASH:
            case ISA_ENDSLASH:
            case ISP_TAG_EQ:
                state = INIT;
                return token(RhtmlTokenId.SYMBOL);
            case ISA_LT_PC:
            case ISA_LT_PC_DASH:
            case ISA_ENDPC:
            case ISP_DIR_EQ:
                state = INIT;
                return token(RhtmlTokenId.SYMBOL);
            case ISI_TAGNAME:
            case ISI_ENDTAG:
                state = INIT;
                return token(RhtmlTokenId.TAG);
            case ISI_DIRNAME:
                state = INIT;
                return token(RhtmlTokenId.TAG);
            case ISP_TAG:
            case ISI_TAG_I_WS:
                state = INIT;
                return token(RhtmlTokenId.TAG);
            case ISP_DIR:
            case ISI_DIR_I_WS:
            case ISA_LT_PC_AT:
                state = INIT;
                return token(RhtmlTokenId.TAG);
            case ISI_TAG_ATTR:
                state = INIT;
                return token(RhtmlTokenId.ATTRIBUTE);
            case ISI_DIR_ATTR:
                state = INIT;
                return token(RhtmlTokenId.ATTRIBUTE);
            case ISI_TAG_STRING:
            case ISI_TAG_STRING_B:
            case ISI_TAG_STRING2:
            case ISI_TAG_STRING2_B:
                state = INIT;
                return token(RhtmlTokenId.ATTR_VALUE);
            case ISI_DIR_STRING:
            case ISI_DIR_STRING_B:
            case ISI_DIR_STRING2:
            case ISI_DIR_STRING2_B:
                state = INIT;
                return token(RhtmlTokenId.ATTR_VALUE);
            case ISI_JSP_COMMENT:
            case ISI_JSP_COMMENT_M:
            case ISI_JSP_COMMENT_MM:
            case ISI_JSP_COMMENT_MMP:
                state = INIT;
                return token(RhtmlTokenId.COMMENT);
            case ISA_EL_DELIM:
                state = INIT;
                return token(RhtmlTokenId.TEXT);
            case ISI_EL:
                state = INIT;
                return token(RhtmlTokenId.EL);
            case ISP_SCRIPTLET_PC:
                state = INIT;
                return token(RhtmlTokenId.SYMBOL2);
            case ISI_SCRIPTLET:
                state = INIT;
                return token(RhtmlTokenId.SCRIPTLET);

            default:
                LOGGER.warning("RhtmlLexer - unhandled state : " + state);
        }

        return null;

    }

    /** Nothing to release: the lexer holds no resources of its own. */
    public void release() {
    }
}