1 61 62 63 64 65 package org.jaxen.saxpath.base; 66 67 class XPathLexer 68 { 69 private String xpath; 70 private int currentPosition; 71 private int endPosition; 72 73 private Token previousToken; 74 75 XPathLexer(String xpath) 76 { 77 setXPath( xpath ); 78 } 79 80 private void setXPath(String xpath) 81 { 82 this.xpath = xpath; 83 this.currentPosition = 0; 84 this.endPosition = xpath.length(); 85 } 86 87 String getXPath() 88 { 89 return this.xpath; 90 } 91 92 Token nextToken() 93 { 94 Token token = null; 95 96 do 97 { 98 token = null; 99 100 switch ( LA(1) ) 101 { 102 case '$': 103 { 104 token = dollar(); 105 break; 106 } 107 108 case '"': 109 case '\'': 110 { 111 token = literal(); 112 break; 113 } 114 115 case '/': 116 { 117 token = slashes(); 118 break; 119 } 120 121 case ',': 122 { 123 token = comma(); 124 break; 125 } 126 127 case '(': 128 { 129 token = leftParen(); 130 break; 131 } 132 133 case ')': 134 { 135 token = rightParen(); 136 break; 137 } 138 139 case '[': 140 { 141 token = leftBracket(); 142 break; 143 } 144 145 case ']': 146 { 147 token = rightBracket(); 148 break; 149 } 150 151 case '+': 152 { 153 token = plus(); 154 break; 155 } 156 157 case '-': 158 { 159 token = minus(); 160 break; 161 } 162 163 case '<': 164 case '>': 165 { 166 token = relationalOperator(); 167 break; 168 } 169 170 case '=': 171 { 172 token = equals(); 173 break; 174 } 175 176 case '!': 177 { 178 if ( LA(2) == '=' ) 179 { 180 token = notEquals(); 181 } 182 else 183 { 184 token = not(); 185 } 186 break; 187 } 188 189 case '|': 190 { 191 token = pipe(); 192 break; 193 } 194 195 case '@': 196 { 197 token = at(); 198 break; 199 } 200 201 case ':': 202 { 203 if ( LA(2) == ':' ) 204 { 205 token = doubleColon(); 206 } 207 else 208 { 209 token = colon(); 210 } 211 break; 212 } 213 214 case '*': 215 { 216 token = star(); 217 break; 218 } 219 220 case '.': 221 { 222 switch ( LA(2) ) 223 { 224 case '0': 225 case '1': 226 case '2': 227 case '3': 228 case '4': 229 case '5': 230 case '6': 231 case '7': 232 case '8': 233 case '9': 234 { 235 token = number(); 236 break; 237 } 238 default: 239 { 240 token = dots(); 241 break; 242 } 243 } 244 break; 245 } 246 247 case '0': 248 case '1': 249 case '2': 250 case '3': 251 case '4': 252 case '5': 253 case '6': 254 case '7': 255 case '8': 256 case '9': 257 { 258 token = number(); 259 break; 260 } 261 262 case ' ': 263 case '\t': 264 case '\n': 265 case '\r': 266 { 267 token = whitespace(); 268 break; 269 } 270 271 default: 272 { 273 if ( isIdentifierStartChar( LA(1) ) ) 274 { 275 token = identifierOrOperatorName(); 276 } 277 } 278 } 279 280 if ( token == null ) 281 { 282 if (!hasMoreChars()) 283 { 284 token = new Token( TokenTypes.EOF, 285 getXPath(), 286 currentPosition(), 287 endPosition() ); 288 } 289 else 290 { 291 token = new Token( TokenTypes.ERROR, 292 getXPath(), 293 currentPosition(), 294 endPosition() ); 295 } 296 } 297 298 } 299 while ( token.getTokenType() == TokenTypes.SKIP ); 300 301 setPreviousToken( token ); 302 303 return token; 304 } 305 306 private Token identifierOrOperatorName() 307 { 308 Token token = null; 309 310 if ( previousToken != null ) 311 { 312 323 switch ( previousToken.getTokenType() ) 324 { 325 case TokenTypes.AT: 326 case TokenTypes.DOUBLE_COLON: 327 case TokenTypes.LEFT_PAREN: 328 case TokenTypes.LEFT_BRACKET: 329 case TokenTypes.AND: 330 case TokenTypes.OR: 331 case TokenTypes.MOD: 332 case TokenTypes.DIV: 333 case TokenTypes.COLON: 334 case TokenTypes.SLASH: 335 case TokenTypes.DOUBLE_SLASH: 336 case TokenTypes.PIPE: 337 case TokenTypes.DOLLAR: 338 case TokenTypes.PLUS: 339 case TokenTypes.MINUS: 340 case TokenTypes.STAR: 341 case TokenTypes.COMMA: 342 case TokenTypes.LESS_THAN_SIGN: 343 case TokenTypes.GREATER_THAN_SIGN: 344 case TokenTypes.LESS_THAN_OR_EQUALS_SIGN: 345 case TokenTypes.GREATER_THAN_OR_EQUALS_SIGN: 346 case TokenTypes.EQUALS: 347 case TokenTypes.NOT_EQUALS: 348 { 349 token = identifier(); 350 break; 351 } 352 default: 353 { 354 token = operatorName(); 355 break; 356 } 357 } 358 } 359 else 360 { 361 token = identifier(); 362 } 363 364 return token; 365 } 366 367 private Token identifier() 368 { 369 Token token = null; 370 371 int start = currentPosition(); 372 373 while ( hasMoreChars() ) 374 { 375 if ( isIdentifierChar( LA(1) ) ) 376 { 377 consume(); 378 } 379 else 380 { 381 break; 382 } 383 } 384 385 token = new Token( TokenTypes.IDENTIFIER, 386 getXPath(), 387 start, 388 currentPosition() ); 389 390 return token; 391 } 392 393 private Token operatorName() 394 { 395 Token token = null; 396 397 switch ( LA(1) ) 398 { 399 case 'a': 400 { 401 token = and(); 402 break; 403 } 404 405 case 'o': 406 { 407 token = or(); 408 break; 409 } 410 411 case 'm': 412 { 413 token = mod(); 414 break; 415 } 416 417 case 'd': 418 { 419 token = div(); 420 break; 421 } 422 } 423 424 return token; 425 } 426 427 private Token mod() 428 { 429 Token token = null; 430 431 if ( ( LA(1) == 'm' ) 432 && 433 ( LA(2) == 'o' ) 434 && 435 ( LA(3) == 'd' ) 436 && 437 ( ! isIdentifierChar( LA(4) ) ) ) 438 { 439 token = new Token( TokenTypes.MOD, 440 getXPath(), 441 currentPosition(), 442 currentPosition()+3 ); 443 444 consume(); 445 consume(); 446 consume(); 447 } 448 449 return token; 450 } 451 452 private Token div() 453 { 454 Token token = null; 455 456 if ( ( LA(1) == 'd' ) 457 && 458 ( LA(2) == 'i' ) 459 && 460 ( LA(3) == 'v' ) 461 && 462 ( ! isIdentifierChar( LA(4) ) ) ) 463 { 464 token = new Token( TokenTypes.DIV, 465 getXPath(), 466 currentPosition(), 467 currentPosition()+3 ); 468 469 consume(); 470 consume(); 471 consume(); 472 } 473 474 return token; 475 } 476 477 private Token and() 478 { 479 Token token = null; 480 481 if ( ( LA(1) == 'a' ) 482 && 483 ( LA(2) == 'n' ) 484 && 485 ( LA(3) == 'd' ) 486 && 487 ( ! isIdentifierChar( LA(4) ) ) ) 488 { 489 token = new Token( TokenTypes.AND, 490 getXPath(), 491 currentPosition(), 492 currentPosition()+3 ); 493 494 consume(); 495 consume(); 496 consume(); 497 } 498 499 return token; 500 } 501 502 private Token or() 503 { 504 Token token = null; 505 506 if ( ( LA(1) == 'o' ) 507 && 508 ( LA(2) == 'r' ) 509 && 510 ( ! isIdentifierChar( LA(3) ) ) ) 511 { 512 token = new Token( TokenTypes.OR, 513 getXPath(), 514 currentPosition(), 515 currentPosition()+2 ); 516 517 consume(); 518 consume(); 519 } 520 521 return token; 522 } 523 524 private Token number() 525 { 526 int start = currentPosition(); 527 boolean periodAllowed = true; 528 529 loop: 530 while( true ) 531 { 532 switch ( LA(1) ) 533 { 534 case '.': 535 { 536 if ( periodAllowed ) 537 { 538 periodAllowed = false; 539 consume(); 540 } 541 else 542 { 543 break loop; 544 } 545 break; 546 } 547 548 case '0': 549 case '1': 550 case '2': 551 case '3': 552 case '4': 553 case '5': 554 case '6': 555 case '7': 556 case '8': 557 case '9': 558 { 559 consume(); 560 break; 561 } 562 default: 563 { 564 break loop; 565 } 566 } 567 } 568 569 Token token = null; 570 571 if ( periodAllowed ) 572 { 573 token = new Token( TokenTypes.INTEGER, 574 getXPath(), 575 start, 576 currentPosition() ); 577 } 578 else 579 { 580 token = new Token( TokenTypes.DOUBLE, 581 getXPath(), 582 start, 583 currentPosition() ); 584 } 585 586 return token; 587 } 588 589 private Token whitespace() 590 { 591 consume(); 592 593 loop: 594 while( hasMoreChars() ) 595 { 596 switch ( LA(1) ) 597 { 598 case ' ': 599 case '\t': 600 case '\n': 601 case '\r': 602 { 603 consume(); 604 break; 605 } 606 607 default: 608 { 609 break loop; 610 } 611 } 612 } 613 614 return new Token( TokenTypes.SKIP, 615 getXPath(), 616 0, 617 0 ); 618 } 619 620 private Token comma() 621 { 622 Token token = new Token( TokenTypes.COMMA, 623 getXPath(), 624 currentPosition(), 625 currentPosition()+1 ); 626 627 consume(); 628 629 return token; 630 } 631 632 private Token equals() 633 { 634 Token token = new Token( TokenTypes.EQUALS, 635 getXPath(), 636 currentPosition(), 637 currentPosition()+1 ); 638 639 consume(); 640 641 return token; 642 } 643 644 private Token minus() 645 { 646 Token token = new Token( TokenTypes.MINUS, 647 getXPath(), 648 currentPosition(), 649 currentPosition()+1 ); 650 consume(); 651 652 return token; 653 } 654 655 private Token plus() 656 { 657 Token token = new Token( TokenTypes.PLUS, 658 getXPath(), 659 currentPosition(), 660 currentPosition()+1 ); 661 consume(); 662 663 return token; 664 } 665 666 private Token dollar() 667 { 668 Token token = new Token( TokenTypes.DOLLAR, 669 getXPath(), 670 currentPosition(), 671 currentPosition()+1 ); 672 consume(); 673 674 return token; 675 } 676 677 private Token pipe() 678 { 679 Token token = new Token( TokenTypes.PIPE, 680 getXPath(), 681 currentPosition(), 682 currentPosition()+1 ); 683 684 consume(); 685 686 return token; 687 } 688 689 private Token at() 690 { 691 Token token = new Token( TokenTypes.AT, 692 getXPath(), 693 currentPosition(), 694 currentPosition()+1 ); 695 696 consume(); 697 698 return token; 699 } 700 701 private Token colon() 702 { 703 Token token = new Token( TokenTypes.COLON, 704 getXPath(), 705 currentPosition(), 706 currentPosition()+1 ); 707 consume(); 708 709 return token; 710 } 711 712 private Token doubleColon() 713 { 714 Token token = new Token( TokenTypes.DOUBLE_COLON, 715 getXPath(), 716 currentPosition(), 717 currentPosition()+2 ); 718 719 consume(); 720 consume(); 721 722 return token; 723 } 724 725 private Token not() 726 { 727 Token token = new Token( TokenTypes.NOT, 728 getXPath(), 729 currentPosition(), 730 currentPosition() + 1 ); 731 732 consume(); 733 734 return token; 735 } 736 737 private Token notEquals() 738 { 739 Token token = new Token( TokenTypes.NOT_EQUALS, 740 getXPath(), 741 currentPosition(), 742 currentPosition() + 2 ); 743 744 consume(); 745 consume(); 746 747 return token; 748 } 749 750 private Token relationalOperator() 751 { 752 Token token = null; 753 754 switch ( LA(1) ) 755 { 756 case '<': 757 { 758 if ( LA(2) == '=' ) 759 { 760 token = new Token( TokenTypes.LESS_THAN_OR_EQUALS_SIGN, 761 getXPath(), 762 currentPosition(), 763 currentPosition() + 2 ); 764 consume(); 765 } 766 else 767 { 768 token = new Token( TokenTypes.LESS_THAN_SIGN, 769 getXPath(), 770 currentPosition(), 771 currentPosition() + 1); 772 } 773 774 consume(); 775 break; 776 } 777 case '>': 778 { 779 if ( LA(2) == '=' ) 780 { 781 token = new Token( TokenTypes.GREATER_THAN_OR_EQUALS_SIGN, 782 getXPath(), 783 currentPosition(), 784 currentPosition() + 2 ); 785 consume(); 786 } 787 else 788 { 789 token = new Token( TokenTypes.GREATER_THAN_SIGN, 790 getXPath(), 791 currentPosition(), 792 currentPosition() + 1 ); 793 } 794 795 consume(); 796 break; 797 } 798 } 799 800 return token; 801 802 } 803 804 private Token star() 805 { 806 Token token = new Token( TokenTypes.STAR, 807 getXPath(), 808 currentPosition(), 809 currentPosition()+1 ); 810 811 consume(); 812 813 return token; 814 } 815 816 private Token literal() 817 { 818 Token token = null; 819 820 char match = LA(1); 821 822 consume(); 823 824 int start = currentPosition(); 825 826 while ( ( token == null ) 827 && 828 hasMoreChars() ) 829 { 830 if ( LA(1) == match ) 831 { 832 token = new Token( TokenTypes.LITERAL, 833 getXPath(), 834 start, 835 currentPosition() ); 836 } 837 consume(); 838 } 839 840 return token; 841 } 842 843 private Token dots() 844 { 845 Token token = null; 846 847 switch ( LA(2) ) 848 { 849 case '.': 850 { 851 token = new Token( TokenTypes.DOT_DOT, 852 getXPath(), 853 currentPosition(), 854 currentPosition()+2 ) ; 855 consume(); 856 consume(); 857 break; 858 } 859 default: 860 { 861 token = new Token( TokenTypes.DOT, 862 getXPath(), 863 currentPosition(), 864 currentPosition()+1 ); 865 consume(); 866 break; 867 } 868 } 869 870 return token; 871 } 872 873 private Token leftBracket() 874 { 875 Token token = new Token( TokenTypes.LEFT_BRACKET, 876 getXPath(), 877 currentPosition(), 878 currentPosition()+1 ); 879 880 consume(); 881 882 return token; 883 } 884 885 private Token rightBracket() 886 { 887 Token token = new Token( TokenTypes.RIGHT_BRACKET, 888 getXPath(), 889 currentPosition(), 890 currentPosition()+1 ); 891 892 consume(); 893 894 return token; 895 } 896 897 private Token leftParen() 898 { 899 Token token = new Token( TokenTypes.LEFT_PAREN, 900 getXPath(), 901 currentPosition(), 902 currentPosition()+1 ); 903 904 consume(); 905 906 return token; 907 } 908 909 private Token rightParen() 910 { 911 Token token = new Token( TokenTypes.RIGHT_PAREN, 912 getXPath(), 913 currentPosition(), 914 currentPosition()+1 ); 915 916 consume(); 917 918 return token; 919 } 920 921 private Token slashes() 922 { 923 Token token = null; 924 925 switch ( LA(2) ) 926 { 927 case '/': 928 { 929 token = new Token( TokenTypes.DOUBLE_SLASH, 930 getXPath(), 931 currentPosition(), 932 currentPosition()+2 ); 933 consume(); 934 consume(); 935 break; 936 } 937 default: 938 { 939 token = new Token( TokenTypes.SLASH, 940 getXPath(), 941 currentPosition(), 942 currentPosition()+1 ); 943 consume(); 944 } 945 } 946 947 return token; 948 } 949 950 private char LA(int i) 951 { 952 if ( currentPosition + ( i - 1 ) >= endPosition() ) 953 { 954 return (char) -1; 955 } 956 957 return getXPath().charAt( currentPosition() + (i - 1) ); 958 } 959 960 private void consume() 961 { 962 ++this.currentPosition; 963 } 964 965 private int currentPosition() 966 { 967 return this.currentPosition; 968 } 969 970 private int endPosition() 971 { 972 return this.endPosition; 973 } 974 975 private void setPreviousToken(Token previousToken) 976 { 977 this.previousToken = previousToken; 978 } 979 980 private boolean hasMoreChars() 981 { 982 return currentPosition() < endPosition(); 983 } 984 985 private boolean isIdentifierChar(char c) 986 { 987 return Verifier.isXMLNCNameCharacter( c ); 988 } 989 990 private boolean isIdentifierStartChar(char c) 991 { 992 return Verifier.isXMLNCNameStartCharacter( c ); 993 } 994 995 } 996 | Popular Tags |