1 package de.java2html.javasource; 2 3 import java.io.BufferedReader ; 4 import java.io.File ; 5 import java.io.FileReader ; 6 import java.io.IOException ; 7 import java.io.InputStream ; 8 import java.io.InputStreamReader ; 9 import java.io.Reader ; 10 import java.io.StringReader ; 11 import java.net.URL ; 12 import java.util.Hashtable ; 13 import java.util.StringTokenizer ; 14 15 import de.java2html.options.JavaSourceConversionOptions; 16 import de.java2html.util.IoUtilities; 17 18 55 public class JavaSourceParser { 56 57 private JavaSource source; 58 59 60 private String sourceCode; 61 62 63 private JavaSourceType[] sourceTypes; 64 65 private JavaSourceConversionOptions options; 66 67 68 private final static String NUM_DELIMITERS = " \t\n\r()[]{};:+-/\\*!?#%&|<>=^,"; 69 70 71 private final static String DELIMITERS = " \t\n\r()[]{};:.+-/\\*!?#%&|<>=^"; 72 73 74 private final static String EMPTY_STR = " \t\n\r\f"; 75 76 private final static String [] PRIMITIVE_DATATYPES = { 77 "boolean", 78 "byte", 79 "char", 80 "double", 81 "float", 82 "int", 83 "long", 84 "short", 85 "void" }; 86 87 90 private final static String [] JAVA_KEYWORDS = { 91 "assert", 92 "abstract", 93 "default", 94 "if", 95 "private", 96 "this", 97 "do", 98 "implements", 99 "protected", 100 "throw", 101 "break", 102 "import", 103 "public", 104 "throws", 105 "else", 106 "instanceof", 107 "return", 108 "transient", 109 "case", 110 "extends", 111 "try", 112 "catch", 113 "final", 114 "interface", 115 "static", 116 "finally", 117 "strictfp", 118 "volatile", 119 "class", 120 "native", 121 "super", 122 "while", 123 "const", 124 "for", 125 "new", 126 "strictfp", 127 "switch", 128 "continue", 129 "goto", 130 "package", 131 "synchronized", 132 "threadsafe", 133 "null", 134 "true", 135 "false", 136 "enum", 138 "@interface" }; 139 140 private final static String [] JAVADOC_KEYWORDS = { 141 "@author", 142 "@beaninfo", 143 "@docRoot", 144 "@deprecated", 145 "@exception", 146 "@link", 147 "@param", 148 "@return", 149 "@see", 150 "@serial", 151 "@serialData", 152 "@serialField", 153 "@since", 154 "@throws", 155 "@version", 156 "@linkplain", 158 "@inheritDoc", 159 "@value", 160 "@pre", 162 "@post", 163 "@inv", 164 "@published" }; 166 167 168 private static Hashtable tableJavaDocKeywords; 169 170 171 private static Hashtable tableJavaKeywords; 172 173 174 private final static short PARSESTATE_FINISHED = -1; 175 private final static short COD = 0; private final static short CAC = 1; private final static short CL = 2; private final static short CBJ1 = 3; private final static short CBJ2 = 4; private final static short CB = 5; private final static short CBA = 6; private final static short CJ = 7; private final static short CJA = 8; private final static short QU = 9; private final static short QUA = 10; private final static short CH1 = 11; private final static short CH2 = 12; private final static short CH3 = 13; private final static short CH4 = 14; private final static short CH5 = 15; private final static short CH6 = 16; 193 194 private final static short PARSESTATE_START = 0; 195 private final static short PARSESTATE_NEUTRAL = 1; 196 private final static short PARSESTATE_DA = 2; 197 private final static short PARSESTATE_NA = 3; 198 private final static short PARSESTATE_EXP = 4; 199 private final static short PARSESTATE_HEX = 5; 200 private final static short PARSESTATE_HIA = 6; 201 202 203 private int counter; 204 205 206 private final static char EOT = (char) -1; 207 208 209 private short parseState; 210 private int parseSourcePos; 211 private int parseTypePos; 212 213 public JavaSourceParser() { 214 this(JavaSourceConversionOptions.getDefault()); 215 } 216 217 public JavaSourceParser(JavaSourceConversionOptions options) { 218 buildTables(); 219 this.options = options; 220 } 221 222 226 private synchronized void buildTables() { 227 if (tableJavaDocKeywords != null && tableJavaKeywords != null) { 228 return; 229 } 230 231 tableJavaDocKeywords = new Hashtable ((int) (JAVADOC_KEYWORDS.length * 1.5)); 232 for (int i = 0; i < JAVADOC_KEYWORDS.length; ++i) { 233 tableJavaDocKeywords.put(JAVADOC_KEYWORDS[i], JAVADOC_KEYWORDS[i]); 234 } 235 236 tableJavaKeywords = new Hashtable ((int) (JAVA_KEYWORDS.length * 1.5)); 237 for (int i = 0; i < JAVA_KEYWORDS.length; ++i) { 238 tableJavaKeywords.put(JAVA_KEYWORDS[i], JAVA_KEYWORDS[i]); 239 } 240 } 241 242 private final static boolean isEmpty(char ch) { 243 return (EMPTY_STR.indexOf(ch) != -1); 244 } 245 246 private boolean isNumberDelimiter(char ch) { 247 return (NUM_DELIMITERS.indexOf(ch) != -1); 248 } 249 250 private final static int indexOf(char ch, String s, int start, int end) { 251 if (end < start) 252 return -1; 253 254 for (int i = start; i <= end; ++i) { 255 if (s.charAt(i) == ch) 256 return i; 257 } 258 259 return -1; 260 } 261 262 271 public JavaSource parse(File file) throws IOException { 272 source = parse(new FileReader (file)); 273 source.setFileName(file.getName()); 274 return source; 275 } 276 277 public JavaSource parse(String rawText) { 278 if (rawText == null) { 279 throw new NullPointerException (); 280 } 281 try { 282 return parse(new StringReader (rawText)); 283 } 284 catch (IOException e) { 285 System.err.println("Unexpected exception while parsing raw text: " + e); 286 return new JavaSource(""); 287 } 288 } 289 290 public JavaSource parse(URL url) throws IOException { 291 InputStream inputStream = null; 292 try { 293 inputStream = url.openStream(); 294 return parse(inputStream); 295 } 296 finally { 297 IoUtilities.close(inputStream); 298 } 299 } 300 301 public JavaSource parse(InputStream stream) throws IOException { 302 return parse(new InputStreamReader (stream)); 303 } 304 305 public JavaSource parse(Reader reader) throws IOException { 306 if (reader == null) { 307 throw new IllegalArgumentException ("reader may not be null"); 308 } 309 try { 310 sourceCode = readPlainSource(reader); 311 } 312 finally { 313 IoUtilities.close(reader); 314 } 315 replaceTabs(); 316 317 sourceTypes = new JavaSourceType[sourceCode.length()]; 318 source = new JavaSource(sourceCode); 319 source.setClassification(sourceTypes); 320 321 parseOne(); 322 parseTwo(); 323 parseThree(); 324 parseFour(); 325 326 doStatistics(); 327 328 return source; 329 } 330 331 private void parseFour() { 332 boolean isInsideAnnotation = false; 333 for (int i = 0; i < sourceTypes.length; ++i) { 334 if (!isInsideAnnotation && sourceTypes[i] == JavaSourceType.CODE && sourceCode.charAt(i) == '@') { 335 isInsideAnnotation = true; 336 sourceTypes[i] = JavaSourceType.ANNOTATION; 337 } 338 else if (isInsideAnnotation 339 && sourceTypes[i] == JavaSourceType.CODE 340 && (Character.isJavaIdentifierPart(sourceCode.charAt(i)) || sourceCode.charAt(i) == '.')) { 341 sourceTypes[i] = JavaSourceType.ANNOTATION; 342 } 343 else { 344 isInsideAnnotation = false; 345 } 346 } 347 } 348 349 354 private void doStatistics() { 355 int index = 0; 356 source.getStatistic().clear(); 357 source.getStatistic().setCharacterCount(sourceCode.length()); 358 int linesContainingAnything = 0; 359 360 if (sourceCode.length() == 0) { 361 source.getStatistic().setLineCount(0); 362 } 363 else { 364 StringTokenizer st = new StringTokenizer (sourceCode, "\n\r", true); 365 while (st.hasMoreTokens()) { 366 String line = st.nextToken(); 367 368 if (line.charAt(0) == '\r') { 369 ++index; 370 } 371 else if (line.charAt(0) == '\n') { 372 ++index; 373 source.getStatistic().setLineCount(source.getStatistic().getLineCount() + 1); 374 } 375 else { 376 ++linesContainingAnything; 377 statistics(line, index); 378 index += line.length(); 379 } 380 } 381 source.getStatistic().setLineCount(source.getStatistic().getLineCount() + 1); 382 } 383 384 source.getStatistic().setEmptyLineCount(source.getStatistic().getLineCount() - linesContainingAnything); 386 } 387 388 private void statistics(String line, int start) { 389 if (line.length() > source.getStatistic().getMaxLineLength()) { 390 source.getStatistic().setMaxLineLength(line.length()); 391 } 392 393 int end = start + line.length(); 394 395 boolean containsCode = false; 396 boolean containsComment = false; 397 398 for (int i = start; i < end; ++i) { 399 if (sourceTypes[i] == JavaSourceType.CODE 400 || sourceTypes[i] == JavaSourceType.KEYWORD 401 || sourceTypes[i] == JavaSourceType.CODE_TYPE 402 || sourceTypes[i] == JavaSourceType.CHAR_CONSTANT 403 || sourceTypes[i] == JavaSourceType.NUM_CONSTANT) { 404 containsCode = true; 405 if (containsComment) 406 break; 407 } 408 else if (sourceTypes[i] == JavaSourceType.COMMENT_BLOCK 409 || sourceTypes[i] == JavaSourceType.COMMENT_LINE 410 || sourceTypes[i] == JavaSourceType.JAVADOC 411 || sourceTypes[i] == JavaSourceType.JAVADOC_KEYWORD) { 412 containsComment = true; 413 if (containsCode) 414 break; 415 } 416 } 417 418 if (containsCode) 419 source.getStatistic().setCodeLineCount(source.getStatistic().getCodeLineCount() + 1); 420 if (containsComment) 421 source.getStatistic().setCommentLineCount(source.getStatistic().getCommentLineCount() + 1); 422 if (!containsCode && !containsComment) 423 source.getStatistic().setEmptyLineCount(source.getStatistic().getEmptyLineCount() + 1); 424 } 425 426 private String readPlainSource(Reader reader) throws IOException { 427 return readPlainSource(new BufferedReader (reader)); 428 } 429 430 private String readPlainSource(BufferedReader reader) throws IOException { 431 432 StringBuffer sb = new StringBuffer (); 433 String line; 434 while ((line = reader.readLine()) != null) { 435 sb.append(line); 436 sb.append("\r\n"); 437 } 438 if (sb.length() > 0) { 439 sb.setLength(sb.length() - 2); 440 } 441 return sb.toString(); 442 } 454 455 458 private void replaceTabs() { 459 char[] t = new char[options.getTabSize()]; 460 for (int i = 0; i < options.getTabSize(); ++i) { 461 t[i] = ' '; 462 } 463 464 StringBuffer sb = new StringBuffer ((int) (sourceCode.length() * 1.3)); 465 for (int i = 0; i < sourceCode.length(); ++i) { 466 char ch = sourceCode.charAt(i); 467 if (ch == '\t') { 468 sb.append(t); 469 } 470 else { 471 sb.append(ch); 472 } 473 } 474 475 sourceCode = sb.toString(); 476 } 477 478 484 private void parseOne() { 485 parseState = COD; 486 parseSourcePos = 0; 487 parseTypePos = 0; 488 489 while (parseState != PARSESTATE_FINISHED) { 490 parseOneDo(); 491 } 492 } 493 494 502 private void parseOneDo() { 503 char ch = EOT; 504 if (sourceCode.length() > parseSourcePos) { 505 ch = sourceCode.charAt(parseSourcePos++); 506 } 507 508 switch (parseState) { 509 case COD: 510 if (ch == EOT) { 511 parseState = PARSESTATE_FINISHED; 512 return; 513 } 514 if (ch == '/') { 515 parseState = CAC; 516 return; 517 } 518 if (ch == '"') { 519 sourceTypes[parseTypePos++] = JavaSourceType.STRING; 520 parseState = QU; 521 return; 522 } 523 if (ch == '\'') { 524 parseState = CH1; 525 return; 526 } 527 if (isEmpty(ch)) { 528 sourceTypes[parseTypePos++] = JavaSourceType.BACKGROUND; 529 return; 530 } 531 532 sourceTypes[parseTypePos++] = JavaSourceType.CODE; 533 return; 534 case CAC: 535 if (ch == EOT) { 536 parseState = PARSESTATE_FINISHED; 537 sourceTypes[parseTypePos++] = JavaSourceType.CODE; 538 return; 539 } 540 if (ch == '/') { 541 sourceTypes[parseTypePos++] = JavaSourceType.COMMENT_LINE; 542 sourceTypes[parseTypePos++] = JavaSourceType.COMMENT_LINE; 543 parseState = CL; 544 return; 545 } 546 if (ch == '*') { 547 parseState = CBJ1; 548 return; 549 } 550 if (isEmpty(ch)) { 551 sourceTypes[parseTypePos++] = JavaSourceType.CODE; 552 sourceTypes[parseTypePos++] = JavaSourceType.BACKGROUND; 553 parseState = COD; 554 return; 555 } 556 557 sourceTypes[parseTypePos++] = JavaSourceType.CODE; 558 sourceTypes[parseTypePos++] = JavaSourceType.CODE; 559 parseState = COD; 560 return; 561 case CL: 562 if (ch == EOT) { 563 parseState = PARSESTATE_FINISHED; 564 return; 565 } 566 if (ch == '\n' || ch == '\r') { 567 sourceTypes[parseTypePos++] = JavaSourceType.BACKGROUND; 568 parseState = COD; 570 return; 571 } 572 if (isEmpty(ch)) { 573 sourceTypes[parseTypePos++] = JavaSourceType.BACKGROUND; 574 return; 575 } 576 sourceTypes[parseTypePos++] = JavaSourceType.COMMENT_LINE; 577 return; 578 case CB: 579 if (ch == EOT) { 580 parseState = PARSESTATE_FINISHED; 581 return; 582 } 583 if (ch == '*') { 584 sourceTypes[parseTypePos++] = JavaSourceType.COMMENT_BLOCK; 585 parseState = CBA; 586 return; 587 } 588 if (isEmpty(ch)) { 589 sourceTypes[parseTypePos++] = JavaSourceType.BACKGROUND; 590 return; 591 } 592 sourceTypes[parseTypePos++] = JavaSourceType.COMMENT_BLOCK; 593 return; 594 case CBA: 595 if (ch == EOT) { 596 parseState = PARSESTATE_FINISHED; 597 return; 598 } 599 if (ch == '/') { 600 sourceTypes[parseTypePos++] = JavaSourceType.COMMENT_BLOCK; 601 parseState = COD; 602 return; 603 } 604 if (ch == '*') { 605 sourceTypes[parseTypePos++] = JavaSourceType.COMMENT_BLOCK; 606 parseState = CBA; 607 return; 608 } 609 if (isEmpty(ch)) { 610 sourceTypes[parseTypePos++] = JavaSourceType.BACKGROUND; 611 parseState = CB; 612 return; 613 } 614 sourceTypes[parseTypePos++] = JavaSourceType.COMMENT_BLOCK; 615 parseState = CB; 616 return; 617 case CJ: 618 if (ch == EOT) { 619 parseState = PARSESTATE_FINISHED; 620 return; 621 } 622 if (ch == '*') { 623 sourceTypes[parseTypePos++] = JavaSourceType.JAVADOC; 624 parseState = CJA; 625 return; 626 } 627 if (isEmpty(ch)) { 628 sourceTypes[parseTypePos++] = JavaSourceType.BACKGROUND; 629 return; 630 } 631 sourceTypes[parseTypePos++] = JavaSourceType.JAVADOC; 632 return; 633 case CJA: 634 if (ch == EOT) { 635 parseState = PARSESTATE_FINISHED; 636 return; 637 } 638 if (ch == '/') { 639 sourceTypes[parseTypePos++] = JavaSourceType.JAVADOC; 640 parseState = COD; 641 return; 642 } 643 if (ch == '*') { 644 sourceTypes[parseTypePos++] = JavaSourceType.JAVADOC; 645 parseState = CJA; 646 return; 647 } 648 if (isEmpty(ch)) { 649 sourceTypes[parseTypePos++] = JavaSourceType.BACKGROUND; 650 parseState = CJ; 651 return; 652 } 653 sourceTypes[parseTypePos++] = JavaSourceType.JAVADOC; 654 parseState = CJ; 655 return; 656 case QU: 657 if (ch == EOT) { 658 parseState = PARSESTATE_FINISHED; 659 return; 660 } 661 if (ch == '"') { 662 sourceTypes[parseTypePos++] = JavaSourceType.STRING; 663 parseState = COD; 664 return; 665 } 666 if (ch == '\\') { 667 parseState = QUA; 668 return; 669 } 670 if (isEmpty(ch)) { 671 sourceTypes[parseTypePos++] = JavaSourceType.BACKGROUND; 672 return; 673 } 674 675 sourceTypes[parseTypePos++] = JavaSourceType.STRING; 676 return; 677 case QUA: 678 if (ch == EOT) { 679 sourceTypes[parseTypePos++] = JavaSourceType.STRING; 680 parseState = PARSESTATE_FINISHED; 681 return; 682 } 683 if (ch == '\\') { 684 sourceTypes[parseTypePos++] = JavaSourceType.STRING; 685 sourceTypes[parseTypePos++] = JavaSourceType.STRING; 686 parseState = QU; return; 688 } 689 if (isEmpty(ch)) { 690 sourceTypes[parseTypePos++] = JavaSourceType.STRING; 691 sourceTypes[parseTypePos++] = JavaSourceType.BACKGROUND; 692 parseState = QU; 693 return; 694 } 695 sourceTypes[parseTypePos++] = JavaSourceType.STRING; 696 sourceTypes[parseTypePos++] = JavaSourceType.STRING; 697 parseState = QU; 698 return; 699 case CBJ1: 700 if (ch == EOT) { 701 parseState = PARSESTATE_FINISHED; 702 sourceTypes[parseTypePos++] = JavaSourceType.UNDEFINED; 703 sourceTypes[parseTypePos++] = JavaSourceType.UNDEFINED; 704 return; 705 } 706 if (ch == '*') { 707 parseState = CBJ2; 708 return; 709 } 710 if (isEmpty(ch)) { 711 sourceTypes[parseTypePos++] = JavaSourceType.COMMENT_BLOCK; 712 sourceTypes[parseTypePos++] = JavaSourceType.COMMENT_BLOCK; 713 sourceTypes[parseTypePos++] = JavaSourceType.BACKGROUND; 714 parseState = CB; 715 return; 716 } 717 sourceTypes[parseTypePos++] = JavaSourceType.COMMENT_BLOCK; 718 sourceTypes[parseTypePos++] = JavaSourceType.COMMENT_BLOCK; 719 sourceTypes[parseTypePos++] = JavaSourceType.COMMENT_BLOCK; 720 parseState = CB; 721 return; 722 case CBJ2: 723 if (ch == EOT) { 724 parseState = PARSESTATE_FINISHED; 725 sourceTypes[parseTypePos++] = JavaSourceType.UNDEFINED; 726 sourceTypes[parseTypePos++] = JavaSourceType.UNDEFINED; 727 sourceTypes[parseTypePos++] = JavaSourceType.UNDEFINED; 728 return; 729 } 730 if (ch == '/') { 731 parseState = COD; 732 sourceTypes[parseTypePos++] = JavaSourceType.COMMENT_BLOCK; 733 sourceTypes[parseTypePos++] = JavaSourceType.COMMENT_BLOCK; 734 sourceTypes[parseTypePos++] = JavaSourceType.COMMENT_BLOCK; 735 sourceTypes[parseTypePos++] = JavaSourceType.COMMENT_BLOCK; 736 return; 737 } 738 if (isEmpty(ch)) { 739 sourceTypes[parseTypePos++] = JavaSourceType.JAVADOC; 740 sourceTypes[parseTypePos++] = JavaSourceType.JAVADOC; 741 sourceTypes[parseTypePos++] = JavaSourceType.JAVADOC; 742 sourceTypes[parseTypePos++] = JavaSourceType.BACKGROUND; 743 parseState = CJ; 744 return; 745 } 746 sourceTypes[parseTypePos++] = JavaSourceType.JAVADOC; 747 sourceTypes[parseTypePos++] = JavaSourceType.JAVADOC; 748 sourceTypes[parseTypePos++] = JavaSourceType.JAVADOC; 749 sourceTypes[parseTypePos++] = JavaSourceType.JAVADOC; 750 parseState = CJ; 751 return; 752 case CH1: 753 if (ch == EOT) { 754 sourceTypes[parseTypePos++] = JavaSourceType.CODE; 755 parseState = PARSESTATE_FINISHED; 756 return; 757 } 758 if (ch == '\\') { 759 parseState = CH3; 760 return; 761 } 762 parseState = CH2; 763 return; 764 case CH2: 765 if (ch == EOT) { 766 sourceTypes[parseTypePos++] = JavaSourceType.CODE; 767 sourceTypes[parseTypePos++] = JavaSourceType.CODE; 768 parseState = PARSESTATE_FINISHED; 769 return; 770 } 771 if (ch == '\'') { 772 sourceTypes[parseTypePos++] = JavaSourceType.CHAR_CONSTANT; 773 sourceTypes[parseTypePos++] = JavaSourceType.CHAR_CONSTANT; 774 sourceTypes[parseTypePos++] = JavaSourceType.CHAR_CONSTANT; 775 parseState = COD; 776 return; 777 } 778 sourceTypes[parseTypePos++] = JavaSourceType.UNDEFINED; 779 sourceTypes[parseTypePos++] = JavaSourceType.UNDEFINED; 780 sourceTypes[parseTypePos++] = JavaSourceType.UNDEFINED; 781 parseState = COD; 782 return; 783 case CH3: 784 if (ch == EOT) { 785 sourceTypes[parseTypePos++] = JavaSourceType.CODE; 786 sourceTypes[parseTypePos++] = JavaSourceType.CODE; 787 parseState = PARSESTATE_FINISHED; 788 return; 789 } 790 if (ch == 'u') { 791 sourceTypes[parseTypePos++] = JavaSourceType.CHAR_CONSTANT; 792 sourceTypes[parseTypePos++] = JavaSourceType.CHAR_CONSTANT; 793 sourceTypes[parseTypePos++] = JavaSourceType.CHAR_CONSTANT; 794 parseState = CH5; 795 return; 796 } 797 if (ch >= '1' && ch <= '9') { 798 sourceTypes[parseTypePos++] = JavaSourceType.CHAR_CONSTANT; 799 sourceTypes[parseTypePos++] = JavaSourceType.CHAR_CONSTANT; 800 sourceTypes[parseTypePos++] = JavaSourceType.CHAR_CONSTANT; 801 parseState = CH6; 802 return; 803 } 804 parseState = CH4; 805 return; 806 case CH4: 807 if (ch == EOT) { 808 sourceTypes[parseTypePos++] = JavaSourceType.CODE; 809 sourceTypes[parseTypePos++] = JavaSourceType.CODE; 810 sourceTypes[parseTypePos++] = JavaSourceType.CODE; 811 parseState = PARSESTATE_FINISHED; 812 return; 813 } 814 if (ch == '\'') { 815 sourceTypes[parseTypePos++] = JavaSourceType.CHAR_CONSTANT; 816 sourceTypes[parseTypePos++] = JavaSourceType.CHAR_CONSTANT; 817 sourceTypes[parseTypePos++] = JavaSourceType.CHAR_CONSTANT; 818 sourceTypes[parseTypePos++] = JavaSourceType.CHAR_CONSTANT; 819 parseState = COD; 820 return; 821 } 822 sourceTypes[parseTypePos++] = JavaSourceType.CODE; 823 sourceTypes[parseTypePos++] = JavaSourceType.CODE; 824 sourceTypes[parseTypePos++] = JavaSourceType.CODE; 825 sourceTypes[parseTypePos++] = JavaSourceType.CODE; 826 parseState = COD; 827 return; 828 case CH6: 829 if (ch == EOT) { 830 parseState = PARSESTATE_FINISHED; 831 return; 832 } 833 if (ch == '\'') { 834 sourceTypes[parseTypePos++] = JavaSourceType.CHAR_CONSTANT; 835 parseState = COD; 836 return; 837 } 838 if (ch >= '0' && ch <= '9') { 839 sourceTypes[parseTypePos++] = JavaSourceType.CHAR_CONSTANT; 840 return; 841 } 842 sourceTypes[parseTypePos++] = JavaSourceType.UNDEFINED; 843 parseState = COD; 844 return; 845 case CH5: 846 if (ch == EOT) { 847 parseState = PARSESTATE_FINISHED; 848 return; 849 } 850 if (ch == '\'') { 851 sourceTypes[parseTypePos++] = JavaSourceType.CHAR_CONSTANT; 852 parseState = COD; 853 return; 854 } 855 if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')) { 856 sourceTypes[parseTypePos++] = JavaSourceType.CHAR_CONSTANT; 857 return; 858 } 859 sourceTypes[parseTypePos++] = JavaSourceType.UNDEFINED; 860 parseState = COD; 861 return; 862 } 863 } 864 865 870 private void parseTwo() { 871 for (int index = 0; index < sourceTypes.length; ++index) { 872 if (sourceTypes[index] == JavaSourceType.CODE) { 873 if (isParenthesis(sourceCode.charAt(index))) { 874 mark(index, JavaSourceType.PARENTHESIS); 875 } 876 } 877 } 878 879 int start = 0; 880 int end = 0; 881 882 while (end < sourceTypes.length - 1) { 883 while (end < sourceTypes.length - 1 && sourceTypes[end + 1] == sourceTypes[start]) 884 ++end; 885 886 parseTwo(start, end); 887 888 start = end + 1; 889 end = start; 890 } 891 } 892 893 private boolean isParenthesis(char ch) { 894 return ch == '{' || ch == '}' || ch == '[' || ch == ']' || ch == '(' || ch == ')'; 895 } 896 897 private void parseTwo(int start, int end) { 898 if (sourceTypes[start] == JavaSourceType.JAVADOC) { 899 parseTwoCommentBlock(start, end); 900 return; 901 } 902 else if (sourceTypes[start] == JavaSourceType.CODE) { 903 parseTwoCode(start, end); 904 return; 905 } 906 907 return; 909 } 910 911 914 private void parseTwoCode(int start, int end) { 915 String code = sourceCode.substring(start, end + 1); 916 917 int index = start; 918 StringTokenizer st = new StringTokenizer (code, DELIMITERS, true); 919 while (st.hasMoreTokens()) { 920 String s = st.nextToken(); 921 if (tableJavaKeywords.containsKey(s)) { 932 mark(index, index + s.length(), JavaSourceType.KEYWORD); 933 if (s.equals("package")) { 934 int i1 = sourceCode.indexOf(';', index + 1); 935 if (i1 != -1) { 936 source.getStatistic().setPackageName(sourceCode.substring(index + s.length(), i1).trim()); 937 } 938 } 939 } 940 else { 941 for (int i = 0; i < PRIMITIVE_DATATYPES.length; ++i) { 943 if (s.equals(PRIMITIVE_DATATYPES[i])) { 944 mark(index, index + s.length(), JavaSourceType.CODE_TYPE); 945 break; 946 } 947 } 948 } 949 index += s.length(); 950 } 952 } 953 954 957 private void parseTwoCommentBlock(int start, int end) { 958 int i1 = indexOf('@', sourceCode, start, end); 959 960 while (i1 != -1 && i1 + 1 < end) { 961 int i2 = i1 + 1; 962 963 char ch = sourceCode.charAt(i2 + 1); 964 while (i2 < end && ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))) { 965 ch = sourceCode.charAt(++i2 + 1); 966 } 967 968 String s = sourceCode.substring(i1, i2 + 1); 969 971 if (tableJavaDocKeywords.containsKey(s)) { 978 mark(i1, i2 + 1, JavaSourceType.JAVADOC_KEYWORD); 979 } 980 981 i1 = indexOf('@', sourceCode, i2, end); 982 } 983 984 i1 = indexOf('<', sourceCode, start, end); 986 while (i1 != -1 && i1 + 1 < end) { 987 int i2 = sourceCode.indexOf('>', i1 + 1); 988 989 if (i2 == -1) { 994 i1 = -1; 995 break; 996 } 997 if (hasTypeOrEmpty(sourceTypes, i1, i2 + 1, JavaSourceType.JAVADOC)) { 998 mark(i1, i2 + 1, JavaSourceType.JAVADOC_HTML_TAG); 999 } 1000 i1 = indexOf('<', sourceCode, i2, end); 1001 } 1002 } 1003 1004 private static boolean hasTypeOrEmpty( 1005 JavaSourceType[] sourceTypes, 1006 int startIndex, 1007 int endIndex, 1008 JavaSourceType javaSourceType) { 1009 1010 for (int i = startIndex; i <= endIndex; ++i) { 1011 if (!sourceTypes[i].equals(javaSourceType) && !sourceTypes[i].equals(JavaSourceType.BACKGROUND)) { 1012 return false; 1013 } 1014 } 1015 return true; 1016 } 1017 1018 1022 private void parseThree() { 1023 int start = 0; 1024 int end = 0; 1025 1026 while (end < sourceTypes.length - 1) { 1027 while (end < sourceTypes.length - 1 && sourceTypes[end + 1] == sourceTypes[start]) { 1028 ++end; 1029 } 1030 1031 if (sourceTypes[start] == JavaSourceType.CODE) { 1032 parseThree(start, end); 1033 } 1034 1035 start = end + 1; 1036 end = start; 1037 } 1038 1039 expandJavaDocLinks(); 1040 } 1041 1042 private void expandJavaDocLinks() { 1043 expandEmbracedJavaDocTag("@link", JavaSourceType.JAVADOC_LINKS); 1044 expandEmbracedJavaDocTag("@linkplain", JavaSourceType.JAVADOC_LINKS); 1045 } 1046 1047 private void expandEmbracedJavaDocTag(String tag, JavaSourceType type) { 1048 String pattern = "{" + tag; 1049 1050 for (int index = 0; index < sourceTypes.length; ++index) { 1051 int start = sourceCode.indexOf(pattern, index); 1052 if (start == -1) { 1053 break; 1054 } 1055 1056 char ch = sourceCode.charAt(start + pattern.length()); 1057 if (Character.isLetterOrDigit(ch)) { 1058 break; 1059 } 1060 1061 if (!checkRegion(start + 1, start + 1 + tag.length() - 1, new IJavaSourceTypeChecker() { 1062 public boolean isValid(JavaSourceType type) { 1063 return type.equals(JavaSourceType.JAVADOC_KEYWORD); 1064 } 1065 })) { 1066 break; 1067 } 1068 1069 int end = sourceCode.indexOf('}', start + pattern.length()); 1070 if (end == -1) { 1071 break; 1072 } 1073 1074 if (checkRegion(start + 1 + tag.length(), end, new IJavaSourceTypeChecker() { 1076 public boolean isValid(JavaSourceType type) { 1077 return type.equals(JavaSourceType.BACKGROUND) || type.equals(JavaSourceType.JAVADOC); 1078 } 1079 })) { 1080 markWithoutBackground(start, end, type); 1081 } 1082 index = end; 1083 } 1084 1085 } 1086 1087 private boolean checkRegion(int start, int end, IJavaSourceTypeChecker checker) { 1088 for (int i = start; i <= end; ++i) { 1089 if (!checker.isValid(sourceTypes[i])) { 1090 return false; 1091 } 1092 } 1093 return true; 1094 } 1095 1096 private void markWithoutBackground(int start, int end, JavaSourceType type) { 1097 for (int i = start; i <= end; ++i) { 1098 if (!sourceTypes[i].equals(JavaSourceType.BACKGROUND)) { 1099 sourceTypes[i] = type; 1100 } 1101 } 1102 } 1103 1104 1107 private void parseThree(int start, int end) { 1108 parseState = PARSESTATE_START; 1109 parseSourcePos = start; 1110 parseTypePos = start - 1; 1111 counter = 0; 1112 1113 while (parseState != PARSESTATE_FINISHED) { 1114 parseThreeDo(end); 1115 } 1116 } 1117 1118 1121 private void parseThreeDo(int end) { 1122 char ch = EOT; 1123 1124 if (parseSourcePos <= end) 1125 ch = sourceCode.charAt(parseSourcePos); 1126 1127 ++parseSourcePos; 1128 ++parseTypePos; 1129 1130 switch (parseState) { 1131 case PARSESTATE_START: 1132 if (ch == EOT) { 1133 parseState = PARSESTATE_FINISHED; 1134 return; 1135 } 1136 if (ch == '.') { 1137 ++counter; 1138 parseState = PARSESTATE_DA; 1139 return; 1140 } 1141 if (ch == '0') { 1142 ++counter; 1143 parseState = PARSESTATE_HIA; 1144 return; 1145 } 1146 if (ch >= '1' && ch <= '9') { 1147 ++counter; 1148 parseState = PARSESTATE_NA; 1149 return; 1150 } 1151 if (isNumberDelimiter(ch)) { 1152 return; 1154 } 1155 parseState = PARSESTATE_NEUTRAL; 1156 return; 1157 case PARSESTATE_NEUTRAL: 1158 if (ch == EOT) { 1159 parseState = PARSESTATE_FINISHED; 1160 return; 1161 } 1162 if (isNumberDelimiter(ch)) { 1163 parseState = PARSESTATE_START; 1164 return; 1165 } 1166 return; 1167 case PARSESTATE_DA: 1168 if (ch == EOT) { 1169 parseState = PARSESTATE_FINISHED; 1170 return; 1171 } 1172 if (ch >= '0' && ch <= '9') { 1173 ++counter; 1174 parseState = PARSESTATE_NA; 1175 return; 1176 } 1177 if (isNumberDelimiter(ch)) { 1178 parseState = PARSESTATE_START; 1179 counter = 0; 1180 return; 1181 } 1182 parseState = PARSESTATE_NEUTRAL; 1183 counter = 0; 1184 return; 1185 case PARSESTATE_NA: 1186 if (ch == EOT) { 1187 parseState = PARSESTATE_FINISHED; 1188 mark(parseTypePos - counter, parseTypePos, JavaSourceType.NUM_CONSTANT); 1189 return; 1190 } 1191 if (ch == '.' || (ch >= '0' && ch <= '9')) { 1192 ++counter; 1193 return; 1194 } 1195 if (ch == 'e') { 1196 parseState = PARSESTATE_EXP; 1197 ++counter; 1198 return; 1199 } 1200 if (ch == 'f' || ch == 'F' || ch == 'd' || ch == 'D' || ch == 'l' || ch == 'L') { 1201 ++counter; 1202 mark(parseTypePos - counter + 1, parseTypePos + 1, JavaSourceType.NUM_CONSTANT); 1203 parseState = PARSESTATE_NEUTRAL; 1204 counter = 0; 1205 return; 1206 } 1207 if (isNumberDelimiter(ch)) { 1208 parseState = PARSESTATE_START; 1209 mark(parseTypePos - counter, parseTypePos, JavaSourceType.NUM_CONSTANT); 1210 counter = 0; 1211 return; 1212 } 1213 mark(parseTypePos - counter, parseTypePos, JavaSourceType.NUM_CONSTANT); 1214 parseState = PARSESTATE_NEUTRAL; 1215 counter = 0; 1216 return; 1217 case PARSESTATE_HIA: 1218 if (ch == EOT) { 1219 parseState = PARSESTATE_FINISHED; 1220 mark(parseTypePos - counter, parseTypePos, JavaSourceType.NUM_CONSTANT); 1221 return; 1222 } 1223 if (ch == 'x' || ch == 'X') { 1224 parseState = PARSESTATE_HEX; 1225 ++counter; 1226 return; 1227 } 1228 if (ch == '.' || (ch >= '0' && ch <= '9')) { 1229 ++counter; 1230 parseState = PARSESTATE_NA; 1231 return; 1232 } 1233 if (ch == 'f' || ch == 'F' || ch == 'd' || ch == 'D' || ch == 'l' || ch == 'L') { 1234 ++counter; 1235 mark(parseTypePos - counter + 1, parseTypePos + 1, JavaSourceType.NUM_CONSTANT); 1236 parseState = PARSESTATE_NEUTRAL; 1237 counter = 0; 1238 return; 1239 } 1240 if (isNumberDelimiter(ch)) { 1241 parseState = PARSESTATE_START; 1242 mark(parseTypePos - counter, parseTypePos, JavaSourceType.NUM_CONSTANT); 1243 counter = 0; 1244 return; 1245 } 1246 mark(parseTypePos - counter, parseTypePos, JavaSourceType.NUM_CONSTANT); 1247 parseState = PARSESTATE_NEUTRAL; 1248 counter = 0; 1249 return; 1250 case PARSESTATE_HEX: 1251 if (ch == EOT) { 1252 parseState = PARSESTATE_FINISHED; 1253 mark(parseTypePos - counter, parseTypePos, JavaSourceType.NUM_CONSTANT); 1254 return; 1255 } 1256 if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')) { 1257 ++counter; 1258 parseState = PARSESTATE_HEX; 1259 return; 1260 } 1261 if (ch == 'l' || ch == 'L') { 1262 ++counter; 1263 mark(parseTypePos - counter + 1, parseTypePos + 1, JavaSourceType.NUM_CONSTANT); 1264 parseState = PARSESTATE_NEUTRAL; 1265 counter = 0; 1266 return; 1267 } 1268 if (isNumberDelimiter(ch)) { 1269 parseState = PARSESTATE_START; 1270 mark(parseTypePos - counter, parseTypePos, JavaSourceType.NUM_CONSTANT); 1271 counter = 0; 1272 return; 1273 } 1274 mark(parseTypePos - counter, parseTypePos, JavaSourceType.NUM_CONSTANT); 1275 parseState = PARSESTATE_NEUTRAL; 1276 counter = 0; 1277 return; 1278 case PARSESTATE_EXP: 1279 if (ch == EOT) { 1280 parseState = PARSESTATE_FINISHED; 1281 mark(parseTypePos - counter, parseTypePos - 1, JavaSourceType.NUM_CONSTANT); 1282 return; 1283 } 1284 if ((ch >= '0' && ch <= '9') || ch == '+' || ch == '-') { 1285 ++counter; 1286 parseState = PARSESTATE_NA; 1287 return; 1288 } 1289 if (isNumberDelimiter(ch)) { 1290 parseState = PARSESTATE_START; 1291 mark(parseTypePos - counter, parseTypePos - 1, JavaSourceType.NUM_CONSTANT); 1292 counter = 0; 1293 return; 1294 } 1295 mark(parseTypePos - counter, parseTypePos - 1, JavaSourceType.NUM_CONSTANT); 1296 parseState = PARSESTATE_NEUTRAL; 1297 counter = 0; 1298 return; 1299 } 1300 } 1301 1302 1305 private void mark(int start, int endPlusOne, JavaSourceType type) { 1306 for (int i = start; i < endPlusOne; ++i) { 1307 sourceTypes[i] = type; 1308 } 1309 } 1310 1311 1314 private void mark(int index, JavaSourceType type) { 1315 sourceTypes[index] = type; 1316 } 1317 1318 1339} | Popular Tags |