1 43 44 package de.susebox.jtopas; 45 46 import java.lang.reflect.Constructor ; 50 import java.io.Reader ; 51 import java.io.StringReader ; 52 53 import junit.framework.Test; 54 import junit.framework.TestCase; 55 import junit.framework.TestSuite; 56 import junit.framework.Assert; 57 58 import de.susebox.TestUtilities; 59 60 61 65 75 public class TestDifficultSituations extends TestCase { 76 77 81 82 86 89 public static void main(String [] args) { 90 String [] tests = { TestDifficultSituations.class.getName() }; 91 92 TestUtilities.run(tests, args); 93 } 94 95 96 100 106 public static Test suite() { 107 TestSuite suite = new TestSuite(TestDifficultSituations.class.getName()); 108 Class [] sourceClasses = { ReaderSource.class, StringSource.class }; 109 110 for (int sourceIndex = 0; sourceIndex < sourceClasses.length; ++sourceIndex) { 111 suite.addTest(new TestDifficultSituations("testSequencesAndSeparators", sourceClasses[sourceIndex])); 112 suite.addTest(new TestDifficultSituations("testSmallSource", sourceClasses[sourceIndex])); 113 suite.addTest(new TestDifficultSituations("testEmptySource", sourceClasses[sourceIndex])); 114 suite.addTest(new TestDifficultSituations("testSimilarSpecialSequences", sourceClasses[sourceIndex])); 115 suite.addTest(new TestDifficultSituations("testNonASCIICharacters", sourceClasses[sourceIndex])); 116 suite.addTest(new TestDifficultSituations("testEOFInLineComment", sourceClasses[sourceIndex])); 117 suite.addTest(new TestDifficultSituations("testEOFInBlockComment", sourceClasses[sourceIndex])); 118 suite.addTest(new TestDifficultSituations("testEOFInString", sourceClasses[sourceIndex])); 119 suite.addTest(new TestDifficultSituations("testStringEscapes1", sourceClasses[sourceIndex])); 120 suite.addTest(new TestDifficultSituations("testStringEscapes2", sourceClasses[sourceIndex])); 121 suite.addTest(new TestDifficultSituations("testNestedComments", sourceClasses[sourceIndex])); 122 suite.addTest(new TestDifficultSituations("testReaderSwitching", sourceClasses[sourceIndex])); 123 suite.addTest(new TestDifficultSituations("testDOSEOL", sourceClasses[sourceIndex])); 124 suite.addTest(new TestDifficultSituations("testMACEOL", sourceClasses[sourceIndex])); 125 suite.addTest(new TestDifficultSituations("testSpecialCalls", sourceClasses[sourceIndex])); 126 suite.addTest(new TestDifficultSituations("testLineCounting", sourceClasses[sourceIndex])); 127 suite.addTest(new TestDifficultSituations("testUncommonWhitespaces", sourceClasses[sourceIndex])); 128 suite.addTest(new TestDifficultSituations("testWhitespaceHandling", sourceClasses[sourceIndex])); 129 } 130 return suite; 131 } 132 133 134 138 142 public TestDifficultSituations(String test, Class sourceClass) { 143 super(test); 144 _sourceClass = sourceClass; 145 } 146 147 148 152 156 protected void setUp() throws Exception {} 157 158 159 163 protected void tearDown() throws Exception {} 164 165 166 170 private static final String PLUS = "+"; 172 private static final String DOUBLE_PLUS = "++"; 173 private static final String TRIPLE_PLUS = "+++"; 174 private static final String PLUS_EQUAL = "+="; 175 private static final String PLUS_MINUS = "+-"; 176 private static final String HTML_OPEN = "<"; 177 private static final String HTML_COMMENT1 = "<!"; 178 private static final String HTML_COMMENT2 = "<!--"; 179 private static final String HTML_HEAD = "<head>"; 180 private static final String HTML_HEADER = "<h>"; 181 private static final String HTML_HT = "<ht>"; 182 private static final String HTML_CLOSE = ">"; 183 private static final String MINUS = "-"; 184 private static final String DOUBLE_MINUS = "--"; 185 private static final String HTML_COMMENT_END = "-->"; 186 private static final String HTML_HEAD_END = "</head>"; 187 private static final String HTML_HEADER_END = "</h>"; 188 private static final String SHIFT_LEFT = "<<"; 189 private static final String SHIFT_RIGHT = ">>"; 190 private static final String COLON = "."; 191 private static final String EURO = "€"; 192 private static final String DOUBLE_EURO = "€€"; 193 private static final String EUROURO = "€uro"; 194 private static final String AE = "æ"; 195 private static final String OERE = "ø"; 196 private static final String BUG = "ð"; 197 private static final String DOUBLE_BUG = "ðð"; 198 199 202 public void testSimilarSpecialSequences() throws Throwable { 203 TokenizerSource source = getSource( "lots+of++special+=sequences+in+++a+-row\n" 204 + "with <HEAD>HTML-tags-in-between</head>\n" 205 + "like <h>headings</h><open and close> tags\n" 206 + "and <!even--comments-->+<!--in<ht>many+=forms-->>\n" 207 + "some<<as>>operators.\n" 208 + "+++++<<<>>>.\n" 209 ); 210 String [] expectedToken = { 211 PLUS, DOUBLE_PLUS, PLUS_EQUAL, PLUS, TRIPLE_PLUS, PLUS_MINUS, HTML_HEAD, MINUS, MINUS, MINUS, HTML_HEAD_END, HTML_HEADER, HTML_HEADER_END, HTML_OPEN, HTML_CLOSE, HTML_COMMENT1, DOUBLE_MINUS, HTML_COMMENT_END, PLUS, 215 HTML_COMMENT2, HTML_HT, PLUS_EQUAL, HTML_COMMENT_END, HTML_CLOSE, SHIFT_LEFT, SHIFT_RIGHT, COLON, TRIPLE_PLUS, DOUBLE_PLUS, SHIFT_LEFT, HTML_OPEN, SHIFT_RIGHT, 218 HTML_CLOSE, COLON }; 220 221 TokenizerProperties props = new StandardTokenizerProperties(); 222 Tokenizer tokenizer = getTokenizer(props); 223 224 try { 225 props.addSpecialSequence(COLON, COLON); 226 props.addSpecialSequence(PLUS, PLUS); 227 props.addSpecialSequence(DOUBLE_PLUS, DOUBLE_PLUS); 228 props.addSpecialSequence(TRIPLE_PLUS, TRIPLE_PLUS); 229 props.addSpecialSequence(PLUS_EQUAL, PLUS_EQUAL); 230 props.addSpecialSequence(PLUS_MINUS, PLUS_MINUS); 231 props.addSpecialSequence(SHIFT_LEFT, SHIFT_LEFT); 232 props.addSpecialSequence(HTML_OPEN, HTML_OPEN, Flags.F_NO_CASE); 233 props.addSpecialSequence(HTML_COMMENT1, HTML_COMMENT1, Flags.F_NO_CASE); 234 props.addSpecialSequence(HTML_COMMENT2, HTML_COMMENT2, Flags.F_NO_CASE); 235 props.addSpecialSequence(HTML_HEAD, HTML_HEAD, Flags.F_NO_CASE); 236 props.addSpecialSequence(HTML_HEADER, HTML_HEADER, Flags.F_NO_CASE); 237 props.addSpecialSequence(HTML_HT, HTML_HT, Flags.F_NO_CASE); 238 props.addSpecialSequence(HTML_CLOSE, HTML_CLOSE, Flags.F_NO_CASE); 239 props.addSpecialSequence(SHIFT_RIGHT, SHIFT_RIGHT); 240 props.addSpecialSequence(MINUS, MINUS); 241 props.addSpecialSequence(DOUBLE_MINUS, DOUBLE_MINUS); 242 props.addSpecialSequence(HTML_COMMENT_END, HTML_COMMENT_END, Flags.F_NO_CASE); 243 props.addSpecialSequence(HTML_HEAD_END, HTML_HEAD_END, Flags.F_NO_CASE); 244 props.addSpecialSequence(HTML_HEADER_END, HTML_HEADER_END, Flags.F_NO_CASE); 245 tokenizer.setSource(source); 246 247 int index = 0; 249 250 while (tokenizer.hasMoreToken()) { 251 Token token = tokenizer.nextToken(); 252 boolean isOK; 253 254 switch (token.getType()) { 255 case Token.NORMAL: 256 System.out.println(token.getImage()); 257 break; 258 case Token.SPECIAL_SEQUENCE: 259 if (props.isFlagSet(props.getSpecialSequence(token.getImage()), Flags.F_NO_CASE)) { 260 isOK = expectedToken[index].equalsIgnoreCase(token.getImage()); 261 } else { 262 isOK = expectedToken[index].equals(token.getImage()); 263 } 264 assertTrue("Index " + index + ": expected \"" + expectedToken[index] + "\", got \"" + token.getImage() + "\".", isOK); 265 index++; 266 break; 267 } 268 } 269 } finally { 270 tokenizer.close(); 271 } 272 } 273 274 275 278 public void testNonASCIICharacters() throws Throwable { 279 TokenizerSource source = getSource("1€ is an æ to much. Orøtakeðthis: €€ or €uro and ðð."); 280 281 String [] expectedToken = { 282 EURO, AE, OERE, BUG, DOUBLE_EURO, EUROURO, DOUBLE_BUG 283 }; 284 285 TokenizerProperties props = new StandardTokenizerProperties(); 286 Tokenizer tokenizer = getTokenizer(props); 287 288 try { 289 props.addSpecialSequence(EURO, EURO); 290 props.addSpecialSequence(DOUBLE_EURO, DOUBLE_EURO); 291 props.addSpecialSequence(EUROURO, EUROURO); 292 props.addSpecialSequence(AE, AE); 293 props.addSpecialSequence(OERE, OERE); 294 props.addSpecialSequence(BUG, BUG); 295 props.addSpecialSequence(DOUBLE_BUG, DOUBLE_BUG); 296 tokenizer.setSource(source); 297 298 int index = 0; 300 301 while (tokenizer.hasMoreToken()) { 302 Token token = tokenizer.nextToken(); 303 boolean isOK; 304 305 switch (token.getType()) { 306 case Token.NORMAL: 307 System.out.println(token.getImage()); 308 break; 309 case Token.SPECIAL_SEQUENCE: 310 assertTrue( "Index " + index + ": expected \"" + expectedToken[index] + "\", got \"" + token.getImage() + "\".", 311 expectedToken[index].equals(token.getImage())); 312 index++; 313 break; 314 } 315 } 316 } finally { 317 tokenizer.close(); 318 } 319 } 320 321 322 326 public void testEmptySource() throws Throwable { 327 TokenizerSource source = getSource(""); 328 TokenizerProperties props = new StandardTokenizerProperties(); 329 Tokenizer tokenizer = getTokenizer(props); 330 Token token; 331 332 try { 333 props.setParseFlags(Flags.F_RETURN_WHITESPACES); 334 props.addLineComment("//"); 335 tokenizer.setSource(source); 336 337 assertTrue(tokenizer.hasMoreToken()); 338 token = tokenizer.nextToken(); 339 assertTrue(token.getType() == Token.EOF); 340 assertTrue( ! tokenizer.hasMoreToken()); 341 } finally { 342 tokenizer.close(); 343 } 344 } 345 346 347 350 public void testSmallSource() throws Throwable { 351 TokenizerProperties props = new StandardTokenizerProperties(); 352 Tokenizer tokenizer = getTokenizer(props); 353 Token token; 354 355 try { 356 props.setParseFlags(Flags.F_RETURN_WHITESPACES); 357 props.addLineComment("//"); 358 props.addSpecialSequence(PLUS, PLUS); 359 props.addSpecialSequence(DOUBLE_PLUS, DOUBLE_PLUS); 360 props.addSpecialSequence(MINUS, MINUS); 361 props.addSpecialSequence(DOUBLE_MINUS, DOUBLE_MINUS); 362 363 char[] contents = new char[8192]; 365 int bytes; 366 367 tokenizer.setSource(getSource("A")); 368 369 assertTrue(tokenizer.hasMoreToken()); 370 token = tokenizer.nextToken(); 371 assertTrue(token.getType() == Token.NORMAL); 372 assertTrue(token.getImage().equals("A")); 373 assertTrue(tokenizer.hasMoreToken()); 374 token = tokenizer.nextToken(); 375 assertTrue(token.getType() == Token.EOF); 376 assertTrue( ! tokenizer.hasMoreToken()); 377 378 tokenizer.setSource(getSource("++")); 380 381 assertTrue(tokenizer.hasMoreToken()); 382 token = tokenizer.nextToken(); 383 assertTrue(token.getType() == Token.SPECIAL_SEQUENCE); 384 assertTrue(token.getCompanion() == DOUBLE_PLUS); 385 assertTrue(tokenizer.hasMoreToken()); 386 token = tokenizer.nextToken(); 387 assertTrue(token.getType() == Token.EOF); 388 assertTrue( ! tokenizer.hasMoreToken()); 389 390 tokenizer.setSource(getSource("//")); 392 393 assertTrue(tokenizer.hasMoreToken()); 394 token = tokenizer.nextToken(); 395 assertTrue(token.getType() == Token.LINE_COMMENT); 396 assertTrue(token.getImage().equals("//")); 397 assertTrue(tokenizer.hasMoreToken()); 398 token = tokenizer.nextToken(); 399 assertTrue(token.getType() == Token.EOF); 400 assertTrue( ! tokenizer.hasMoreToken()); 401 402 } finally { 403 tokenizer.close(); 405 } 406 } 407 408 409 415 public void testEOFInLineComment() throws Throwable { 416 TokenizerSource source = getSource("// end of file occurs in line comment."); 417 TokenizerProperties props = new StandardTokenizerProperties(); 418 Tokenizer tokenizer = getTokenizer(props); 419 Token token; 420 421 try { 422 props.setParseFlags(Flags.F_RETURN_WHITESPACES); 423 props.addLineComment("//"); 424 tokenizer.setSource(source); 425 426 assertTrue(tokenizer.hasMoreToken()); 427 token = tokenizer.nextToken(); 428 assertTrue(token.getType() == Token.LINE_COMMENT); 429 assertTrue(tokenizer.hasMoreToken()); 430 token = tokenizer.nextToken(); 431 assertTrue(token.getType() == Token.EOF); 432 } finally { 433 tokenizer.close(); 435 } 436 } 437 438 442 public void testEOFInBlockComment() throws Throwable { 443 TokenizerSource source = getSource("/* end of file occurs\nin a block comment."); 444 TokenizerProperties props = new StandardTokenizerProperties(); 445 Tokenizer tokenizer = getTokenizer(props); 446 Token token; 447 448 try { 449 props.setParseFlags(Flags.F_RETURN_WHITESPACES); 450 props.addBlockComment("/*", "*/"); 451 tokenizer.setSource(source); 452 453 assertTrue(tokenizer.hasMoreToken()); 454 token = tokenizer.nextToken(); 455 assertTrue(token.getType() == Token.BLOCK_COMMENT); 456 assertTrue(tokenizer.hasMoreToken()); 457 token = tokenizer.nextToken(); 458 assertTrue(token.getType() == Token.EOF); 459 } finally { 460 tokenizer.close(); 462 } 463 } 464 465 469 public void testEOFInString() throws Throwable { 470 TokenizerSource source = getSource("-- end of file in String\n\"Thats the string, but rather unterminated |-("); 471 TokenizerProperties props = new StandardTokenizerProperties(); 472 Tokenizer tokenizer = getTokenizer(props); 473 Token token; 474 475 try { 476 props.addLineComment("--"); 477 props.addString("\"", "\"", "\""); 478 tokenizer.setSource(source); 479 480 assertTrue(tokenizer.hasMoreToken()); 481 token = tokenizer.nextToken(); 482 assertTrue(token.getType() == Token.STRING); 483 assertTrue(tokenizer.hasMoreToken()); 484 token = tokenizer.nextToken(); 485 assertTrue(token.getType() == Token.EOF); 486 } finally { 487 tokenizer.close(); 489 } 490 } 491 492 495 public void testSpecialCalls() throws Throwable { 496 TokenizerSource source = getSource("A simple text"); 497 TokenizerProperties props = new StandardTokenizerProperties(); 498 Tokenizer tokenizer = getTokenizer(props); 499 Token token = null; 500 501 try { 502 tokenizer.setSource(source); 503 504 try { 505 tokenizer.currentToken(); 506 assertTrue("Tokenizer should have thrown an exception here.", false); 507 } catch (TokenizerException ex) {}; 508 try { 509 tokenizer.currentImage(); 510 assertTrue("Tokenizer should have thrown an exception here.", false); 511 } catch (TokenizerException ex) {}; 512 513 while (tokenizer.hasMoreToken()) { 514 Token newToken = tokenizer.nextToken(); 515 assertTrue( ! tokenizer.currentToken().equals(token)); 516 assertTrue(tokenizer.currentToken() != null); 517 assertTrue(tokenizer.currentToken().equals(newToken)); 518 assertTrue(tokenizer.currentToken().equals(tokenizer.currentToken())); 519 if (newToken.getType() != Token.EOF) { 520 assertTrue(tokenizer.currentImage() != null); 521 assertTrue(tokenizer.currentImage().equals(tokenizer.currentImage())); 522 } else { 523 assertTrue( ! tokenizer.hasMoreToken()); 524 } 525 token = newToken; 526 } 527 } finally { 528 tokenizer.close(); 530 } 531 } 532 533 540 public void testStringEscapes1() throws Throwable { 541 TokenizerSource source = getSource( 542 "\"String escape \\\" in the middle\"\n" 543 + "\"String escape on end \\\"\"\n" 544 + "\"\\\" String escape on begin\"\n" 545 + "\"Two string escapes \\\"\\\" after each other\"\n" 546 + "\"Two string escapes on end \\\"\\\"\"\n"); 547 548 int lines = 5; 549 TokenizerProperties props = new StandardTokenizerProperties(); 550 Tokenizer tokenizer = getTokenizer(props); 551 Token token; 552 553 try { 554 props.setParseFlags(Flags.F_RETURN_WHITESPACES | Flags.F_COUNT_LINES); 555 props.addString("\"", "\"", "\\"); 556 tokenizer.setSource(source); 557 558 for (int line = 0; line < lines; ++line) { 559 assertTrue("(1) No more token at line " + line, tokenizer.hasMoreToken()); 560 token = tokenizer.nextToken(); 561 assertTrue("String not recognized at line " + line, token.getType() == Token.STRING); 562 assertTrue("(2) No more token at line " + line, tokenizer.hasMoreToken()); 563 token = tokenizer.nextToken(); 564 assertTrue("Newline not recognized as whitespace at line " + line, token.getType() == Token.WHITESPACE); 565 } 566 assertTrue(tokenizer.hasMoreToken()); 567 token = tokenizer.nextToken(); 568 assertTrue(token.getType() == Token.EOF); 569 } finally { 570 tokenizer.close(); 572 } 573 } 574 575 582 public void testStringEscapes2() throws Throwable { 583 TokenizerSource source = getSource( 584 "'String escape '' in the middle'\n" 585 + "'String escape on end '''\n" 586 + "''' String escape on begin'\n" 587 + "'Two string escapes '''' after each other'\n" 588 + "'Two string escapes on end '''''\n"); 589 590 int lines = 5; 591 TokenizerProperties props = new StandardTokenizerProperties(); 592 Tokenizer tokenizer = getTokenizer(props); 593 Token token; 594 595 try { 596 props.setParseFlags(Flags.F_RETURN_WHITESPACES | Flags.F_COUNT_LINES); 597 props.addString("'", "'", "'"); 598 tokenizer.setSource(source); 599 600 for (int line = 0; line < lines; ++line) { 601 assertTrue("(1) No more token at line " + line, tokenizer.hasMoreToken()); 602 token = tokenizer.nextToken(); 603 assertTrue("String not recognized at line " + line, token.getType() == Token.STRING); 604 assertTrue("(2) No more token at line " + line, tokenizer.hasMoreToken()); 605 token = tokenizer.nextToken(); 606 assertTrue("Newline not recognized as whitespace at line " + line, token.getType() == Token.WHITESPACE); 607 } 608 assertTrue(tokenizer.hasMoreToken()); 609 token = tokenizer.nextToken(); 610 assertTrue(token.getType() == Token.EOF); 611 } finally { 612 tokenizer.close(); 614 } 615 } 616 617 620 public void testNestedComments() throws Throwable { 621 TokenizerSource source = getSource( 622 "// line comment including // line comment sequence\n" 623 + "/* block comment with\n" 624 + " /* a nested block\n" 625 + " comment\n" 626 + " */\n" 627 + " normal token or not ?\n" 628 + "*/\n" 629 + "// line comment with /* block comment */\n" 630 + "'a string with // line comment'\n" 631 + "'a string with /* block comment */'\n"); 632 633 int lines = 10; 634 TokenizerProperties props = new StandardTokenizerProperties(); 635 Tokenizer tokenizer = getTokenizer(props); 636 Token token; 637 638 try { 639 props.setParseFlags(Flags.F_RETURN_WHITESPACES 640 | Flags.F_COUNT_LINES 641 | Flags.F_ALLOW_NESTED_COMMENTS); 642 props.addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT); 643 props.addBlockComment(TokenizerProperties.DEFAULT_BLOCK_COMMENT_START, TokenizerProperties.DEFAULT_BLOCK_COMMENT_END); 644 props.addString("'", "'", "'"); 645 tokenizer.setSource(source); 646 647 assertTrue(tokenizer.hasMoreToken()); 649 token = tokenizer.nextToken(); 650 assertTrue("(1) line comment not recognized", token.getType() == Token.LINE_COMMENT); 651 assertTrue("(2) wrong start position " + token.getStartPosition(), token.getStartPosition() == 0); 652 assertTrue("(3) wrong start line " + token.getStartLine(), token.getStartLine() == 0); 653 assertTrue("(4) wrong start column" + token.getStartColumn(), token.getStartColumn() == 0); 654 assertTrue("(5) wrong end line " + token.getEndLine(), token.getEndLine() == token.getStartLine() + 1); 655 assertTrue("(6) wrong end column" + token.getEndColumn(), token.getEndColumn() == 0); 656 657 assertTrue(tokenizer.hasMoreToken()); 659 token = tokenizer.nextToken(); 660 assertTrue("(10) block comment not recognized", token.getType() == Token.BLOCK_COMMENT); 661 assertTrue("(11) wrong start line " + token.getStartLine(), token.getStartLine() == 1); 662 assertTrue("(12) wrong start column" + token.getStartColumn(), token.getStartColumn() == 0); 663 assertTrue("(13) wrong end line " + token.getEndLine(), token.getEndLine() == token.getStartLine() + 5); 664 assertTrue("(14) wrong end column" + token.getEndColumn(), token.getEndColumn() == 2); 665 assertTrue(tokenizer.hasMoreToken()); 666 token = tokenizer.nextToken(); 667 assertTrue("(15) newline behind block comment not recognized as whitespace", token.getType() == Token.WHITESPACE); 668 assertTrue("(16) newline behind block comment not recognized as literal", tokenizer.currentImage().equals("\n")); 669 670 assertTrue(tokenizer.hasMoreToken()); 672 token = tokenizer.nextToken(); 673 assertTrue("(21) line comment not recognized", token.getType() == Token.LINE_COMMENT); 674 assertTrue("(22) wrong start line " + token.getStartLine(), token.getStartLine() == 7); 675 assertTrue("(23) wrong end line " + token.getEndLine(), token.getEndLine() == token.getStartLine() + 1); 676 677 assertTrue(tokenizer.hasMoreToken()); 679 token = tokenizer.nextToken(); 680 assertTrue("(31) string not recognized", token.getType() == Token.STRING); 681 assertTrue("(32) wrong start line " + token.getStartLine(), token.getStartLine() == 8); 682 assertTrue("(33) wrong start column" + token.getStartColumn(), token.getStartColumn() == 0); 683 assertTrue("(34) wrong end line " + token.getEndLine(), token.getEndLine() == token.getStartLine()); 684 assertTrue(tokenizer.hasMoreToken()); 685 token = tokenizer.nextToken(); 686 assertTrue("(35) newline behind string not recognized as whitespace", token.getType() == Token.WHITESPACE); 687 assertTrue("(36) newline behind string not recognized as literal", tokenizer.currentImage().equals("\n")); 688 689 assertTrue(tokenizer.hasMoreToken()); 691 token = tokenizer.nextToken(); 692 assertTrue("(41) string not recognized", token.getType() == Token.STRING); 693 assertTrue("(42) wrong start line " + token.getStartLine(), token.getStartLine() == 9); 694 assertTrue("(43) wrong start column" + token.getStartColumn(), token.getStartColumn() == 0); 695 assertTrue("(44) wrong end line " + token.getEndLine(), token.getEndLine() == token.getStartLine()); 696 assertTrue(tokenizer.hasMoreToken()); 697 token = tokenizer.nextToken(); 698 assertTrue("(45) newline behind string not recognized as whitespace", token.getType() == Token.WHITESPACE); 699 assertTrue("(46) newline behind string not recognized as literal", tokenizer.currentImage().equals("\n")); 700 701 token = tokenizer.nextToken(); 703 assertTrue(token.getType() == Token.EOF); 704 705 } finally { 706 tokenizer.close(); 708 } 709 } 710 711 712 715 public void testReaderSwitching() throws Throwable { 716 TokenizerSource source1 = getSource("0/2 4/6 8/10"); 717 TokenizerSource source2 = getSource("0/2 4/6 8/10"); 718 TokenizerSource source3 = getSource("0/2 4/6 8/10"); 719 TokenizerSource[] sources = { source1, source2, source3 }; 720 721 TokenizerProperties props = new StandardTokenizerProperties(); 722 Tokenizer tokenizer = getTokenizer(props); 723 Token token; 724 725 try { 726 for (int sourceIndex = 0; sourceIndex < sources.length; ++sourceIndex) { 727 tokenizer.setSource(sources[sourceIndex]); 728 for (int ii = 0; ii <= 8; ii += 4) { 729 assertTrue(tokenizer.hasMoreToken()); 730 token = tokenizer.nextToken(); 731 assertTrue("Wrong start position " + token.getStartPosition(), token.getStartPosition() == ii); 732 assertTrue("Wrong type " + token.getType(), token.getType() == Token.NORMAL); 733 assertTrue("Token not recognized as literal", tokenizer.currentImage().equals(Integer.toString(ii))); 734 assertTrue(tokenizer.hasMoreToken()); 735 token = tokenizer.nextToken(); 736 assertTrue("Wrong start position " + token.getStartPosition(), token.getStartPosition() == ii + 1); 737 assertTrue("Wrong type " + token.getType(), token.getType() == Token.SEPARATOR); 738 assertTrue("Separator not recognized as literal", tokenizer.currentImage().equals("/")); 739 assertTrue(tokenizer.hasMoreToken()); 740 token = tokenizer.nextToken(); 741 assertTrue("Wrong start position " + token.getStartPosition(), token.getStartPosition() == ii + 2); 742 assertTrue("Wrong type " + token.getType(), token.getType() == Token.NORMAL); 743 assertTrue("Token not recognized as literal", tokenizer.currentImage().equals(Integer.toString(ii + 2))); 744 } 745 } 746 } finally { 747 tokenizer.close(); 749 } 750 } 751 752 753 756 public void testDOSEOL() throws Throwable { 757 TokenizerSource source = getSource( 758 "// line comment with DOS line ending\r\n" 759 + "void main(int argc)\r\n" 760 + "{\r\n" 761 + " // another line comment\r\n" 762 + " /* a block comment\r\n" 763 + " with more than one line\r\n" 764 + " */\r\n" 765 + "}\r\n"); 766 767 int lines = 8; 768 TokenizerProperties props = new StandardTokenizerProperties(); 769 Tokenizer tokenizer = getTokenizer(props); 770 Token token; 771 772 try { 773 props.setParseFlags(Flags.F_RETURN_WHITESPACES | Flags.F_COUNT_LINES); 774 props.addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT); 775 props.addBlockComment(TokenizerProperties.DEFAULT_BLOCK_COMMENT_START, TokenizerProperties.DEFAULT_BLOCK_COMMENT_END); 776 props.addString("\"", "\"", "\\"); 777 tokenizer.setSource(source); 778 779 assertTrue(tokenizer.hasMoreToken()); 781 token = tokenizer.nextToken(); 782 assertTrue("(1) line comment not recognized", token.getType() == Token.LINE_COMMENT); 783 assertTrue("(2) start line wrong", token.getStartLine() == 0); 784 assertTrue("(3) start column wrong", token.getStartColumn() == 0); 785 assertTrue("(4) end line wrong", token.getEndLine() == 1); 786 assertTrue("(5) end column wrong", token.getEndColumn() == 0); 787 788 assertTrue(tokenizer.hasMoreToken()); 790 token = tokenizer.nextToken(); 791 assertTrue("(10) token \"void\" not recognized.", token.getType() == Token.NORMAL && token.getImage().equals("void")); 792 assertTrue("(11) start line wrong", token.getStartLine() == 1); 793 assertTrue("(12) start column wrong", token.getStartColumn() == 0); 794 assertTrue("(13) end line wrong", token.getEndLine() == 1); 795 assertTrue("(14) end column wrong", token.getEndColumn() == 4); 796 797 assertTrue(tokenizer.hasMoreToken()); 798 token = tokenizer.nextToken(); 799 assertTrue("(15) whitespace not recognized", token.getType() == Token.WHITESPACE); 800 801 assertTrue(tokenizer.hasMoreToken()); 803 token = tokenizer.nextToken(); 804 assertTrue("(20) token \"main\" not recognized.", token.getType() == Token.NORMAL && token.getImage().equals("main")); 805 assertTrue("(21) start line wrong", token.getStartLine() == 1); 806 assertTrue("(22) start column wrong", token.getStartColumn() == 5); 807 assertTrue("(23) end line wrong", token.getEndLine() == 1); 808 assertTrue("(24) end column wrong", token.getEndColumn() == 9); 809 810 assertTrue(tokenizer.hasMoreToken()); 812 token = tokenizer.nextToken(); 813 assertTrue("(30) token \"(\" not recognized.", token.getType() == Token.SEPARATOR && token.getImage().equals("(")); 814 assertTrue("(31) start line wrong", token.getStartLine() == 1); 815 assertTrue("(32) start column wrong", token.getStartColumn() == 9); 816 assertTrue("(33) end line wrong", token.getEndLine() == 1); 817 assertTrue("(34) end column wrong", token.getEndColumn() == 10); 818 819 assertTrue(tokenizer.hasMoreToken()); 821 token = tokenizer.nextToken(); 822 assertTrue("(40) token \"int\" not recognized.", token.getType() == Token.NORMAL && token.getImage().equals("int")); 823 assertTrue("(41) start line wrong", token.getStartLine() == 1); 824 assertTrue("(42) start column wrong", token.getStartColumn() == 10); 825 assertTrue("(43) end line wrong", token.getEndLine() == 1); 826 assertTrue("(44) end column wrong", token.getEndColumn() == 13); 827 828 assertTrue(tokenizer.hasMoreToken()); 829 token = tokenizer.nextToken(); 830 assertTrue("(45) whitespace not recognized", token.getType() == Token.WHITESPACE); 831 832 assertTrue(tokenizer.hasMoreToken()); 834 token = tokenizer.nextToken(); 835 assertTrue("(50) token \"argc\" not recognized.", token.getType() == Token.NORMAL && token.getImage().equals("argc")); 836 assertTrue("(51) start line wrong", token.getStartLine() == 1); 837 assertTrue("(52) start column wrong", token.getStartColumn() == 14); 838 assertTrue("(53) end line wrong", token.getEndLine() == 1); 839 assertTrue("(54) end column wrong", token.getEndColumn() == 18); 840 841 assertTrue(tokenizer.hasMoreToken()); 843 token = tokenizer.nextToken(); 844 assertTrue("(60) token \")\" not recognized.", token.getType() == Token.SEPARATOR && token.getImage().equals(")")); 845 assertTrue("(61) start line wrong", token.getStartLine() == 1); 846 assertTrue("(62) start column wrong", token.getStartColumn() == 18); 847 assertTrue("(63) end line wrong", token.getEndLine() == 1); 848 assertTrue("(64) end column wrong", token.getEndColumn() == 19); 849 850 assertTrue(tokenizer.hasMoreToken()); 852 token = tokenizer.nextToken(); 853 assertTrue("(60) token \"\\r\\n\" not recognized.", token.getType() == Token.WHITESPACE && token.getImage().equals("\r\n")); 854 assertTrue("(61) start line wrong", token.getStartLine() == 1); 855 assertTrue("(62) start column wrong", token.getStartColumn() == 19); 856 assertTrue("(63) end line wrong", token.getEndLine() == 2); 857 assertTrue("(64) end column wrong", token.getEndColumn() == 0); 858 assertTrue("(65) wrong length", token.getLength() == 2); 859 860 assertTrue(tokenizer.hasMoreToken()); 862 token = tokenizer.nextToken(); 863 assertTrue("(70) token \"{\" not recognized.", token.getType() == Token.SEPARATOR && token.getImage().equals("{")); 864 assertTrue("(71) start line wrong", token.getStartLine() == 2); 865 assertTrue("(72) start column wrong", token.getStartColumn() == 0); 866 assertTrue("(73) end line wrong", token.getEndLine() == 2); 867 assertTrue("(74) end column wrong", token.getEndColumn() == 1); 868 869 assertTrue(tokenizer.hasMoreToken()); 871 token = tokenizer.nextToken(); 872 assertTrue("(80) token \"\\r\\n \" not recognized.", token.getType() == Token.WHITESPACE && token.getImage().equals("\r\n ")); 873 assertTrue("(81) start line wrong", token.getStartLine() == 2); 874 assertTrue("(82) start column wrong", token.getStartColumn() == 1); 875 assertTrue("(83) end line wrong", token.getEndLine() == 3); 876 assertTrue("(84) end column wrong", token.getEndColumn() == 2); 877 assertTrue("(85) wrong length", token.getLength() == 4); 878 879 assertTrue(tokenizer.hasMoreToken()); 881 token = tokenizer.nextToken(); 882 assertTrue("(91) line comment not recognized", token.getType() == Token.LINE_COMMENT); 883 assertTrue("(92) start line wrong", token.getStartLine() == 3); 884 assertTrue("(93) start column wrong", token.getStartColumn() == 2); 885 assertTrue("(94) end line wrong", token.getEndLine() == 4); 886 assertTrue("(95) end column wrong", token.getEndColumn() == 0); 887 888 assertTrue(tokenizer.hasMoreToken()); 889 token = tokenizer.nextToken(); 890 assertTrue("(96) whitespace not recognized", token.getType() == Token.WHITESPACE); 891 892 assertTrue(tokenizer.hasMoreToken()); 894 token = tokenizer.nextToken(); 895 assertTrue("(101) block comment not recognized", token.getType() == Token.BLOCK_COMMENT); 896 assertTrue("(102) start line wrong", token.getStartLine() == 4); 897 assertTrue("(103) start column wrong", token.getStartColumn() == 2); 898 assertTrue("(104) end line wrong", token.getEndLine() == 6); 899 assertTrue("(105) end column wrong", token.getEndColumn() == 4); 900 901 assertTrue(tokenizer.hasMoreToken()); 903 token = tokenizer.nextToken(); 904 assertTrue("(110) token \"\\r\\n\" not recognized.", token.getType() == Token.WHITESPACE && token.getImage().equals("\r\n")); 905 assertTrue("(111) start line wrong", token.getStartLine() == 6); 906 assertTrue("(112) start column wrong", token.getStartColumn() == 4); 907 assertTrue("(113) end line wrong", token.getEndLine() == 7); 908 assertTrue("(114) end column wrong", token.getEndColumn() == 0); 909 assertTrue("(115) wrong length", token.getLength() == 2); 910 911 assertTrue(tokenizer.hasMoreToken()); 913 token = tokenizer.nextToken(); 914 assertTrue("(120) token \"}\" not recognized.", token.getType() == Token.SEPARATOR && token.getImage().equals("}")); 915 assertTrue("(121) start line wrong", token.getStartLine() == 7); 916 assertTrue("(122) start column wrong", token.getStartColumn() == 0); 917 assertTrue("(123) end line wrong", token.getEndLine() == 7); 918 assertTrue("(124) end column wrong", token.getEndColumn() == 1); 919 920 assertTrue(tokenizer.hasMoreToken()); 922 token = tokenizer.nextToken(); 923 assertTrue("(130) token \"\\r\\n\" not recognized.", token.getType() == Token.WHITESPACE && token.getImage().equals("\r\n")); 924 assertTrue("(131) start line wrong", token.getStartLine() == 7); 925 assertTrue("(132) start column wrong", token.getStartColumn() == 1); 926 assertTrue("(133) end line wrong", token.getEndLine() == 8); 927 assertTrue("(134) end column wrong", token.getEndColumn() == 0); 928 assertTrue("(135) wrong length", token.getLength() == 2); 929 930 } finally { 931 tokenizer.close(); 933 } 934 } 935 936 939 public void testMACEOL() throws Throwable { 940 TokenizerSource source = getSource( 941 "// line comment with DOS line ending\r" 942 + "void main(int argc)\r" 943 + "{\r" 944 + " // another line comment\r" 945 + " /* a block comment\r" 946 + " with more than one line\r" 947 + " */\r" 948 + "}\r"); 949 950 int lines = 8; 951 TokenizerProperties props = new StandardTokenizerProperties(); 952 Tokenizer tokenizer = getTokenizer(props); 953 Token token; 954 955 try { 956 props.setParseFlags(Flags.F_RETURN_WHITESPACES | Flags.F_COUNT_LINES); 957 props.addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT); 958 props.addBlockComment(TokenizerProperties.DEFAULT_BLOCK_COMMENT_START, TokenizerProperties.DEFAULT_BLOCK_COMMENT_END); 959 props.addString("\"", "\"", "\\"); 960 tokenizer.setSource(source); 961 962 assertTrue(tokenizer.hasMoreToken()); 964 token = tokenizer.nextToken(); 965 assertTrue("(1) line comment not recognized", token.getType() == Token.LINE_COMMENT); 966 assertTrue("(2) start line wrong", token.getStartLine() == 0); 967 assertTrue("(3) start column wrong", token.getStartColumn() == 0); 968 assertTrue("(4) end line wrong", token.getEndLine() == 1); 969 assertTrue("(5) end column wrong", token.getEndColumn() == 0); 970 971 assertTrue(tokenizer.hasMoreToken()); 973 token = tokenizer.nextToken(); 974 assertTrue("(10) token \"void\" not recognized.", token.getType() == Token.NORMAL && token.getImage().equals("void")); 975 assertTrue("(11) start line wrong", token.getStartLine() == 1); 976 assertTrue("(12) start column wrong", token.getStartColumn() == 0); 977 assertTrue("(13) end line wrong", token.getEndLine() == 1); 978 assertTrue("(14) end column wrong", token.getEndColumn() == 4); 979 980 assertTrue(tokenizer.hasMoreToken()); 981 token = tokenizer.nextToken(); 982 assertTrue("(15) whitespace not recognized", token.getType() == Token.WHITESPACE); 983 984 assertTrue(tokenizer.hasMoreToken()); 986 token = tokenizer.nextToken(); 987 assertTrue("(20) token \"main\" not recognized.", token.getType() == Token.NORMAL && token.getImage().equals("main")); 988 assertTrue("(21) start line wrong", token.getStartLine() == 1); 989 assertTrue("(22) start column wrong", token.getStartColumn() == 5); 990 assertTrue("(23) end line wrong", token.getEndLine() == 1); 991 assertTrue("(24) end column wrong", token.getEndColumn() == 9); 992 993 assertTrue(tokenizer.hasMoreToken()); 995 token = tokenizer.nextToken(); 996 assertTrue("(30) token \"(\" not recognized.", token.getType() == Token.SEPARATOR && token.getImage().equals("(")); 997 assertTrue("(31) start line wrong", token.getStartLine() == 1); 998 assertTrue("(32) start column wrong", token.getStartColumn() == 9); 999 assertTrue("(33) end line wrong", token.getEndLine() == 1); 1000 assertTrue("(34) end column wrong", token.getEndColumn() == 10); 1001 1002 assertTrue(tokenizer.hasMoreToken()); 1004 token = tokenizer.nextToken(); 1005 assertTrue("(40) token \"int\" not recognized.", token.getType() == Token.NORMAL && token.getImage().equals("int")); 1006 assertTrue("(41) start line wrong", token.getStartLine() == 1); 1007 assertTrue("(42) start column wrong", token.getStartColumn() == 10); 1008 assertTrue("(43) end line wrong", token.getEndLine() == 1); 1009 assertTrue("(44) end column wrong", token.getEndColumn() == 13); 1010 1011 assertTrue(tokenizer.hasMoreToken()); 1012 token = tokenizer.nextToken(); 1013 assertTrue("(45) whitespace not recognized", token.getType() == Token.WHITESPACE); 1014 1015 assertTrue(tokenizer.hasMoreToken()); 1017 token = tokenizer.nextToken(); 1018 assertTrue("(50) token \"argc\" not recognized.", token.getType() == Token.NORMAL && token.getImage().equals("argc")); 1019 assertTrue("(51) start line wrong", token.getStartLine() == 1); 1020 assertTrue("(52) start column wrong", token.getStartColumn() == 14); 1021 assertTrue("(53) end line wrong", token.getEndLine() == 1); 1022 assertTrue("(54) end column wrong", token.getEndColumn() == 18); 1023 1024 assertTrue(tokenizer.hasMoreToken()); 1026 token = tokenizer.nextToken(); 1027 assertTrue("(60) token \")\" not recognized.", token.getType() == Token.SEPARATOR && token.getImage().equals(")")); 1028 assertTrue("(61) start line wrong", token.getStartLine() == 1); 1029 assertTrue("(62) start column wrong", token.getStartColumn() == 18); 1030 assertTrue("(63) end line wrong", token.getEndLine() == 1); 1031 assertTrue("(64) end column wrong", token.getEndColumn() == 19); 1032 1033 assertTrue(tokenizer.hasMoreToken()); 1035 token = tokenizer.nextToken(); 1036 assertTrue("(60) token \"\\r\" not recognized.", token.getType() == Token.WHITESPACE && token.getImage().equals("\r")); 1037 assertTrue("(61) start line wrong", token.getStartLine() == 1); 1038 assertTrue("(62) start column wrong", token.getStartColumn() == 19); 1039 assertTrue("(63) end line wrong", token.getEndLine() == 2); 1040 assertTrue("(64) end column wrong", token.getEndColumn() == 0); 1041 assertTrue("(65) wrong length", token.getLength() == 1); 1042 1043 assertTrue(tokenizer.hasMoreToken()); 1045 token = tokenizer.nextToken(); 1046 assertTrue("(70) token \"{\" not recognized.", token.getType() == Token.SEPARATOR && token.getImage().equals("{")); 1047 assertTrue("(71) start line wrong", token.getStartLine() == 2); 1048 assertTrue("(72) start column wrong", token.getStartColumn() == 0); 1049 assertTrue("(73) end line wrong", token.getEndLine() == 2); 1050 assertTrue("(74) end column wrong", token.getEndColumn() == 1); 1051 1052 assertTrue(tokenizer.hasMoreToken()); 1054 token = tokenizer.nextToken(); 1055 assertTrue("(80) token \"\\r \" not recognized.", token.getType() == Token.WHITESPACE && token.getImage().equals("\r ")); 1056 assertTrue("(81) start line wrong", token.getStartLine() == 2); 1057 assertTrue("(82) start column wrong", token.getStartColumn() == 1); 1058 assertTrue("(83) end line wrong", token.getEndLine() == 3); 1059 assertTrue("(84) end column wrong", token.getEndColumn() == 2); 1060 assertTrue("(85) wrong length", token.getLength() == 3); 1061 1062 assertTrue(tokenizer.hasMoreToken()); 1064 token = tokenizer.nextToken(); 1065 assertTrue("(91) line comment not recognized", token.getType() == Token.LINE_COMMENT); 1066 assertTrue("(92) start line wrong", token.getStartLine() == 3); 1067 assertTrue("(93) start column wrong", token.getStartColumn() == 2); 1068 assertTrue("(94) end line wrong", token.getEndLine() == 4); 1069 assertTrue("(95) end column wrong", token.getEndColumn() == 0); 1070 1071 assertTrue(tokenizer.hasMoreToken()); 1072 token = tokenizer.nextToken(); 1073 assertTrue("(96) whitespace not recognized", token.getType() == Token.WHITESPACE); 1074 1075 assertTrue(tokenizer.hasMoreToken()); 1077 token = tokenizer.nextToken(); 1078 assertTrue("(101) block comment not recognized", token.getType() == Token.BLOCK_COMMENT); 1079 assertTrue("(102) start line wrong", token.getStartLine() == 4); 1080 assertTrue("(103) start column wrong", token.getStartColumn() == 2); 1081 assertTrue("(104) end line wrong", token.getEndLine() == 6); 1082 assertTrue("(105) end column wrong", token.getEndColumn() == 4); 1083 1084 assertTrue(tokenizer.hasMoreToken()); 1086 token = tokenizer.nextToken(); 1087 assertTrue("(110) token \"\\r\" not recognized.", token.getType() == Token.WHITESPACE && token.getImage().equals("\r")); 1088 assertTrue("(111) start line wrong", token.getStartLine() == 6); 1089 assertTrue("(112) start column wrong", token.getStartColumn() == 4); 1090 assertTrue("(113) end line wrong", token.getEndLine() == 7); 1091 assertTrue("(114) end column wrong", token.getEndColumn() == 0); 1092 assertTrue("(115) wrong length", token.getLength() == 1); 1093 1094 assertTrue(tokenizer.hasMoreToken()); 1096 token = tokenizer.nextToken(); 1097 assertTrue("(120) token \"}\" not recognized.", token.getType() == Token.SEPARATOR && token.getImage().equals("}")); 1098 assertTrue("(121) start line wrong", token.getStartLine() == 7); 1099 assertTrue("(122) start column wrong", token.getStartColumn() == 0); 1100 assertTrue("(123) end line wrong", token.getEndLine() == 7); 1101 assertTrue("(124) end column wrong", token.getEndColumn() == 1); 1102 1103 assertTrue(tokenizer.hasMoreToken()); 1105 token = tokenizer.nextToken(); 1106 assertTrue("(130) token \"\\r\" not recognized.", token.getType() == Token.WHITESPACE && token.getImage().equals("\r")); 1107 assertTrue("(131) start line wrong", token.getStartLine() == 7); 1108 assertTrue("(132) start column wrong", token.getStartColumn() == 1); 1109 assertTrue("(133) end line wrong", token.getEndLine() == 8); 1110 assertTrue("(134) end column wrong", token.getEndColumn() == 0); 1111 assertTrue("(135) wrong length", token.getLength() == 1); 1112 1113 } finally { 1114 tokenizer.close(); 1116 } 1117 } 1118 1119 1122 public void testLineCounting() throws Throwable { 1123 TokenizerSource source = getSource( 1124 "01234 67 9\r\n" 1125 + "0 2 4 6 8\r" 1126 + " 1 3 5 7 9\n" 1127 + "01 34 67 9\n" 1128 + "/* block comment\n" 1129 + " in three lines\r\n" 1130 + "*/\n" 1131 + "// line comment 1\r" 1132 + "// line comment 2\r\n" 1133 + "// line comment 3\n" 1134 + "abc // line comment 1\r" 1135 + "01 34 67 // line comment 2\r\n" 1136 + "/* block comment */ // line comment 3\n"); 1137 1138 int[] expectedLines = { 1139 0, 0, 0, 1140 1, 1, 1, 1, 1, 1141 2, 2, 2, 2, 2, 1142 3, 3, 3, 3, 1143 4, 1144 7, 1145 8, 1146 9, 1147 10, 10, 1148 11, 11, 11, 11, 1149 12, 12 1150 }; 1151 int[] expectedColumns = { 1152 0, 6, 9, 1153 0, 2, 4, 6, 8, 1154 1, 3, 5, 7, 9, 1155 0, 3, 6, 9, 1156 0, 1157 0, 1158 0, 1159 0, 1160 0, 4, 1161 0, 3, 6, 9, 1162 0, 20 1163 }; 1164 1165 TokenizerProperties props = new StandardTokenizerProperties(); 1166 Tokenizer tokenizer = getTokenizer(props); 1167 Token token1; 1168 Token token2; 1169 int line = 0; 1170 int column = 0; 1171 int index = 0; 1172 1173 try { 1174 props.setParseFlags(Flags.F_RETURN_WHITESPACES | Flags.F_COUNT_LINES); 1175 props.addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT); 1176 props.addBlockComment(TokenizerProperties.DEFAULT_BLOCK_COMMENT_START, TokenizerProperties.DEFAULT_BLOCK_COMMENT_END); 1177 tokenizer.setSource(source); 1178 1179 while (tokenizer.hasMoreToken()) { 1180 token1 = tokenizer.nextToken(); 1181 assertTrue("Wrong line/column " + token1.getStartLine() + "/" + token1.getStartColumn(), 1182 token1.getStartLine() == line && token1.getStartColumn() == column); 1183 1184 tokenizer.setReadPositionRelative(-token1.getLength()); 1185 token2 = tokenizer.nextToken(); 1186 assertTrue("Wrong line/column " + token2.getStartLine() + "/" + token2.getStartColumn(), 1187 token2.getStartLine() == line && token2.getStartColumn() == column); 1188 1189 assertTrue("Token mismatch:\n " + token1 + "\n " + token2, token1.equals(token2)); 1190 1191 line = token1.getEndLine(); 1192 column = token1.getEndColumn(); 1193 1194 if (token1.getType() != Token.WHITESPACE && token1.getType() != Token.EOF) { 1196 assertTrue("Expected line " + expectedLines[index] + ", found " + token1.getStartLine(), 1197 token1.getStartLine() == expectedLines[index]); 1198 assertTrue("Expected column " + expectedColumns[index] + ", found " + token1.getStartColumn(), 1199 token1.getStartColumn() == expectedColumns[index]); 1200 index++; 1201 } 1202 } 1203 } finally { 1204 tokenizer.close(); 1206 } 1207 } 1208 1209 1213 public void testUncommonWhitespaces() throws Throwable { 1214 String data = 1215 "This text has spaces\r" 1216 + "and newlines. Depending on the flags\n" 1217 + "the spaces are considered as special sequences\r\n" 1218 + "or real\twhitespaces.\n\n" 1219 + "/** also included\r" 1220 + "* are line and block comments\r" 1221 + "*/\n" 1222 + "here comes // the line comment\n" 1223 + "// and another\n"; 1224 1225 TokenizerProperties props = new StandardTokenizerProperties(); 1226 Tokenizer tokenizer = getTokenizer(props); 1227 String [] ws = { "\r\n", " \t", " \t\n", " \t\r", " \n", " \r", "\t\r", "\t\n" }; 1228 int[] wsCount = { 5, 18, 22, 20, 21, 19, 3, 5 }; 1229 int[] seqCount = { 21, 7, 2, 5, 3, 6, 25, 22 }; 1230 int[] lineCount = { 10, 11, 11, 11, 11, 11, 11, 11 }; 1231 TokenizerProperty spaceProp = new TokenizerProperty(Token.SPECIAL_SEQUENCE, new String [] { " " } ); 1232 TokenizerProperty tabProp = new TokenizerProperty(Token.SPECIAL_SEQUENCE, new String [] { "\t" } ); 1233 TokenizerProperty lfProp = new TokenizerProperty(Token.SPECIAL_SEQUENCE, new String [] { "\n" } ); 1234 TokenizerProperty crProp = new TokenizerProperty(Token.SPECIAL_SEQUENCE, new String [] { "\r" } ); 1235 1236 try { 1237 props.setParseFlags(Flags.F_RETURN_WHITESPACES | Flags.F_COUNT_LINES); 1238 props.addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT); 1239 props.addBlockComment(TokenizerProperties.DEFAULT_BLOCK_COMMENT_START, TokenizerProperties.DEFAULT_BLOCK_COMMENT_END); 1240 props.addProperty(spaceProp); 1241 props.addProperty(tabProp); 1242 props.addProperty(lfProp); 1243 props.addProperty(crProp); 1244 1245 for (int ii = 0; ii < ws.length; ++ii) { 1246 int seqCounter = 0; 1247 int wsCounter = 0; 1248 1249 props.setWhitespaces(ws[ii]); 1250 tokenizer.setSource(getSource(data)); 1251 1252 System.out.println("Loop " + ii); 1253 while (tokenizer.hasMoreToken()) { 1254 Token token = tokenizer.nextToken(); 1255 1256 System.out.println(token.toString()); 1257 switch (token.getType()) { 1258 case Token.SPECIAL_SEQUENCE: 1259 seqCounter++; 1260 break; 1261 case Token.WHITESPACE: 1262 wsCounter++; 1263 break; 1264 case Token.EOF: 1265 assertTrue("Loop " + ii + ": Expected " + lineCount[ii] + " lines, got " + token.getEndLine(), 1266 lineCount[ii] == token.getEndLine()); 1267 break; 1268 } 1269 } 1270 assertTrue("Loop " + ii + ": Expected " + wsCount[ii] + " whitespaces, got " + wsCounter, 1271 wsCount[ii] == wsCounter); 1272 assertTrue("Loop " + ii + ": Expected " + seqCount[ii] + " special sequences, got " + seqCounter, 1273 seqCount[ii] == seqCounter); 1274 } 1275 } finally { 1276 tokenizer.close(); 1278 } 1279 } 1280 1281 1284 public void testWhitespaceHandling() throws Throwable { 1285 String data = 1286 "/* this is a block comment " 1287 + " followed by a newline (whitespace) sequence */\r\n" 1288 + "// a line comment\r\n" 1289 + "// another line comment\r\n" 1290 + " /* whitespaces with a block comment in between */ \n" 1291 + "// a EOF-terminated line comment"; 1292 1293 TokenizerProperties props = new StandardTokenizerProperties(); 1294 Tokenizer tokenizer = getTokenizer(props); 1295 int[] flags = { Flags.F_RETURN_BLOCK_COMMENTS, 1296 Flags.F_RETURN_LINE_COMMENTS, 1297 Flags.F_RETURN_BLOCK_COMMENTS + Flags.F_RETURN_LINE_COMMENTS, 1298 Flags.F_RETURN_WHITESPACES, 1299 Flags.F_RETURN_LINE_COMMENTS + Flags.F_RETURN_SIMPLE_WHITESPACES, 1300 Flags.F_RETURN_BLOCK_COMMENTS + Flags.F_RETURN_SIMPLE_WHITESPACES, 1301 Flags.F_RETURN_SIMPLE_WHITESPACES, 1302 0 1303 }; 1304 boolean[] propsFlag = { true, false }; 1305 1306 try { 1307 props.addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT); 1308 props.addBlockComment(TokenizerProperties.DEFAULT_BLOCK_COMMENT_START, TokenizerProperties.DEFAULT_BLOCK_COMMENT_END); 1309 1310 for (int ii = 0; ii < propsFlag.length; ++ii) { 1311 for (int kk = 0; kk < flags.length; ++kk) { 1312 if (propsFlag[ii]) { 1313 props.setParseFlags(flags[kk]); 1314 } else { 1315 tokenizer.changeParseFlags(flags[kk], Flags.F_RETURN_WHITESPACES); 1316 } 1317 1318 tokenizer.setSource(getSource(data)); 1319 1320 System.out.println("Loop " + ii + "/" + kk); 1321 while (tokenizer.hasMoreToken()) { 1322 Token token = tokenizer.nextToken(); 1323 1324 System.out.println(token.toString()); 1325 switch (token.getType()) { 1326 case Token.BLOCK_COMMENT: 1327 assertTrue("Tokenizer returned a block comment without the flag set: " + tokenizer.currentImage(), 1328 (flags[kk] & Flags.F_RETURN_BLOCK_COMMENTS) != 0); 1329 break; 1330 case Token.LINE_COMMENT: 1331 assertTrue("Tokenizer returned a line comment without the flag set: " + tokenizer.currentImage(), 1332 (flags[kk] & Flags.F_RETURN_LINE_COMMENTS) != 0); 1333 break; 1334 case Token.WHITESPACE: 1335 assertTrue("Tokenizer returned a simple whitespace sequence without the flag set: " + tokenizer.currentImage(), 1336 (flags[kk] & Flags.F_RETURN_SIMPLE_WHITESPACES) != 0); 1337 break; 1338 } 1339 } 1340 } 1341 } 1342 } finally { 1343 tokenizer.close(); 1345 } 1346 } 1347 1348 1351 public void testSequencesAndSeparators() throws Throwable { 1352 String data = "(...::==:=: =====>==<=..()>>>>> >> >>>>)"; 1353 int[] expected = { Token.SEPARATOR, Token.SPECIAL_SEQUENCE, Token.SEPARATOR, Token.SEPARATOR, Token.SPECIAL_SEQUENCE, Token.SEPARATOR, Token.SPECIAL_SEQUENCE, Token.SEPARATOR, Token.SPECIAL_SEQUENCE, Token.SPECIAL_SEQUENCE, Token.SEPARATOR, Token.SPECIAL_SEQUENCE, Token.SEPARATOR, Token.SPECIAL_SEQUENCE, Token.SPECIAL_SEQUENCE, Token.SPECIAL_SEQUENCE, Token.SPECIAL_SEQUENCE, Token.SPECIAL_SEQUENCE, Token.SPECIAL_SEQUENCE, Token.SPECIAL_SEQUENCE, Token.SEPARATOR, Token.SEPARATOR, Token.EOF }; 1376 1377 TokenizerProperties props = new StandardTokenizerProperties(); 1378 Tokenizer tokenizer = getTokenizer(props); 1379 int count = 0; 1380 1381 try { 1382 props.addSpecialSequence(":="); 1383 props.addSpecialSequence(">="); 1384 props.addSpecialSequence("<="); 1385 props.addSpecialSequence("=="); 1386 props.addSpecialSequence(".."); 1387 props.addSpecialSequence("()"); 1388 props.addSpecialSequence(".."); 1389 props.addSpecialSequence(">>>"); 1390 props.addSpecialSequence(">>"); 1391 1392 tokenizer.setSource(getSource(data)); 1393 1394 while (tokenizer.hasMoreToken()) { 1395 Token token = tokenizer.nextToken(); 1396 1397 System.out.println(token.getImage()); 1398 assertTrue("Token #" + (count + 1) + ": expected type " + Token.getTypeName(expected[count]) + ", got " + Token.getTypeName(token.getType()), 1399 token.getType() == expected[count]); 1400 count++; 1401 } 1402 } finally { 1403 tokenizer.close(); 1404 } 1405 } 1406 1407 1408 1412 1415 private TokenizerSource getSource(String data) { 1416 try { 1417 return (TokenizerSource)_sourceClass.getConstructor( new Class [] { String .class } ).newInstance(new Object [] { data } ); 1418 } catch (Throwable ex) { 1419 return new ReaderSource(new StringReader (data)); 1420 } 1421 } 1422 1423 1427 private Tokenizer getTokenizer(TokenizerProperties props) throws Throwable { 1428 return new StandardTokenizer(props); 1429 } 1430 1431 1432 private Class _sourceClass; 1436} 1437 1438 | Popular Tags |