/*
 * KickJava   Java API By Example, From Geeks To Geeks.
 * Java > Open Source Codes > de > susebox > jtopas > TestDifficultSituations
 *
 * TestDifficultSituations.java: JUnit test for a Tokenizer
 *
 * Copyright (C) 2002 Heiko Blau
 *
 * This file belongs to the JTopas test suite.
 * The JTopas test suite is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by the
 * Free Software Foundation; either version 2.1 of the License, or (at your option)
 * any later version.
 *
 * This software is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License along
 * with the JTopas test suite. If not, write to the
 *
 * Free Software Foundation, Inc.
 * 59 Temple Place, Suite 330,
 * Boston, MA 02111-1307
 * USA
 *
 * or check the Internet: http://www.fsf.org
 *
 * The JTopas test suite uses the test framework JUnit by Kent Beck and Erich Gamma.
 * You should have received a copy of their JUnit licence agreement along with
 * the JTopas test suite.
 *
 * We do NOT provide the JUnit archive junit.jar nessecary to compile and run
 * our tests, since we assume, that You either have it already or would like
 * to get the current release Yourself.
 * Please visit either:
 * http://sourceforge.net/projects/junit
 * or
 * http://junit.org
 * to obtain JUnit.
 *
 * Contact:
 * email: heiko@susebox.de
 */

43
44 package de.susebox.jtopas;
45
46 //-----------------------------------------------------------------------------
47
// Imports
48
//
49
import java.lang.reflect.Constructor JavaDoc;
50 import java.io.Reader JavaDoc;
51 import java.io.StringReader JavaDoc;
52
53 import junit.framework.Test;
54 import junit.framework.TestCase;
55 import junit.framework.TestSuite;
56 import junit.framework.Assert;
57
58 import de.susebox.TestUtilities;
59
60
61 //-----------------------------------------------------------------------------
62
// Class TestDifficultSituations
63
//
64

65 /**<p>
66  * The class contains a number of test cases that are supposed to be difficult
67  * to handle for a {@link Tokenizer}, e.g. EOF conditions inside strings etc.
68  *</p>
69  *
70  * @see Tokenizer
71  * @see StandardTokenizer
72  * @see StandardTokenizerProperties
73  * @author Heiko Blau
74  */

75 public class TestDifficultSituations extends TestCase {
76   
77   //---------------------------------------------------------------------------
78
// properties
79
//
80

81   
82   //---------------------------------------------------------------------------
83
// main method
84
//
85

86   /**
87    * call this method to invoke the tests
88    */

89   public static void main(String JavaDoc[] args) {
90     String JavaDoc[] tests = { TestDifficultSituations.class.getName() };
91
92     TestUtilities.run(tests, args);
93   }
94   
95
96   //---------------------------------------------------------------------------
97
// suite method
98
//
99

100   /**
101    * Implementation of the JUnit method <code>suite</code>. For each set of test
102    * properties one or more tests are instantiated.
103    *
104    * @return a test suite
105    */

106   public static Test suite() {
107     TestSuite suite = new TestSuite(TestDifficultSituations.class.getName());
108     Class JavaDoc[] sourceClasses = { ReaderSource.class, StringSource.class };
109     
110     for (int sourceIndex = 0; sourceIndex < sourceClasses.length; ++sourceIndex) {
111       suite.addTest(new TestDifficultSituations("testSequencesAndSeparators", sourceClasses[sourceIndex]));
112       suite.addTest(new TestDifficultSituations("testSmallSource", sourceClasses[sourceIndex]));
113       suite.addTest(new TestDifficultSituations("testEmptySource", sourceClasses[sourceIndex]));
114       suite.addTest(new TestDifficultSituations("testSimilarSpecialSequences", sourceClasses[sourceIndex]));
115       suite.addTest(new TestDifficultSituations("testNonASCIICharacters", sourceClasses[sourceIndex]));
116       suite.addTest(new TestDifficultSituations("testEOFInLineComment", sourceClasses[sourceIndex]));
117       suite.addTest(new TestDifficultSituations("testEOFInBlockComment", sourceClasses[sourceIndex]));
118       suite.addTest(new TestDifficultSituations("testEOFInString", sourceClasses[sourceIndex]));
119       suite.addTest(new TestDifficultSituations("testStringEscapes1", sourceClasses[sourceIndex]));
120       suite.addTest(new TestDifficultSituations("testStringEscapes2", sourceClasses[sourceIndex]));
121       suite.addTest(new TestDifficultSituations("testNestedComments", sourceClasses[sourceIndex]));
122       suite.addTest(new TestDifficultSituations("testReaderSwitching", sourceClasses[sourceIndex]));
123       suite.addTest(new TestDifficultSituations("testDOSEOL", sourceClasses[sourceIndex]));
124       suite.addTest(new TestDifficultSituations("testMACEOL", sourceClasses[sourceIndex]));
125       suite.addTest(new TestDifficultSituations("testSpecialCalls", sourceClasses[sourceIndex]));
126       suite.addTest(new TestDifficultSituations("testLineCounting", sourceClasses[sourceIndex]));
127       suite.addTest(new TestDifficultSituations("testUncommonWhitespaces", sourceClasses[sourceIndex]));
128       suite.addTest(new TestDifficultSituations("testWhitespaceHandling", sourceClasses[sourceIndex]));
129     }
130     return suite;
131   }
132   
133   
134   //---------------------------------------------------------------------------
135
// Constructor
136
//
137

138   /**
139    * Default constructor. Standard input {@link java.lang.System#in} is used
140    * to construct the input stream reader.
141    */

142   public TestDifficultSituations(String JavaDoc test, Class JavaDoc sourceClass) {
143     super(test);
144     _sourceClass = sourceClass;
145   }
146
147   
148   //---------------------------------------------------------------------------
149
// Fixture setup and release
150
//
151

152   /**
153    * Sets up the fixture, for example, open a network connection.
154    * This method is called before a test is executed.
155    */

156   protected void setUp() throws Exception JavaDoc {}
157
158   
159   /**
160    * Tears down the fixture, for example, close a network connection.
161    * This method is called after a test is executed.
162    */

163   protected void tearDown() throws Exception JavaDoc {}
164   
165   
166   //---------------------------------------------------------------------------
167
// test cases
168
//
169

170   // various constants
171
private static final String JavaDoc PLUS = "+";
172   private static final String JavaDoc DOUBLE_PLUS = "++";
173   private static final String JavaDoc TRIPLE_PLUS = "+++";
174   private static final String JavaDoc PLUS_EQUAL = "+=";
175   private static final String JavaDoc PLUS_MINUS = "+-";
176   private static final String JavaDoc HTML_OPEN = "<";
177   private static final String JavaDoc HTML_COMMENT1 = "<!";
178   private static final String JavaDoc HTML_COMMENT2 = "<!--";
179   private static final String JavaDoc HTML_HEAD = "<head>";
180   private static final String JavaDoc HTML_HEADER = "<h>";
181   private static final String JavaDoc HTML_HT = "<ht>";
182   private static final String JavaDoc HTML_CLOSE = ">";
183   private static final String JavaDoc MINUS = "-";
184   private static final String JavaDoc DOUBLE_MINUS = "--";
185   private static final String JavaDoc HTML_COMMENT_END = "-->";
186   private static final String JavaDoc HTML_HEAD_END = "</head>";
187   private static final String JavaDoc HTML_HEADER_END = "</h>";
188   private static final String JavaDoc SHIFT_LEFT = "<<";
189   private static final String JavaDoc SHIFT_RIGHT = ">>";
190   private static final String JavaDoc COLON = ".";
191   private static final String JavaDoc EURO = "€";
192   private static final String JavaDoc DOUBLE_EURO = "€€";
193   private static final String JavaDoc EUROURO = "€uro";
194   private static final String JavaDoc AE = "æ";
195   private static final String JavaDoc OERE = "ø";
196   private static final String JavaDoc BUG = "ð";
197   private static final String JavaDoc DOUBLE_BUG = "ðð";
198   
199   /**
200    * Test similar special sequences.
201    */

202   public void testSimilarSpecialSequences() throws Throwable JavaDoc {
203     TokenizerSource source = getSource( "lots+of++special+=sequences+in+++a+-row\n"
204                                       + "with <HEAD>HTML-tags-in-between</head>\n"
205                                       + "like <h>headings</h><open and close> tags\n"
206                                       + "and <!even--comments-->+<!--in<ht>many+=forms-->>\n"
207                                       + "some<<as>>operators.\n"
208                                       + "+++++<<<>>>.\n"
209                                       );
210     String JavaDoc[] expectedToken = {
211       PLUS, DOUBLE_PLUS, PLUS_EQUAL, PLUS, TRIPLE_PLUS, PLUS_MINUS, // "lots+of++special+=sequences+in+++a+-row\n"
212
HTML_HEAD, MINUS, MINUS, MINUS, HTML_HEAD_END, // "with <HEAD>HTML-tags-in-between</head>\n"
213
HTML_HEADER, HTML_HEADER_END, HTML_OPEN, HTML_CLOSE, // "like <h>headings</h><open and close> tags\n"
214
HTML_COMMENT1, DOUBLE_MINUS, HTML_COMMENT_END, PLUS,
215       HTML_COMMENT2, HTML_HT, PLUS_EQUAL, HTML_COMMENT_END, HTML_CLOSE, // "and <!even--comments-->+<!--in<ht>many+=forms-->>\n"
216
SHIFT_LEFT, SHIFT_RIGHT, COLON, // "some<<as>>operators."
217
TRIPLE_PLUS, DOUBLE_PLUS, SHIFT_LEFT, HTML_OPEN, SHIFT_RIGHT,
218       HTML_CLOSE, COLON // "+++++<<<>>>.\n"
219
};
220     
221     TokenizerProperties props = new StandardTokenizerProperties();
222     Tokenizer tokenizer = getTokenizer(props);
223
224     try {
225       props.addSpecialSequence(COLON, COLON);
226       props.addSpecialSequence(PLUS, PLUS);
227       props.addSpecialSequence(DOUBLE_PLUS, DOUBLE_PLUS);
228       props.addSpecialSequence(TRIPLE_PLUS, TRIPLE_PLUS);
229       props.addSpecialSequence(PLUS_EQUAL, PLUS_EQUAL);
230       props.addSpecialSequence(PLUS_MINUS, PLUS_MINUS);
231       props.addSpecialSequence(SHIFT_LEFT, SHIFT_LEFT);
232       props.addSpecialSequence(HTML_OPEN, HTML_OPEN, Flags.F_NO_CASE);
233       props.addSpecialSequence(HTML_COMMENT1, HTML_COMMENT1, Flags.F_NO_CASE);
234       props.addSpecialSequence(HTML_COMMENT2, HTML_COMMENT2, Flags.F_NO_CASE);
235       props.addSpecialSequence(HTML_HEAD, HTML_HEAD, Flags.F_NO_CASE);
236       props.addSpecialSequence(HTML_HEADER, HTML_HEADER, Flags.F_NO_CASE);
237       props.addSpecialSequence(HTML_HT, HTML_HT, Flags.F_NO_CASE);
238       props.addSpecialSequence(HTML_CLOSE, HTML_CLOSE, Flags.F_NO_CASE);
239       props.addSpecialSequence(SHIFT_RIGHT, SHIFT_RIGHT);
240       props.addSpecialSequence(MINUS, MINUS);
241       props.addSpecialSequence(DOUBLE_MINUS, DOUBLE_MINUS);
242       props.addSpecialSequence(HTML_COMMENT_END, HTML_COMMENT_END, Flags.F_NO_CASE);
243       props.addSpecialSequence(HTML_HEAD_END, HTML_HEAD_END, Flags.F_NO_CASE);
244       props.addSpecialSequence(HTML_HEADER_END, HTML_HEADER_END, Flags.F_NO_CASE);
245       tokenizer.setSource(source);
246
247       // start tokenizing
248
int index = 0;
249
250       while (tokenizer.hasMoreToken()) {
251         Token token = tokenizer.nextToken();
252         boolean isOK;
253
254         switch (token.getType()) {
255         case Token.NORMAL:
256           System.out.println(token.getImage());
257           break;
258         case Token.SPECIAL_SEQUENCE:
259           if (props.isFlagSet(props.getSpecialSequence(token.getImage()), Flags.F_NO_CASE)) {
260             isOK = expectedToken[index].equalsIgnoreCase(token.getImage());
261           } else {
262             isOK = expectedToken[index].equals(token.getImage());
263           }
264           assertTrue("Index " + index + ": expected \"" + expectedToken[index] + "\", got \"" + token.getImage() + "\".", isOK);
265           index++;
266           break;
267         }
268       }
269     } finally {
270       tokenizer.close();
271     }
272   }
273
274   
275   /**
276    * Test similar special sequences.
277    */

278   public void testNonASCIICharacters() throws Throwable JavaDoc {
279     TokenizerSource source = getSource("1€ is an æ to much. Orøtakeðthis: €€ or €uro and ðð.");
280     
281     String JavaDoc[] expectedToken = {
282       EURO, AE, OERE, BUG, DOUBLE_EURO, EUROURO, DOUBLE_BUG
283     };
284     
285     TokenizerProperties props = new StandardTokenizerProperties();
286     Tokenizer tokenizer = getTokenizer(props);
287
288     try {
289       props.addSpecialSequence(EURO, EURO);
290       props.addSpecialSequence(DOUBLE_EURO, DOUBLE_EURO);
291       props.addSpecialSequence(EUROURO, EUROURO);
292       props.addSpecialSequence(AE, AE);
293       props.addSpecialSequence(OERE, OERE);
294       props.addSpecialSequence(BUG, BUG);
295       props.addSpecialSequence(DOUBLE_BUG, DOUBLE_BUG);
296       tokenizer.setSource(source);
297
298       // start tokenizing
299
int index = 0;
300
301       while (tokenizer.hasMoreToken()) {
302         Token token = tokenizer.nextToken();
303         boolean isOK;
304
305         switch (token.getType()) {
306         case Token.NORMAL:
307           System.out.println(token.getImage());
308           break;
309         case Token.SPECIAL_SEQUENCE:
310           assertTrue( "Index " + index + ": expected \"" + expectedToken[index] + "\", got \"" + token.getImage() + "\".",
311                       expectedToken[index].equals(token.getImage()));
312           index++;
313           break;
314         }
315       }
316     } finally {
317       tokenizer.close();
318     }
319   }
320
321   
322   /**
323    * Test the case of an completely empty data source. This is always a good
324    * candidate for failures :-)
325    */

326   public void testEmptySource() throws Throwable JavaDoc {
327     TokenizerSource source = getSource("");
328     TokenizerProperties props = new StandardTokenizerProperties();
329     Tokenizer tokenizer = getTokenizer(props);
330     Token token;
331
332     try {
333       props.setParseFlags(Flags.F_RETURN_WHITESPACES);
334       props.addLineComment("//");
335       tokenizer.setSource(source);
336
337       assertTrue(tokenizer.hasMoreToken());
338       token = tokenizer.nextToken();
339       assertTrue(token.getType() == Token.EOF);
340       assertTrue( ! tokenizer.hasMoreToken());
341     } finally {
342       tokenizer.close();
343     }
344   }
345
346   
347   /**
348    * Test small sources.
349    */

350   public void testSmallSource() throws Throwable JavaDoc {
351     TokenizerProperties props = new StandardTokenizerProperties();
352     Tokenizer tokenizer = getTokenizer(props);
353     Token token;
354
355     try {
356       props.setParseFlags(Flags.F_RETURN_WHITESPACES);
357       props.addLineComment("//");
358       props.addSpecialSequence(PLUS, PLUS);
359       props.addSpecialSequence(DOUBLE_PLUS, DOUBLE_PLUS);
360       props.addSpecialSequence(MINUS, MINUS);
361       props.addSpecialSequence(DOUBLE_MINUS, DOUBLE_MINUS);
362
363       // a single character
364
char[] contents = new char[8192];
365       int bytes;
366       
367       tokenizer.setSource(getSource("A"));
368
369       assertTrue(tokenizer.hasMoreToken());
370       token = tokenizer.nextToken();
371       assertTrue(token.getType() == Token.NORMAL);
372       assertTrue(token.getImage().equals("A"));
373       assertTrue(tokenizer.hasMoreToken());
374       token = tokenizer.nextToken();
375       assertTrue(token.getType() == Token.EOF);
376       assertTrue( ! tokenizer.hasMoreToken());
377
378       // a single special sequence
379
tokenizer.setSource(getSource("++"));
380
381       assertTrue(tokenizer.hasMoreToken());
382       token = tokenizer.nextToken();
383       assertTrue(token.getType() == Token.SPECIAL_SEQUENCE);
384       assertTrue(token.getCompanion() == DOUBLE_PLUS);
385       assertTrue(tokenizer.hasMoreToken());
386       token = tokenizer.nextToken();
387       assertTrue(token.getType() == Token.EOF);
388       assertTrue( ! tokenizer.hasMoreToken());
389
390       // an empty line comment
391
tokenizer.setSource(getSource("//"));
392
393       assertTrue(tokenizer.hasMoreToken());
394       token = tokenizer.nextToken();
395       assertTrue(token.getType() == Token.LINE_COMMENT);
396       assertTrue(token.getImage().equals("//"));
397       assertTrue(tokenizer.hasMoreToken());
398       token = tokenizer.nextToken();
399       assertTrue(token.getType() == Token.EOF);
400       assertTrue( ! tokenizer.hasMoreToken());
401
402     } finally {
403       // Cleanup
404
tokenizer.close();
405     }
406   }
407
408   
409   /**
410    * Test the case, when a line comment is not terminated by a newline character.
411    * This happens when the last line of a file is a line comment without a
412    * newline on its end.
413    * This is a rather common situation.
414    */

415   public void testEOFInLineComment() throws Throwable JavaDoc {
416     TokenizerSource source = getSource("// end of file occurs in line comment.");
417     TokenizerProperties props = new StandardTokenizerProperties();
418     Tokenizer tokenizer = getTokenizer(props);
419     Token token;
420
421     try {
422       props.setParseFlags(Flags.F_RETURN_WHITESPACES);
423       props.addLineComment("//");
424       tokenizer.setSource(source);
425
426       assertTrue(tokenizer.hasMoreToken());
427       token = tokenizer.nextToken();
428       assertTrue(token.getType() == Token.LINE_COMMENT);
429       assertTrue(tokenizer.hasMoreToken());
430       token = tokenizer.nextToken();
431       assertTrue(token.getType() == Token.EOF);
432     } finally {
433       // Cleanup
434
tokenizer.close();
435     }
436   }
437
438   /**
439    * Test the case, when a block comment is not terminated. That means EOF
440    * occurs unexpectedly in a block comment.
441    */

442   public void testEOFInBlockComment() throws Throwable JavaDoc {
443     TokenizerSource source = getSource("/* end of file occurs\nin a block comment.");
444     TokenizerProperties props = new StandardTokenizerProperties();
445     Tokenizer tokenizer = getTokenizer(props);
446     Token token;
447
448     try {
449       props.setParseFlags(Flags.F_RETURN_WHITESPACES);
450       props.addBlockComment("/*", "*/");
451       tokenizer.setSource(source);
452
453       assertTrue(tokenizer.hasMoreToken());
454       token = tokenizer.nextToken();
455       assertTrue(token.getType() == Token.BLOCK_COMMENT);
456       assertTrue(tokenizer.hasMoreToken());
457       token = tokenizer.nextToken();
458       assertTrue(token.getType() == Token.EOF);
459     } finally {
460       // Cleanup
461
tokenizer.close();
462     }
463   }
464
465   /**
466    * Test the case, when a block comment is not terminated. That means EOF
467    * occurs unexpectedly in a block comment.
468    */

469   public void testEOFInString() throws Throwable JavaDoc {
470     TokenizerSource source = getSource("-- end of file in String\n\"Thats the string, but rather unterminated |-(");
471     TokenizerProperties props = new StandardTokenizerProperties();
472     Tokenizer tokenizer = getTokenizer(props);
473     Token token;
474
475     try {
476       props.addLineComment("--");
477       props.addString("\"", "\"", "\"");
478       tokenizer.setSource(source);
479
480       assertTrue(tokenizer.hasMoreToken());
481       token = tokenizer.nextToken();
482       assertTrue(token.getType() == Token.STRING);
483       assertTrue(tokenizer.hasMoreToken());
484       token = tokenizer.nextToken();
485       assertTrue(token.getType() == Token.EOF);
486     } finally {
487       // Cleanup
488
tokenizer.close();
489     }
490   }
491   
492   /**
493    * Test various calls to methods with a special contract.
494    */

495   public void testSpecialCalls() throws Throwable JavaDoc {
496     TokenizerSource source = getSource("A simple text");
497     TokenizerProperties props = new StandardTokenizerProperties();
498     Tokenizer tokenizer = getTokenizer(props);
499     Token token = null;
500
501     try {
502       tokenizer.setSource(source);
503
504       try {
505         tokenizer.currentToken();
506         assertTrue("Tokenizer should have thrown an exception here.", false);
507       } catch (TokenizerException ex) {};
508       try {
509         tokenizer.currentImage();
510         assertTrue("Tokenizer should have thrown an exception here.", false);
511       } catch (TokenizerException ex) {};
512
513       while (tokenizer.hasMoreToken()) {
514         Token newToken = tokenizer.nextToken();
515         assertTrue( ! tokenizer.currentToken().equals(token));
516         assertTrue(tokenizer.currentToken() != null);
517         assertTrue(tokenizer.currentToken().equals(newToken));
518         assertTrue(tokenizer.currentToken().equals(tokenizer.currentToken()));
519         if (newToken.getType() != Token.EOF) {
520           assertTrue(tokenizer.currentImage() != null);
521           assertTrue(tokenizer.currentImage().equals(tokenizer.currentImage()));
522         } else {
523           assertTrue( ! tokenizer.hasMoreToken());
524         }
525         token = newToken;
526       }
527     } finally {
528       // Cleanup
529
tokenizer.close();
530     }
531   }
532   
533   /**
534    * Test various situations of string escapes, if the escape character is the
535    * backslash (not equal to the string character).
536    * This test takes a number of lines each with a string including escapes in
537    * it. It passes if the right number of strings is returned and also the line
538    * counting is ok.
539    */

540   public void testStringEscapes1() throws Throwable JavaDoc {
541     TokenizerSource source = getSource(
542       "\"String escape \\\" in the middle\"\n"
543     + "\"String escape on end \\\"\"\n"
544     + "\"\\\" String escape on begin\"\n"
545     + "\"Two string escapes \\\"\\\" after each other\"\n"
546     + "\"Two string escapes on end \\\"\\\"\"\n");
547     
548     int lines = 5;
549     TokenizerProperties props = new StandardTokenizerProperties();
550     Tokenizer tokenizer = getTokenizer(props);
551     Token token;
552
553     try {
554       props.setParseFlags(Flags.F_RETURN_WHITESPACES | Flags.F_COUNT_LINES);
555       props.addString("\"", "\"", "\\");
556       tokenizer.setSource(source);
557
558       for (int line = 0; line < lines; ++line) {
559         assertTrue("(1) No more token at line " + line, tokenizer.hasMoreToken());
560         token = tokenizer.nextToken();
561         assertTrue("String not recognized at line " + line, token.getType() == Token.STRING);
562         assertTrue("(2) No more token at line " + line, tokenizer.hasMoreToken());
563         token = tokenizer.nextToken();
564         assertTrue("Newline not recognized as whitespace at line " + line, token.getType() == Token.WHITESPACE);
565       }
566       assertTrue(tokenizer.hasMoreToken());
567       token = tokenizer.nextToken();
568       assertTrue(token.getType() == Token.EOF);
569     } finally {
570       // Cleanup
571
tokenizer.close();
572     }
573   }
574
575   /**
576    * Test various situations of string escapes, if the escape character is equal
577    * to the string character).
578    * This test takes a number of lines each with a string including escapes in
579    * it. It passes if the right number of strings is returned and also the line
580    * counting is ok.
581    */

582   public void testStringEscapes2() throws Throwable JavaDoc {
583     TokenizerSource source = getSource(
584       "'String escape '' in the middle'\n"
585     + "'String escape on end '''\n"
586     + "''' String escape on begin'\n"
587     + "'Two string escapes '''' after each other'\n"
588     + "'Two string escapes on end '''''\n");
589     
590     int lines = 5;
591     TokenizerProperties props = new StandardTokenizerProperties();
592     Tokenizer tokenizer = getTokenizer(props);
593     Token token;
594
595     try {
596       props.setParseFlags(Flags.F_RETURN_WHITESPACES | Flags.F_COUNT_LINES);
597       props.addString("'", "'", "'");
598       tokenizer.setSource(source);
599
600       for (int line = 0; line < lines; ++line) {
601         assertTrue("(1) No more token at line " + line, tokenizer.hasMoreToken());
602         token = tokenizer.nextToken();
603         assertTrue("String not recognized at line " + line, token.getType() == Token.STRING);
604         assertTrue("(2) No more token at line " + line, tokenizer.hasMoreToken());
605         token = tokenizer.nextToken();
606         assertTrue("Newline not recognized as whitespace at line " + line, token.getType() == Token.WHITESPACE);
607       }
608       assertTrue(tokenizer.hasMoreToken());
609       token = tokenizer.nextToken();
610       assertTrue(token.getType() == Token.EOF);
611     } finally {
612       // Cleanup
613
tokenizer.close();
614     }
615   }
616
617   /**
618    * Test nested comments.
619    */

620   public void testNestedComments() throws Throwable JavaDoc {
621     TokenizerSource source = getSource(
622       "// line comment including // line comment sequence\n"
623     + "/* block comment with\n"
624     + " /* a nested block\n"
625     + " comment\n"
626     + " */\n"
627     + " normal token or not ?\n"
628     + "*/\n"
629     + "// line comment with /* block comment */\n"
630     + "'a string with // line comment'\n"
631     + "'a string with /* block comment */'\n");
632     
633     int lines = 10;
634     TokenizerProperties props = new StandardTokenizerProperties();
635     Tokenizer tokenizer = getTokenizer(props);
636     Token token;
637
638     try {
639       props.setParseFlags(Flags.F_RETURN_WHITESPACES
640                         | Flags.F_COUNT_LINES
641                         | Flags.F_ALLOW_NESTED_COMMENTS);
642       props.addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT);
643       props.addBlockComment(TokenizerProperties.DEFAULT_BLOCK_COMMENT_START, TokenizerProperties.DEFAULT_BLOCK_COMMENT_END);
644       props.addString("'", "'", "'");
645       tokenizer.setSource(source);
646
647       // first line comment
648
assertTrue(tokenizer.hasMoreToken());
649       token = tokenizer.nextToken();
650       assertTrue("(1) line comment not recognized", token.getType() == Token.LINE_COMMENT);
651       assertTrue("(2) wrong start position " + token.getStartPosition(), token.getStartPosition() == 0);
652       assertTrue("(3) wrong start line " + token.getStartLine(), token.getStartLine() == 0);
653       assertTrue("(4) wrong start column" + token.getStartColumn(), token.getStartColumn() == 0);
654       assertTrue("(5) wrong end line " + token.getEndLine(), token.getEndLine() == token.getStartLine() + 1);
655       assertTrue("(6) wrong end column" + token.getEndColumn(), token.getEndColumn() == 0);
656
657       // block comment
658
assertTrue(tokenizer.hasMoreToken());
659       token = tokenizer.nextToken();
660       assertTrue("(10) block comment not recognized", token.getType() == Token.BLOCK_COMMENT);
661       assertTrue("(11) wrong start line " + token.getStartLine(), token.getStartLine() == 1);
662       assertTrue("(12) wrong start column" + token.getStartColumn(), token.getStartColumn() == 0);
663       assertTrue("(13) wrong end line " + token.getEndLine(), token.getEndLine() == token.getStartLine() + 5);
664       assertTrue("(14) wrong end column" + token.getEndColumn(), token.getEndColumn() == 2);
665       assertTrue(tokenizer.hasMoreToken());
666       token = tokenizer.nextToken();
667       assertTrue("(15) newline behind block comment not recognized as whitespace", token.getType() == Token.WHITESPACE);
668       assertTrue("(16) newline behind block comment not recognized as literal", tokenizer.currentImage().equals("\n"));
669
670       // second line comment
671
assertTrue(tokenizer.hasMoreToken());
672       token = tokenizer.nextToken();
673       assertTrue("(21) line comment not recognized", token.getType() == Token.LINE_COMMENT);
674       assertTrue("(22) wrong start line " + token.getStartLine(), token.getStartLine() == 7);
675       assertTrue("(23) wrong end line " + token.getEndLine(), token.getEndLine() == token.getStartLine() + 1);
676
677       // string 1
678
assertTrue(tokenizer.hasMoreToken());
679       token = tokenizer.nextToken();
680       assertTrue("(31) string not recognized", token.getType() == Token.STRING);
681       assertTrue("(32) wrong start line " + token.getStartLine(), token.getStartLine() == 8);
682       assertTrue("(33) wrong start column" + token.getStartColumn(), token.getStartColumn() == 0);
683       assertTrue("(34) wrong end line " + token.getEndLine(), token.getEndLine() == token.getStartLine());
684       assertTrue(tokenizer.hasMoreToken());
685       token = tokenizer.nextToken();
686       assertTrue("(35) newline behind string not recognized as whitespace", token.getType() == Token.WHITESPACE);
687       assertTrue("(36) newline behind string not recognized as literal", tokenizer.currentImage().equals("\n"));
688
689       // string 2
690
assertTrue(tokenizer.hasMoreToken());
691       token = tokenizer.nextToken();
692       assertTrue("(41) string not recognized", token.getType() == Token.STRING);
693       assertTrue("(42) wrong start line " + token.getStartLine(), token.getStartLine() == 9);
694       assertTrue("(43) wrong start column" + token.getStartColumn(), token.getStartColumn() == 0);
695       assertTrue("(44) wrong end line " + token.getEndLine(), token.getEndLine() == token.getStartLine());
696       assertTrue(tokenizer.hasMoreToken());
697       token = tokenizer.nextToken();
698       assertTrue("(45) newline behind string not recognized as whitespace", token.getType() == Token.WHITESPACE);
699       assertTrue("(46) newline behind string not recognized as literal", tokenizer.currentImage().equals("\n"));
700
701       // EOF should be reached here
702
token = tokenizer.nextToken();
703       assertTrue(token.getType() == Token.EOF);
704
705     } finally {
706       // Cleanup
707
tokenizer.close();
708     }
709   }
710   
711   
712   /**
713    * Test reader switching
714    */

715   public void testReaderSwitching() throws Throwable JavaDoc {
716     TokenizerSource source1 = getSource("0/2 4/6 8/10");
717     TokenizerSource source2 = getSource("0/2 4/6 8/10");
718     TokenizerSource source3 = getSource("0/2 4/6 8/10");
719     TokenizerSource[] sources = { source1, source2, source3 };
720     
721     TokenizerProperties props = new StandardTokenizerProperties();
722     Tokenizer tokenizer = getTokenizer(props);
723     Token token;
724
725     try {
726       for (int sourceIndex = 0; sourceIndex < sources.length; ++sourceIndex) {
727         tokenizer.setSource(sources[sourceIndex]);
728         for (int ii = 0; ii <= 8; ii += 4) {
729           assertTrue(tokenizer.hasMoreToken());
730           token = tokenizer.nextToken();
731           assertTrue("Wrong start position " + token.getStartPosition(), token.getStartPosition() == ii);
732           assertTrue("Wrong type " + token.getType(), token.getType() == Token.NORMAL);
733           assertTrue("Token not recognized as literal", tokenizer.currentImage().equals(Integer.toString(ii)));
734           assertTrue(tokenizer.hasMoreToken());
735           token = tokenizer.nextToken();
736           assertTrue("Wrong start position " + token.getStartPosition(), token.getStartPosition() == ii + 1);
737           assertTrue("Wrong type " + token.getType(), token.getType() == Token.SEPARATOR);
738           assertTrue("Separator not recognized as literal", tokenizer.currentImage().equals("/"));
739           assertTrue(tokenizer.hasMoreToken());
740           token = tokenizer.nextToken();
741           assertTrue("Wrong start position " + token.getStartPosition(), token.getStartPosition() == ii + 2);
742           assertTrue("Wrong type " + token.getType(), token.getType() == Token.NORMAL);
743           assertTrue("Token not recognized as literal", tokenizer.currentImage().equals(Integer.toString(ii + 2)));
744         }
745       }
746     } finally {
747       // Cleanup
748
tokenizer.close();
749     }
750   }
751
752
753   /**
754    * Line counting and line comments in DOS files
755    */

756   public void testDOSEOL() throws Throwable JavaDoc {
757     TokenizerSource source = getSource(
758       "// line comment with DOS line ending\r\n"
759     + "void main(int argc)\r\n"
760     + "{\r\n"
761     + " // another line comment\r\n"
762     + " /* a block comment\r\n"
763     + " with more than one line\r\n"
764     + " */\r\n"
765     + "}\r\n");
766     
767     int lines = 8;
768     TokenizerProperties props = new StandardTokenizerProperties();
769     Tokenizer tokenizer = getTokenizer(props);
770     Token token;
771
772     try {
773       props.setParseFlags(Flags.F_RETURN_WHITESPACES | Flags.F_COUNT_LINES);
774       props.addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT);
775       props.addBlockComment(TokenizerProperties.DEFAULT_BLOCK_COMMENT_START, TokenizerProperties.DEFAULT_BLOCK_COMMENT_END);
776       props.addString("\"", "\"", "\\");
777       tokenizer.setSource(source);
778
779       // zero line comment
780
assertTrue(tokenizer.hasMoreToken());
781       token = tokenizer.nextToken();
782       assertTrue("(1) line comment not recognized", token.getType() == Token.LINE_COMMENT);
783       assertTrue("(2) start line wrong", token.getStartLine() == 0);
784       assertTrue("(3) start column wrong", token.getStartColumn() == 0);
785       assertTrue("(4) end line wrong", token.getEndLine() == 1);
786       assertTrue("(5) end column wrong", token.getEndColumn() == 0);
787
788       // first line: void
789
assertTrue(tokenizer.hasMoreToken());
790       token = tokenizer.nextToken();
791       assertTrue("(10) token \"void\" not recognized.", token.getType() == Token.NORMAL && token.getImage().equals("void"));
792       assertTrue("(11) start line wrong", token.getStartLine() == 1);
793       assertTrue("(12) start column wrong", token.getStartColumn() == 0);
794       assertTrue("(13) end line wrong", token.getEndLine() == 1);
795       assertTrue("(14) end column wrong", token.getEndColumn() == 4);
796
797       assertTrue(tokenizer.hasMoreToken());
798       token = tokenizer.nextToken();
799       assertTrue("(15) whitespace not recognized", token.getType() == Token.WHITESPACE);
800
801       // first line: main
802
assertTrue(tokenizer.hasMoreToken());
803       token = tokenizer.nextToken();
804       assertTrue("(20) token \"main\" not recognized.", token.getType() == Token.NORMAL && token.getImage().equals("main"));
805       assertTrue("(21) start line wrong", token.getStartLine() == 1);
806       assertTrue("(22) start column wrong", token.getStartColumn() == 5);
807       assertTrue("(23) end line wrong", token.getEndLine() == 1);
808       assertTrue("(24) end column wrong", token.getEndColumn() == 9);
809
810       // first line: (
811
assertTrue(tokenizer.hasMoreToken());
812       token = tokenizer.nextToken();
813       assertTrue("(30) token \"(\" not recognized.", token.getType() == Token.SEPARATOR && token.getImage().equals("("));
814       assertTrue("(31) start line wrong", token.getStartLine() == 1);
815       assertTrue("(32) start column wrong", token.getStartColumn() == 9);
816       assertTrue("(33) end line wrong", token.getEndLine() == 1);
817       assertTrue("(34) end column wrong", token.getEndColumn() == 10);
818
819       // first line: int
820
assertTrue(tokenizer.hasMoreToken());
821       token = tokenizer.nextToken();
822       assertTrue("(40) token \"int\" not recognized.", token.getType() == Token.NORMAL && token.getImage().equals("int"));
823       assertTrue("(41) start line wrong", token.getStartLine() == 1);
824       assertTrue("(42) start column wrong", token.getStartColumn() == 10);
825       assertTrue("(43) end line wrong", token.getEndLine() == 1);
826       assertTrue("(44) end column wrong", token.getEndColumn() == 13);
827
828       assertTrue(tokenizer.hasMoreToken());
829       token = tokenizer.nextToken();
830       assertTrue("(45) whitespace not recognized", token.getType() == Token.WHITESPACE);
831
832       // first line: argc
833
assertTrue(tokenizer.hasMoreToken());
834       token = tokenizer.nextToken();
835       assertTrue("(50) token \"argc\" not recognized.", token.getType() == Token.NORMAL && token.getImage().equals("argc"));
836       assertTrue("(51) start line wrong", token.getStartLine() == 1);
837       assertTrue("(52) start column wrong", token.getStartColumn() == 14);
838       assertTrue("(53) end line wrong", token.getEndLine() == 1);
839       assertTrue("(54) end column wrong", token.getEndColumn() == 18);
840
841       // first line: )
842
assertTrue(tokenizer.hasMoreToken());
843       token = tokenizer.nextToken();
844       assertTrue("(60) token \")\" not recognized.", token.getType() == Token.SEPARATOR && token.getImage().equals(")"));
845       assertTrue("(61) start line wrong", token.getStartLine() == 1);
846       assertTrue("(62) start column wrong", token.getStartColumn() == 18);
847       assertTrue("(63) end line wrong", token.getEndLine() == 1);
848       assertTrue("(64) end column wrong", token.getEndColumn() == 19);
849
850       // first line: EOL
851
assertTrue(tokenizer.hasMoreToken());
852       token = tokenizer.nextToken();
853       assertTrue("(60) token \"\\r\\n\" not recognized.", token.getType() == Token.WHITESPACE && token.getImage().equals("\r\n"));
854       assertTrue("(61) start line wrong", token.getStartLine() == 1);
855       assertTrue("(62) start column wrong", token.getStartColumn() == 19);
856       assertTrue("(63) end line wrong", token.getEndLine() == 2);
857       assertTrue("(64) end column wrong", token.getEndColumn() == 0);
858       assertTrue("(65) wrong length", token.getLength() == 2);
859
860       // second line: {
861
assertTrue(tokenizer.hasMoreToken());
862       token = tokenizer.nextToken();
863       assertTrue("(70) token \"{\" not recognized.", token.getType() == Token.SEPARATOR && token.getImage().equals("{"));
864       assertTrue("(71) start line wrong", token.getStartLine() == 2);
865       assertTrue("(72) start column wrong", token.getStartColumn() == 0);
866       assertTrue("(73) end line wrong", token.getEndLine() == 2);
867       assertTrue("(74) end column wrong", token.getEndColumn() == 1);
868
869       // second/third line: EOL + whitespaces
870
assertTrue(tokenizer.hasMoreToken());
871       token = tokenizer.nextToken();
872       assertTrue("(80) token \"\\r\\n \" not recognized.", token.getType() == Token.WHITESPACE && token.getImage().equals("\r\n "));
873       assertTrue("(81) start line wrong", token.getStartLine() == 2);
874       assertTrue("(82) start column wrong", token.getStartColumn() == 1);
875       assertTrue("(83) end line wrong", token.getEndLine() == 3);
876       assertTrue("(84) end column wrong", token.getEndColumn() == 2);
877       assertTrue("(85) wrong length", token.getLength() == 4);
878
879       // third line: line comment
880
assertTrue(tokenizer.hasMoreToken());
881       token = tokenizer.nextToken();
882       assertTrue("(91) line comment not recognized", token.getType() == Token.LINE_COMMENT);
883       assertTrue("(92) start line wrong", token.getStartLine() == 3);
884       assertTrue("(93) start column wrong", token.getStartColumn() == 2);
885       assertTrue("(94) end line wrong", token.getEndLine() == 4);
886       assertTrue("(95) end column wrong", token.getEndColumn() == 0);
887
888       assertTrue(tokenizer.hasMoreToken());
889       token = tokenizer.nextToken();
890       assertTrue("(96) whitespace not recognized", token.getType() == Token.WHITESPACE);
891
892       // forth line: block comment
893
assertTrue(tokenizer.hasMoreToken());
894       token = tokenizer.nextToken();
895       assertTrue("(101) block comment not recognized", token.getType() == Token.BLOCK_COMMENT);
896       assertTrue("(102) start line wrong", token.getStartLine() == 4);
897       assertTrue("(103) start column wrong", token.getStartColumn() == 2);
898       assertTrue("(104) end line wrong", token.getEndLine() == 6);
899       assertTrue("(105) end column wrong", token.getEndColumn() == 4);
900
901       // 6th line: EOL
902
assertTrue(tokenizer.hasMoreToken());
903       token = tokenizer.nextToken();
904       assertTrue("(110) token \"\\r\\n\" not recognized.", token.getType() == Token.WHITESPACE && token.getImage().equals("\r\n"));
905       assertTrue("(111) start line wrong", token.getStartLine() == 6);
906       assertTrue("(112) start column wrong", token.getStartColumn() == 4);
907       assertTrue("(113) end line wrong", token.getEndLine() == 7);
908       assertTrue("(114) end column wrong", token.getEndColumn() == 0);
909       assertTrue("(115) wrong length", token.getLength() == 2);
910
911       // 7th line: }
912
assertTrue(tokenizer.hasMoreToken());
913       token = tokenizer.nextToken();
914       assertTrue("(120) token \"}\" not recognized.", token.getType() == Token.SEPARATOR && token.getImage().equals("}"));
915       assertTrue("(121) start line wrong", token.getStartLine() == 7);
916       assertTrue("(122) start column wrong", token.getStartColumn() == 0);
917       assertTrue("(123) end line wrong", token.getEndLine() == 7);
918       assertTrue("(124) end column wrong", token.getEndColumn() == 1);
919
920       // 7th line: EOL
921
assertTrue(tokenizer.hasMoreToken());
922       token = tokenizer.nextToken();
923       assertTrue("(130) token \"\\r\\n\" not recognized.", token.getType() == Token.WHITESPACE && token.getImage().equals("\r\n"));
924       assertTrue("(131) start line wrong", token.getStartLine() == 7);
925       assertTrue("(132) start column wrong", token.getStartColumn() == 1);
926       assertTrue("(133) end line wrong", token.getEndLine() == 8);
927       assertTrue("(134) end column wrong", token.getEndColumn() == 0);
928       assertTrue("(135) wrong length", token.getLength() == 2);
929       
930     } finally {
931       // Cleanup
932
tokenizer.close();
933     }
934   }
935
936   /**
937    * Line counting and line comments in MAC files
938    */

939   public void testMACEOL() throws Throwable JavaDoc {
940     TokenizerSource source = getSource(
941       "// line comment with DOS line ending\r"
942     + "void main(int argc)\r"
943     + "{\r"
944     + " // another line comment\r"
945     + " /* a block comment\r"
946     + " with more than one line\r"
947     + " */\r"
948     + "}\r");
949     
950     int lines = 8;
951     TokenizerProperties props = new StandardTokenizerProperties();
952     Tokenizer tokenizer = getTokenizer(props);
953     Token token;
954
955     try {
956       props.setParseFlags(Flags.F_RETURN_WHITESPACES | Flags.F_COUNT_LINES);
957       props.addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT);
958       props.addBlockComment(TokenizerProperties.DEFAULT_BLOCK_COMMENT_START, TokenizerProperties.DEFAULT_BLOCK_COMMENT_END);
959       props.addString("\"", "\"", "\\");
960       tokenizer.setSource(source);
961
962       // zero line comment
963
assertTrue(tokenizer.hasMoreToken());
964       token = tokenizer.nextToken();
965       assertTrue("(1) line comment not recognized", token.getType() == Token.LINE_COMMENT);
966       assertTrue("(2) start line wrong", token.getStartLine() == 0);
967       assertTrue("(3) start column wrong", token.getStartColumn() == 0);
968       assertTrue("(4) end line wrong", token.getEndLine() == 1);
969       assertTrue("(5) end column wrong", token.getEndColumn() == 0);
970
971       // first line: void
972
assertTrue(tokenizer.hasMoreToken());
973       token = tokenizer.nextToken();
974       assertTrue("(10) token \"void\" not recognized.", token.getType() == Token.NORMAL && token.getImage().equals("void"));
975       assertTrue("(11) start line wrong", token.getStartLine() == 1);
976       assertTrue("(12) start column wrong", token.getStartColumn() == 0);
977       assertTrue("(13) end line wrong", token.getEndLine() == 1);
978       assertTrue("(14) end column wrong", token.getEndColumn() == 4);
979
980       assertTrue(tokenizer.hasMoreToken());
981       token = tokenizer.nextToken();
982       assertTrue("(15) whitespace not recognized", token.getType() == Token.WHITESPACE);
983
984       // first line: main
985
assertTrue(tokenizer.hasMoreToken());
986       token = tokenizer.nextToken();
987       assertTrue("(20) token \"main\" not recognized.", token.getType() == Token.NORMAL && token.getImage().equals("main"));
988       assertTrue("(21) start line wrong", token.getStartLine() == 1);
989       assertTrue("(22) start column wrong", token.getStartColumn() == 5);
990       assertTrue("(23) end line wrong", token.getEndLine() == 1);
991       assertTrue("(24) end column wrong", token.getEndColumn() == 9);
992
993       // first line: (
994
assertTrue(tokenizer.hasMoreToken());
995       token = tokenizer.nextToken();
996       assertTrue("(30) token \"(\" not recognized.", token.getType() == Token.SEPARATOR && token.getImage().equals("("));
997       assertTrue("(31) start line wrong", token.getStartLine() == 1);
998       assertTrue("(32) start column wrong", token.getStartColumn() == 9);
999       assertTrue("(33) end line wrong", token.getEndLine() == 1);
1000      assertTrue("(34) end column wrong", token.getEndColumn() == 10);
1001
1002      // first line: int
1003
assertTrue(tokenizer.hasMoreToken());
1004      token = tokenizer.nextToken();
1005      assertTrue("(40) token \"int\" not recognized.", token.getType() == Token.NORMAL && token.getImage().equals("int"));
1006      assertTrue("(41) start line wrong", token.getStartLine() == 1);
1007      assertTrue("(42) start column wrong", token.getStartColumn() == 10);
1008      assertTrue("(43) end line wrong", token.getEndLine() == 1);
1009      assertTrue("(44) end column wrong", token.getEndColumn() == 13);
1010
1011      assertTrue(tokenizer.hasMoreToken());
1012      token = tokenizer.nextToken();
1013      assertTrue("(45) whitespace not recognized", token.getType() == Token.WHITESPACE);
1014
1015      // first line: argc
1016
assertTrue(tokenizer.hasMoreToken());
1017      token = tokenizer.nextToken();
1018      assertTrue("(50) token \"argc\" not recognized.", token.getType() == Token.NORMAL && token.getImage().equals("argc"));
1019      assertTrue("(51) start line wrong", token.getStartLine() == 1);
1020      assertTrue("(52) start column wrong", token.getStartColumn() == 14);
1021      assertTrue("(53) end line wrong", token.getEndLine() == 1);
1022      assertTrue("(54) end column wrong", token.getEndColumn() == 18);
1023
1024      // first line: )
1025
assertTrue(tokenizer.hasMoreToken());
1026      token = tokenizer.nextToken();
1027      assertTrue("(60) token \")\" not recognized.", token.getType() == Token.SEPARATOR && token.getImage().equals(")"));
1028      assertTrue("(61) start line wrong", token.getStartLine() == 1);
1029      assertTrue("(62) start column wrong", token.getStartColumn() == 18);
1030      assertTrue("(63) end line wrong", token.getEndLine() == 1);
1031      assertTrue("(64) end column wrong", token.getEndColumn() == 19);
1032
1033      // first line: EOL
1034
assertTrue(tokenizer.hasMoreToken());
1035      token = tokenizer.nextToken();
1036      assertTrue("(60) token \"\\r\" not recognized.", token.getType() == Token.WHITESPACE && token.getImage().equals("\r"));
1037      assertTrue("(61) start line wrong", token.getStartLine() == 1);
1038      assertTrue("(62) start column wrong", token.getStartColumn() == 19);
1039      assertTrue("(63) end line wrong", token.getEndLine() == 2);
1040      assertTrue("(64) end column wrong", token.getEndColumn() == 0);
1041      assertTrue("(65) wrong length", token.getLength() == 1);
1042
1043      // second line: {
1044
assertTrue(tokenizer.hasMoreToken());
1045      token = tokenizer.nextToken();
1046      assertTrue("(70) token \"{\" not recognized.", token.getType() == Token.SEPARATOR && token.getImage().equals("{"));
1047      assertTrue("(71) start line wrong", token.getStartLine() == 2);
1048      assertTrue("(72) start column wrong", token.getStartColumn() == 0);
1049      assertTrue("(73) end line wrong", token.getEndLine() == 2);
1050      assertTrue("(74) end column wrong", token.getEndColumn() == 1);
1051
1052      // second/third line: EOL + whitespaces
1053
assertTrue(tokenizer.hasMoreToken());
1054      token = tokenizer.nextToken();
1055      assertTrue("(80) token \"\\r \" not recognized.", token.getType() == Token.WHITESPACE && token.getImage().equals("\r "));
1056      assertTrue("(81) start line wrong", token.getStartLine() == 2);
1057      assertTrue("(82) start column wrong", token.getStartColumn() == 1);
1058      assertTrue("(83) end line wrong", token.getEndLine() == 3);
1059      assertTrue("(84) end column wrong", token.getEndColumn() == 2);
1060      assertTrue("(85) wrong length", token.getLength() == 3);
1061
1062      // third line: line comment
1063
assertTrue(tokenizer.hasMoreToken());
1064      token = tokenizer.nextToken();
1065      assertTrue("(91) line comment not recognized", token.getType() == Token.LINE_COMMENT);
1066      assertTrue("(92) start line wrong", token.getStartLine() == 3);
1067      assertTrue("(93) start column wrong", token.getStartColumn() == 2);
1068      assertTrue("(94) end line wrong", token.getEndLine() == 4);
1069      assertTrue("(95) end column wrong", token.getEndColumn() == 0);
1070
1071      assertTrue(tokenizer.hasMoreToken());
1072      token = tokenizer.nextToken();
1073      assertTrue("(96) whitespace not recognized", token.getType() == Token.WHITESPACE);
1074
1075      // forth line: block comment
1076
assertTrue(tokenizer.hasMoreToken());
1077      token = tokenizer.nextToken();
1078      assertTrue("(101) block comment not recognized", token.getType() == Token.BLOCK_COMMENT);
1079      assertTrue("(102) start line wrong", token.getStartLine() == 4);
1080      assertTrue("(103) start column wrong", token.getStartColumn() == 2);
1081      assertTrue("(104) end line wrong", token.getEndLine() == 6);
1082      assertTrue("(105) end column wrong", token.getEndColumn() == 4);
1083
1084      // 6th line: EOL
1085
assertTrue(tokenizer.hasMoreToken());
1086      token = tokenizer.nextToken();
1087      assertTrue("(110) token \"\\r\" not recognized.", token.getType() == Token.WHITESPACE && token.getImage().equals("\r"));
1088      assertTrue("(111) start line wrong", token.getStartLine() == 6);
1089      assertTrue("(112) start column wrong", token.getStartColumn() == 4);
1090      assertTrue("(113) end line wrong", token.getEndLine() == 7);
1091      assertTrue("(114) end column wrong", token.getEndColumn() == 0);
1092      assertTrue("(115) wrong length", token.getLength() == 1);
1093
1094      // 7th line: }
1095
assertTrue(tokenizer.hasMoreToken());
1096      token = tokenizer.nextToken();
1097      assertTrue("(120) token \"}\" not recognized.", token.getType() == Token.SEPARATOR && token.getImage().equals("}"));
1098      assertTrue("(121) start line wrong", token.getStartLine() == 7);
1099      assertTrue("(122) start column wrong", token.getStartColumn() == 0);
1100      assertTrue("(123) end line wrong", token.getEndLine() == 7);
1101      assertTrue("(124) end column wrong", token.getEndColumn() == 1);
1102
1103      // 7th line: EOL
1104
assertTrue(tokenizer.hasMoreToken());
1105      token = tokenizer.nextToken();
1106      assertTrue("(130) token \"\\r\" not recognized.", token.getType() == Token.WHITESPACE && token.getImage().equals("\r"));
1107      assertTrue("(131) start line wrong", token.getStartLine() == 7);
1108      assertTrue("(132) start column wrong", token.getStartColumn() == 1);
1109      assertTrue("(133) end line wrong", token.getEndLine() == 8);
1110      assertTrue("(134) end column wrong", token.getEndColumn() == 0);
1111      assertTrue("(135) wrong length", token.getLength() == 1);
1112
1113    } finally {
1114      // Cleanup
1115
tokenizer.close();
1116    }
1117  }
1118
1119  /**
1120   * Line counting with setReadPosition
1121   */

1122  public void testLineCounting() throws Throwable JavaDoc {
1123    TokenizerSource source = getSource(
1124      "01234 67 9\r\n"
1125    + "0 2 4 6 8\r"
1126    + " 1 3 5 7 9\n"
1127    + "01 34 67 9\n"
1128    + "/* block comment\n"
1129    + " in three lines\r\n"
1130    + "*/\n"
1131    + "// line comment 1\r"
1132    + "// line comment 2\r\n"
1133    + "// line comment 3\n"
1134    + "abc // line comment 1\r"
1135    + "01 34 67 // line comment 2\r\n"
1136    + "/* block comment */ // line comment 3\n");
1137    
1138    int[] expectedLines = {
1139      0, 0, 0,
1140      1, 1, 1, 1, 1,
1141      2, 2, 2, 2, 2,
1142      3, 3, 3, 3,
1143      4,
1144      7,
1145      8,
1146      9,
1147      10, 10,
1148      11, 11, 11, 11,
1149      12, 12
1150    };
1151    int[] expectedColumns = {
1152      0, 6, 9,
1153      0, 2, 4, 6, 8,
1154      1, 3, 5, 7, 9,
1155      0, 3, 6, 9,
1156      0,
1157      0,
1158      0,
1159      0,
1160      0, 4,
1161      0, 3, 6, 9,
1162      0, 20
1163    };
1164    
1165    TokenizerProperties props = new StandardTokenizerProperties();
1166    Tokenizer tokenizer = getTokenizer(props);
1167    Token token1;
1168    Token token2;
1169    int line = 0;
1170    int column = 0;
1171    int index = 0;
1172
1173    try {
1174      props.setParseFlags(Flags.F_RETURN_WHITESPACES | Flags.F_COUNT_LINES);
1175      props.addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT);
1176      props.addBlockComment(TokenizerProperties.DEFAULT_BLOCK_COMMENT_START, TokenizerProperties.DEFAULT_BLOCK_COMMENT_END);
1177      tokenizer.setSource(source);
1178
1179      while (tokenizer.hasMoreToken()) {
1180        token1 = tokenizer.nextToken();
1181        assertTrue("Wrong line/column " + token1.getStartLine() + "/" + token1.getStartColumn(),
1182                   token1.getStartLine() == line && token1.getStartColumn() == column);
1183
1184        tokenizer.setReadPositionRelative(-token1.getLength());
1185        token2 = tokenizer.nextToken();
1186        assertTrue("Wrong line/column " + token2.getStartLine() + "/" + token2.getStartColumn(),
1187                   token2.getStartLine() == line && token2.getStartColumn() == column);
1188
1189        assertTrue("Token mismatch:\n " + token1 + "\n " + token2, token1.equals(token2));
1190
1191        line = token1.getEndLine();
1192        column = token1.getEndColumn();
1193
1194        // cross check the line and columns
1195
if (token1.getType() != Token.WHITESPACE && token1.getType() != Token.EOF) {
1196          assertTrue("Expected line " + expectedLines[index] + ", found " + token1.getStartLine(),
1197                      token1.getStartLine() == expectedLines[index]);
1198          assertTrue("Expected column " + expectedColumns[index] + ", found " + token1.getStartColumn(),
1199                      token1.getStartColumn() == expectedColumns[index]);
1200          index++;
1201        }
1202      }
1203    } finally {
1204      // Cleanup
1205
tokenizer.close();
1206    }
1207  }
1208
1209  /**
1210   * Test the uncommon whitespaces. Note that the \r\n-combination is only treated
1211   * as one newline only, if both characters fall into one token.
1212   */

1213  public void testUncommonWhitespaces() throws Throwable JavaDoc {
1214    String JavaDoc data =
1215      "This text has spaces\r"
1216    + "and newlines. Depending on the flags\n"
1217    + "the spaces are considered as special sequences\r\n"
1218    + "or real\twhitespaces.\n\n"
1219    + "/** also included\r"
1220    + "* are line and block comments\r"
1221    + "*/\n"
1222    + "here comes // the line comment\n"
1223    + "// and another\n";
1224
1225    TokenizerProperties props = new StandardTokenizerProperties();
1226    Tokenizer tokenizer = getTokenizer(props);
1227    String JavaDoc[] ws = { "\r\n", " \t", " \t\n", " \t\r", " \n", " \r", "\t\r", "\t\n" };
1228    int[] wsCount = { 5, 18, 22, 20, 21, 19, 3, 5 };
1229    int[] seqCount = { 21, 7, 2, 5, 3, 6, 25, 22 };
1230    int[] lineCount = { 10, 11, 11, 11, 11, 11, 11, 11 };
1231    TokenizerProperty spaceProp = new TokenizerProperty(Token.SPECIAL_SEQUENCE, new String JavaDoc[] { " " } );
1232    TokenizerProperty tabProp = new TokenizerProperty(Token.SPECIAL_SEQUENCE, new String JavaDoc[] { "\t" } );
1233    TokenizerProperty lfProp = new TokenizerProperty(Token.SPECIAL_SEQUENCE, new String JavaDoc[] { "\n" } );
1234    TokenizerProperty crProp = new TokenizerProperty(Token.SPECIAL_SEQUENCE, new String JavaDoc[] { "\r" } );
1235    
1236    try {
1237      props.setParseFlags(Flags.F_RETURN_WHITESPACES | Flags.F_COUNT_LINES);
1238      props.addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT);
1239      props.addBlockComment(TokenizerProperties.DEFAULT_BLOCK_COMMENT_START, TokenizerProperties.DEFAULT_BLOCK_COMMENT_END);
1240      props.addProperty(spaceProp);
1241      props.addProperty(tabProp);
1242      props.addProperty(lfProp);
1243      props.addProperty(crProp);
1244      
1245      for (int ii = 0; ii < ws.length; ++ii) {
1246        int seqCounter = 0;
1247        int wsCounter = 0;
1248
1249        props.setWhitespaces(ws[ii]);
1250        tokenizer.setSource(getSource(data));
1251
1252        System.out.println("Loop " + ii);
1253        while (tokenizer.hasMoreToken()) {
1254          Token token = tokenizer.nextToken();
1255
1256          System.out.println(token.toString());
1257          switch (token.getType()) {
1258          case Token.SPECIAL_SEQUENCE:
1259            seqCounter++;
1260            break;
1261          case Token.WHITESPACE:
1262            wsCounter++;
1263            break;
1264          case Token.EOF:
1265            assertTrue("Loop " + ii + ": Expected " + lineCount[ii] + " lines, got " + token.getEndLine(),
1266                        lineCount[ii] == token.getEndLine());
1267            break;
1268          }
1269        }
1270        assertTrue("Loop " + ii + ": Expected " + wsCount[ii] + " whitespaces, got " + wsCounter,
1271                   wsCount[ii] == wsCounter);
1272        assertTrue("Loop " + ii + ": Expected " + seqCount[ii] + " special sequences, got " + seqCounter,
1273                   seqCount[ii] == seqCounter);
1274      }
1275    } finally {
1276      // Cleanup
1277
tokenizer.close();
1278    }
1279  }
1280    
1281  /**
1282   * Test the various whitespace flags
1283   */

1284  public void testWhitespaceHandling() throws Throwable JavaDoc {
1285    String JavaDoc data =
1286      "/* this is a block comment "
1287    + " followed by a newline (whitespace) sequence */\r\n"
1288    + "// a line comment\r\n"
1289    + "// another line comment\r\n"
1290    + " /* whitespaces with a block comment in between */ \n"
1291    + "// a EOF-terminated line comment";
1292
1293    TokenizerProperties props = new StandardTokenizerProperties();
1294    Tokenizer tokenizer = getTokenizer(props);
1295    int[] flags = { Flags.F_RETURN_BLOCK_COMMENTS,
1296                                      Flags.F_RETURN_LINE_COMMENTS,
1297                                      Flags.F_RETURN_BLOCK_COMMENTS + Flags.F_RETURN_LINE_COMMENTS,
1298                                      Flags.F_RETURN_WHITESPACES,
1299                                      Flags.F_RETURN_LINE_COMMENTS + Flags.F_RETURN_SIMPLE_WHITESPACES,
1300                                      Flags.F_RETURN_BLOCK_COMMENTS + Flags.F_RETURN_SIMPLE_WHITESPACES,
1301                                      Flags.F_RETURN_SIMPLE_WHITESPACES,
1302                                      0
1303                                    };
1304    boolean[] propsFlag = { true, false };
1305
1306    try {
1307      props.addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT);
1308      props.addBlockComment(TokenizerProperties.DEFAULT_BLOCK_COMMENT_START, TokenizerProperties.DEFAULT_BLOCK_COMMENT_END);
1309    
1310      for (int ii = 0; ii < propsFlag.length; ++ii) {
1311        for (int kk = 0; kk < flags.length; ++kk) {
1312          if (propsFlag[ii]) {
1313            props.setParseFlags(flags[kk]);
1314          } else {
1315            tokenizer.changeParseFlags(flags[kk], Flags.F_RETURN_WHITESPACES);
1316          }
1317          
1318          tokenizer.setSource(getSource(data));
1319
1320          System.out.println("Loop " + ii + "/" + kk);
1321          while (tokenizer.hasMoreToken()) {
1322            Token token = tokenizer.nextToken();
1323
1324            System.out.println(token.toString());
1325            switch (token.getType()) {
1326            case Token.BLOCK_COMMENT:
1327              assertTrue("Tokenizer returned a block comment without the flag set: " + tokenizer.currentImage(),
1328                        (flags[kk] & Flags.F_RETURN_BLOCK_COMMENTS) != 0);
1329              break;
1330            case Token.LINE_COMMENT:
1331              assertTrue("Tokenizer returned a line comment without the flag set: " + tokenizer.currentImage(),
1332                        (flags[kk] & Flags.F_RETURN_LINE_COMMENTS) != 0);
1333              break;
1334            case Token.WHITESPACE:
1335              assertTrue("Tokenizer returned a simple whitespace sequence without the flag set: " + tokenizer.currentImage(),
1336                        (flags[kk] & Flags.F_RETURN_SIMPLE_WHITESPACES) != 0);
1337              break;
1338            }
1339          }
1340        }
1341      }
1342    } finally {
1343      // Cleanup
1344
tokenizer.close();
1345    }
1346  }
1347  
1348  /**
1349   * Check mixed special sequences and separators
1350   */

1351  public void testSequencesAndSeparators() throws Throwable JavaDoc {
1352    String JavaDoc data = "(...::==:=: =====>==<=..()>>>>> >> >>>>)";
1353    int[] expected = { Token.SEPARATOR, // (
1354
Token.SPECIAL_SEQUENCE, // ..
1355
Token.SEPARATOR, // .
1356
Token.SEPARATOR, // :
1357
Token.SPECIAL_SEQUENCE, // :=
1358
Token.SEPARATOR, // =
1359
Token.SPECIAL_SEQUENCE, // :=
1360
Token.SEPARATOR, // :
1361
Token.SPECIAL_SEQUENCE, // ==
1362
Token.SPECIAL_SEQUENCE, // ==
1363
Token.SEPARATOR, // =
1364
Token.SPECIAL_SEQUENCE, // >=
1365
Token.SEPARATOR, // =
1366
Token.SPECIAL_SEQUENCE, // <=
1367
Token.SPECIAL_SEQUENCE, // ..
1368
Token.SPECIAL_SEQUENCE, // ()
1369
Token.SPECIAL_SEQUENCE, // >>>
1370
Token.SPECIAL_SEQUENCE, // >>
1371
Token.SPECIAL_SEQUENCE, // >>
1372
Token.SPECIAL_SEQUENCE, // >>>
1373
Token.SEPARATOR, // >
1374
Token.SEPARATOR, // )
1375
Token.EOF };
1376
1377    TokenizerProperties props = new StandardTokenizerProperties();
1378    Tokenizer tokenizer = getTokenizer(props);
1379    int count = 0;
1380
1381    try {
1382      props.addSpecialSequence(":=");
1383      props.addSpecialSequence(">=");
1384      props.addSpecialSequence("<=");
1385      props.addSpecialSequence("==");
1386      props.addSpecialSequence("..");
1387      props.addSpecialSequence("()");
1388      props.addSpecialSequence("..");
1389      props.addSpecialSequence(">>>");
1390      props.addSpecialSequence(">>");
1391      
1392      tokenizer.setSource(getSource(data));
1393
1394      while (tokenizer.hasMoreToken()) {
1395        Token token = tokenizer.nextToken();
1396
1397        System.out.println(token.getImage());
1398        assertTrue("Token #" + (count + 1) + ": expected type " + Token.getTypeName(expected[count]) + ", got " + Token.getTypeName(token.getType()),
1399                    token.getType() == expected[count]);
1400        count++;
1401      }
1402    } finally {
1403      tokenizer.close();
1404    }
1405  }
1406  
1407  
1408  //---------------------------------------------------------------------------
1409
// Implementation
1410
//
1411

1412  /**
1413   * Get the {@link TokenizerSource}.
1414   */

1415  private TokenizerSource getSource(String JavaDoc data) {
1416    try {
1417      return (TokenizerSource)_sourceClass.getConstructor( new Class JavaDoc[] { String JavaDoc.class } ).newInstance(new Object JavaDoc[] { data } );
1418    } catch (Throwable JavaDoc ex) {
1419      return new ReaderSource(new StringReader JavaDoc(data));
1420    }
1421  }
1422  
1423  /**
1424   * Get the {@link Tokenizer} instance according to the class passed to the
1425   * constructor.
1426   */

1427  private Tokenizer getTokenizer(TokenizerProperties props) throws Throwable JavaDoc {
1428    return new StandardTokenizer(props);
1429  }
1430  
1431  
1432  //---------------------------------------------------------------------------
1433
// Members
1434
//
1435
private Class JavaDoc _sourceClass;
1436}
1437
1438
Popular Tags