TestTokenizerFlags


1   /*
2    * TestTokenizerFlags.java: JUnit test for TokenizerProperties implementations
3    *
4    * Copyright (C) 2004 Heiko Blau
5    *
6    * This file belongs to the JTopas test suite.
7    * The JTopas test suite is free software; you can redistribute it and/or modify it 
8    * under the terms of the GNU Lesser General Public License as published by the 
9    * Free Software Foundation; either version 2.1 of the License, or (at your option) 
10   * any later version.
11   *
12   * This software is distributed in the hope that it will be useful, but WITHOUT
13   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
14   * FITNESS FOR A PARTICULAR PURPOSE. 
15   * See the GNU Lesser General Public License for more details.
16   *
17   * You should have received a copy of the GNU Lesser General Public License along 
18   * with the JTopas test suite. If not, write to the
19   *
20   *   Free Software Foundation, Inc.
21   *   59 Temple Place, Suite 330, 
22   *   Boston, MA 02111-1307 
23   *   USA
24   *
25   * or check the Internet: http://www.fsf.org
26   *
27   * The JTopas test suite uses the test framework JUnit by Kent Beck and Erich Gamma.
28   * You should have received a copy of their JUnit licence agreement along with 
29   * the JTopas test suite.
30   *
31   * We do NOT provide the JUnit archive junit.jar necessary to compile and run
32   * our tests, since we assume that you either have it already or would like
33   * to get the current release yourself.
34   * Please visit either:
35   *   http://sourceforge.net/projects/junit
36   * or
37   *   http://junit.org
38   * to obtain JUnit.
39   *
40   * Contact:
41   *   email: heiko@susebox.de 
42   */
43  
44  package de.susebox.jtopas;
45  
46  //-----------------------------------------------------------------------------
47  // Imports
48  //
49  import java.util.Iterator  ;
50  
51  import junit.framework.Test;
52  import junit.framework.TestCase;
53  import junit.framework.TestSuite;
54  
55  import de.susebox.TestUtilities;
56  
57  
58  //-----------------------------------------------------------------------------
59  // Class TestTokenizerFlags
60  //
61  
62  /**<p>
63   * This class tests the behaviour of a {@link Tokenizer} regarding the flags set 
64   * in the backing {@link TokenizerProperties} and the registered 
65   * {@link TokenizerProperty} objects.
66   *</p>
67   *
68   * @see     TokenizerProperties
69   * @see     TokenizerProperty
70   * @see     Tokenizer
71   * @author  Heiko Blau
72   */
73  public class TestTokenizerFlags extends TestCase {
74    
75    //---------------------------------------------------------------------------
76    // main method
77    //
78    
79    /**
80     * call this method to invoke the tests
81     */
82    public static void main(String  [] args) {
83      String  []   tests = { TestTokenizerFlags.class.getName() };
84  
85      TestUtilities.run(tests, args);
86    }
87    
88  
89    //---------------------------------------------------------------------------
90    // suite method
91    //
92    
93    /**
94     * Implementation of the JUnit method <code>suite</code>. For each set of test
95     * properties one or more tests are instantiated.
96     *
97     * @return a test suite
98     */
99    public static Test suite() {
100     TestSuite   suite = new TestSuite(TestTokenizerFlags.class.getName());
101     
102     suite.addTest(new TestTokenizerFlags("testCasePrecedence"));
103     suite.addTest(new TestTokenizerFlags("testTokenPosOnly"));
104     suite.addTest(new TestTokenizerFlags("testReturnImageParts"));
105     suite.addTest(new TestTokenizerFlags("testReturnWhitespaces"));
106     return suite;
107   }
108   
109   
110   //---------------------------------------------------------------------------
111   // Constructor
112   //
113   
114   /**
115    * Default constructor. Standard input {@link java.lang.System#in} is used
116    * to construct the input stream reader.
117    */  
118   public TestTokenizerFlags(String   test) {
119     super(test);
120   }
121 
122   
123   //---------------------------------------------------------------------------
124   // test cases
125   //
126 
127   /**
128    * Verifying the {@link TokenizerProperties#F_CASE} and TokenizerProperties#F_NO_CASE}
129    * flags.
130    */
131   public void testCasePrecedence() throws Throwable   {
132     StandardTokenizer           tokenizer;
133     StandardTokenizerProperties props;
134     Token                       token;
135     
136     // Precedence of the case flag
137     Object   noCaseComp = new Object  ();
138     Object   caseComp   = new Object  ();
139     Object   CASEComp   = new Object  ();
140 
141     props = new StandardTokenizerProperties();
142     props.addKeyword("noCase", noCaseComp, Flags.F_NO_CASE);
143     props.addKeyword("case", caseComp);
144     props.addKeyword("CASE", CASEComp);
145     
146     tokenizer = new StandardTokenizer(props);
147     try {
148       tokenizer.setSource(new StringSource("noCase NOCASE NocASE nocase"));
149       for (int index = 0; index < 4; index++) {
150         assertTrue("No more token at index " + index + ".", tokenizer.hasMoreToken());
151         token = tokenizer.nextToken();
152         assertTrue("Token is null at index " + index + ".", token != null);
153         switch (index) {
154         case 4:
155           assertTrue("Token is not EOF at index " + index + ".", token.getType() == Token.EOF);
156           break;
157         default:
158           assertTrue("Token is no keyword at index " + index + ".", token.getType() == Token.KEYWORD);
159           assertTrue("Unexpected token companion at index " + index + ".", token.getCompanion() == noCaseComp);
160         }
161       }
162 
163       tokenizer.setSource(new StringSource("case Case CASE cASE"));
164       for (int index = 0; index < 5; index++) {
165         assertTrue("No more token at index " + index + ".", tokenizer.hasMoreToken());
166         token = tokenizer.nextToken();
167         assertTrue("Token is null at index " + index + ".", token != null);
168         switch (index) {
169         case 0:
170         case 2:
171           assertTrue("Token is no keyword at index " + index + ".", token.getType() == Token.KEYWORD);
172           if (index == 0) {
173             assertTrue("Unexpected token companion at index " + index + ".", token.getCompanion() == caseComp);
174           } else {
175             assertTrue("Unexpected token companion at index " + index + ".", token.getCompanion() == CASEComp);
176           }
177           break;
178         case 4:
179           assertTrue("Token is not EOF at index " + index + ".", token.getType() == Token.EOF);
180           break;
181         default:
182           assertTrue("No normal token at index " + index + ".", token.getType() == Token.NORMAL);
183         }
184       }
185     } finally {
186       tokenizer.close();
187     }
188   }
189   
190   /**
191    * Verifying the {@link TokenizerProperties#F_TOKEN_POS_ONLY} flag.
192    */
193   public void testTokenPosOnly() throws Throwable   {
194     StandardTokenizer           tokenizer;
195     StandardTokenizerProperties props;
196     Token                       token;
197     
198     // Precedence of the case flag
199     props = new StandardTokenizerProperties(Flags.F_TOKEN_POS_ONLY);
200     props.addProperty(new TokenizerProperty(Token.STRING, new String  [] { "\"", "\"", "\""}, null, 0, Flags.F_TOKEN_POS_ONLY));
201     props.addString("'", "'", "'");
202     
203     tokenizer = new StandardTokenizer(props);
204     try {
205       tokenizer.setSource(new StringSource("\"a string that is returned\""));
206       assertTrue("No more token.", tokenizer.hasMoreToken());
207       token = tokenizer.nextToken();
208       assertTrue("Token is no string.", token != null && token.getType() == Token.STRING);
209       assertTrue("Token has no image.", token.getImage() != null);
210       assertTrue("No more token.", tokenizer.hasMoreToken());
211       token = tokenizer.nextToken();
212       assertTrue("Token is not EOF.", token != null && token.getType() == Token.EOF);
213 
214       tokenizer.setSource(new StringSource("'a string that is not returned'"));
215       assertTrue("No more token.", tokenizer.hasMoreToken());
216       token = tokenizer.nextToken();
217       assertTrue("Token is no string.", token != null && token.getType() == Token.STRING);
218       assertTrue("Token has image.", token.getImage() == null);
219       assertTrue("No more token.", tokenizer.hasMoreToken());
220       token = tokenizer.nextToken();
221       assertTrue("Token is not EOF.", token != null && token.getType() == Token.EOF);
222     } finally {
223       tokenizer.close();
224     }
225   }
226   
227   /**
228    * Verifying the {@link TokenizerProperties#F_RETURN_IMAGE_PARTS} flag.
229    */
230   public void testReturnImageParts() throws Throwable   {
231     StandardTokenizer           tokenizer;
232     StandardTokenizerProperties props;
233     Token                       token;
234     String  []                    imageParts;
235     
236     // add properties
237     props = new StandardTokenizerProperties(Flags.F_TOKEN_POS_ONLY);
238     props.setSeparators(":");
239     props.addString("\"", "\"", "\\", null, Flags.F_RETURN_IMAGE_PARTS);
240     props.addPattern("([\\+\\-]?)([0-9]+)", null, Flags.F_RETURN_IMAGE_PARTS);
241     props.addPattern("([\\+\\-]?)([0-9]+)\\.([0-9]+)", null, Flags.F_RETURN_IMAGE_PARTS);
242     props.addString("'", "'", "'", null, Flags.F_RETURN_IMAGE_PARTS);
243     props.addLineComment("--", null, Flags.F_RETURN_LINE_COMMENTS | Flags.F_RETURN_IMAGE_PARTS);
244     props.addBlockComment("[[", "]]", null, Flags.F_RETURN_IMAGE_PARTS | Flags.F_RETURN_BLOCK_COMMENTS);
245     
246     // tokenize data
247     tokenizer = new StandardTokenizer(props);
248     try {
249       tokenizer.setSource(new StringSource("[[\rblock comment with empty first line]] ImageParts [[with a block comment\r over 2 lines]]: +9745 1 -234 +0.09 14.1 \"a \\\"string\\\" with escapes\" 'a SQL string with ''escapes'' and\n with linefeeds\r\n' -- a line comment"));
250       
251       // first block comment
252       assertTrue("No more token.", tokenizer.hasMoreToken());
253       token = tokenizer.nextToken();
254       verifyImageParts(token, new String  [] { "", "block comment with empty first line" } );
255 
256       // token "ImageParts"
257       assertTrue("No more token.", tokenizer.hasMoreToken());
258       token = tokenizer.nextToken();
259       assertTrue(token.getType() == Token.NORMAL);
260 
261       // block comment token
262       assertTrue("No more token.", tokenizer.hasMoreToken());
263       token = tokenizer.nextToken();
264       verifyImageParts(token, new String  [] { "with a block comment", " over 2 lines" } );
265 
266       // token ":"
267       assertTrue("No more token.", tokenizer.hasMoreToken());
268       token = tokenizer.nextToken();
269       assertTrue(token.getType() == Token.SEPARATOR);
270 
271       // token "+9745"
272       assertTrue("No more token.", tokenizer.hasMoreToken());
273       token = tokenizer.nextToken();
274       verifyImageParts(token, new String  [] { "+9745", "+", "9745" } );
275 
276       // token "1"
277       assertTrue("No more token.", tokenizer.hasMoreToken());
278       token = tokenizer.nextToken();
279       verifyImageParts(token, new String  [] { "1", "", "1" } );
280 
281       // token "-234"
282       assertTrue("No more token.", tokenizer.hasMoreToken());
283       token = tokenizer.nextToken();
284       verifyImageParts(token, new String  [] { "-234", "-", "234" } );
285 
286       // token "+0.09"
287       assertTrue("No more token.", tokenizer.hasMoreToken());
288       token = tokenizer.nextToken();
289       verifyImageParts(token, new String  [] { "+0.09", "+", "0", "09" } );
290 
291       // token "14.1"
292       assertTrue("No more token.", tokenizer.hasMoreToken());
293       token = tokenizer.nextToken();
294       verifyImageParts(token, new String  [] { "14.1", "", "14", "1" } );
295 
296       // string token #1
297       assertTrue("No more token.", tokenizer.hasMoreToken());
298       token = tokenizer.nextToken();
299       verifyImageParts(token, new String  [] { "a \"string\" with escapes" } );
300 
301       // string token #2
302       assertTrue("No more token.", tokenizer.hasMoreToken());
303       token = tokenizer.nextToken();
304       verifyImageParts(token, new String  [] { "a SQL string with 'escapes' and", " with linefeeds", "" } );
305 
306       // line comment token
307       assertTrue("No more token.", tokenizer.hasMoreToken());
308       token = tokenizer.nextToken();
309       verifyImageParts(token, new String  [] { " a line comment" } );
310 
311       // EOF reached
312       assertTrue("No more token.", tokenizer.hasMoreToken());
313       token = tokenizer.nextToken();
314       assertTrue("Token is not EOF.", token != null && token.getType() == Token.EOF);
315     } finally {
316       tokenizer.close();
317     }
318   }
319   
320   /**
321    * Test preceedence of {@link Tokenizer} flags over the {@link TokenizerProperties}
322    * flags
323    */
324   public void testReturnWhitespaces() throws Throwable   {
325     String   source = "// a text with several token types\n"
326                   + "void main(int argc, char* argv[]) {\n"
327                   +   "printf(\"Hello, world!\");\n"
328                   + "}";
329     int[]   flagMasks = { 0, Flags.F_RETURN_WHITESPACES, Flags.F_RETURN_WHITESPACES | Flags.F_COUNT_LINES };
330     int[][] expected  = { { Token.KEYWORD, Token.NORMAL, Token.SEPARATOR, Token.KEYWORD, Token.NORMAL, Token.SEPARATOR, Token.KEYWORD, Token.SEPARATOR, Token.NORMAL, Token.SEPARATOR, Token.SEPARATOR, Token.SEPARATOR, Token.SEPARATOR,
331                             Token.NORMAL, Token.SEPARATOR, Token.STRING, Token.SEPARATOR, Token.SEPARATOR,
332                             Token.SEPARATOR, Token.EOF },
333                           { Token.LINE_COMMENT,
334                             Token.KEYWORD, Token.WHITESPACE, Token.NORMAL, Token.SEPARATOR, Token.KEYWORD, Token.WHITESPACE, Token.NORMAL, Token.SEPARATOR, Token.WHITESPACE, Token.KEYWORD, Token.SEPARATOR, Token.WHITESPACE, Token.NORMAL, Token.SEPARATOR, Token.SEPARATOR, Token.SEPARATOR, Token.WHITESPACE, Token.SEPARATOR, Token.WHITESPACE, 
335                             Token.NORMAL, Token.SEPARATOR, Token.STRING, Token.SEPARATOR, Token.SEPARATOR, Token.WHITESPACE, 
336                             Token.SEPARATOR, Token.EOF },
337                           { Token.LINE_COMMENT,
338                             Token.KEYWORD, Token.WHITESPACE, Token.NORMAL, Token.SEPARATOR, Token.KEYWORD, Token.WHITESPACE, Token.NORMAL, Token.SEPARATOR, Token.WHITESPACE, Token.KEYWORD, Token.SEPARATOR, Token.WHITESPACE, Token.NORMAL, Token.SEPARATOR, Token.SEPARATOR, Token.SEPARATOR, Token.WHITESPACE, Token.SEPARATOR, Token.WHITESPACE, 
339                             Token.NORMAL, Token.SEPARATOR, Token.STRING, Token.SEPARATOR, Token.SEPARATOR, Token.WHITESPACE, 
340                             Token.SEPARATOR, Token.EOF }
341                         };  
342     int[][]  starts   = { { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
343                             -1, -1, -1, -1, -1,
344                             -1, -1 },
345                           { -1,
346                             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
347                             -1, -1, -1, -1, -1, -1,
348                             -1, -1 },
349                           {  0,
350                              0,  4,  5,  9, 10, 13, 14, 18, 19, 20, 24, 25, 26, 30, 31, 32, 33, 34, 35,
351                              0,  6,  7, 22, 23, 24,
352                              0,  1 }
353                         };
354                              
355     TokenizerProperties props = new StandardTokenizerProperties(/*Flags.F_TOKEN_POS_ONLY*/);
356     
357     // add properties
358     props.addLineComment("//");
359     props.addString("\"", "\"", "\\");
360     props.addKeyword("void");
361     props.addKeyword("int");
362     props.addKeyword("char");
363     
364     for (int index = 0; index < flagMasks.length; ++index) {
365       Token     token;
366       int       count     = 0;
367       Tokenizer tokenizer = new StandardTokenizer(props);
368       
369       try {
370         tokenizer.changeParseFlags(flagMasks[index], flagMasks[index]);
371         tokenizer.setSource(new StringSource(source));
372 
373         while (tokenizer.hasMoreToken()) {
374           token = tokenizer.nextToken();
375   
376           System.out.println(token);
377           assertTrue("Index " + index + ", token #" + count + ": expected token type \"" + Token.getTypeName(expected[index][count]) + "\", got \"" + Token.getTypeName(token.getType()) + "\": " + token, 
378                      token.getType() == expected[index][count]);
379           assertTrue("Index " + index + ", token #" + count + ": expected start column " + starts[index][count] + ", got " + token.getStartColumn() + ": " + token, 
380                      token.getStartColumn() == starts[index][count]);
381           count++;
382         }
383       } finally {
384         tokenizer.close();
385       }
386     }
387   }
388   
389     
390   //---------------------------------------------------------------------------
391   // Implementation
392   //
393   
394   /**
395    * Checks image parts
396    */
397   private void verifyImageParts(Token token, String  [] expected) throws Throwable   {
398     String  [] imageParts = token.getImageParts();
399     
400     if (expected != null) {
401       assertTrue("Token has no image parts: " + token, imageParts != null);
402       assertTrue("Expected " + expected.length + " image parts, got " + imageParts.length + ": " + token, 
403                 imageParts.length == expected.length);
404       
405       for (int index = 0; index < expected.length; ++index) {
406         if (expected[index] != null) {
407           assertTrue("Image part " + index + ": expected \"" + expected[index] + "\", got \"" + imageParts[index] + "\": " + token, 
408                      imageParts[index] != null && imageParts[index].equals(expected[index]));
409         } else {
410           assertTrue("Image part " + index + ": expected null, got \"" + imageParts[index] + "\": " + token, imageParts[index] == null);
411         }
412       }
413     } else {
414       assertTrue("Expected no image parts, got " + imageParts + ": " + token, imageParts == null || imageParts.length == 0);
415     }
416   }
417   
418   //---------------------------------------------------------------------------
419   // Members
420   //
421 }
422
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags