TestPatternMatching


1   /*
2    * TestPatternMatching.java: JUnit test for regular expression tokenizing
3    *
4    * Copyright (C) 2003 Heiko Blau
5    *
6    * This file belongs to the JTopas test suite.
7    * The JTopas test suite is free software; you can redistribute it and/or modify it 
8    * under the terms of the GNU Lesser General Public License as published by the 
9    * Free Software Foundation; either version 2.1 of the License, or (at your option) 
10   * any later version.
11   *
12   * This software is distributed in the hope that it will be useful, but WITHOUT
13   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
14   * FITNESS FOR A PARTICULAR PURPOSE. 
15   * See the GNU Lesser General Public License for more details.
16   *
17   * You should have received a copy of the GNU Lesser General Public License along 
18   * with the JTopas test suite. If not, write to the
19   *
20   *   Free Software Foundation, Inc.
21   *   59 Temple Place, Suite 330, 
22   *   Boston, MA 02111-1307 
23   *   USA
24   *
25   * or check the Internet: http://www.fsf.org
26   *
27   * The JTopas test suite uses the test framework JUnit by Kent Beck and Erich Gamma.
28   * You should have received a copy of their JUnit licence agreement along with 
29   * the JTopas test suite.
30   *
31   * We do NOT provide the JUnit archive junit.jar necessary to compile and run 
32   * our tests, since we assume, that You  either have it already or would like 
33   * to get the current release Yourself. 
34   * Please visit either:
35   *   http://sourceforge.net/projects/junit
36   * or
37   *   http://junit.org
38   * to obtain JUnit.
39   *
40   * Contact:
41   *   email: heiko@susebox.de 
42   */
43  
44  package de.susebox.jtopas;
45  
46  //-----------------------------------------------------------------------------
47  // Imports
48  //
49  import java.io.Reader  ;
50  import java.io.StringReader  ;
51  
52  import junit.framework.Test;
53  import junit.framework.TestCase;
54  import junit.framework.TestSuite;
55  import junit.framework.Assert;
56  
57  import de.susebox.TestUtilities;
58  
59  
60  //-----------------------------------------------------------------------------
61  // Class TestPatternMatching
62  //
63  
64  /**<p>
65   * The class contains a number of test cases related to the pattern matching
66   * facility in a {@link Tokenizer}.
67   *</p>
68   *
69   * @see     Tokenizer
70   * @see     StandardTokenizer
71   * @see     StandardTokenizerProperties
72   * @author  Heiko Blau
73   */
74  public class TestPatternMatching extends TestCase {
75    
76    //---------------------------------------------------------------------------
77    // properties
78    //
79  
80    
81    //---------------------------------------------------------------------------
82    // main method
83    //
84    
85    /**
86     * call this method to invoke the tests
87     */
88    public static void main(String  [] args) {
89      String  []   tests = { TestPatternMatching.class.getName() };
90  
91      TestUtilities.run(tests, args);
92    }
93    
94  
95    //---------------------------------------------------------------------------
96    // suite method
97    //
98    
99    /**
100    * Implementation of the JUnit method <code>suite</code>. For each set of test
101    * properties one or more tests are instantiated.
102    *
103    * @return a test suite
104    */
105   public static Test suite() {
106     TestSuite   suite = new TestSuite(TestPatternMatching.class.getName());
107     
108     suite.addTest(new TestPatternMatching("compareJTopasAgainstPattern"));
109     suite.addTest(new TestPatternMatching("testNumberPattern"));
110     suite.addTest(new TestPatternMatching("testPatternExt"));
111     suite.addTest(new TestPatternMatching("testPatternTokenizer"));
112     suite.addTest(new TestPatternMatching("testFreePattern"));
113     return suite;
114   }
115   
116   
117   //---------------------------------------------------------------------------
118   // Constructor
119   //
120   
121   /**
122    * Default constructor. Standard input {@link java.lang.System#in} is used
123    * to construct the input stream reader.
124    */  
125   public TestPatternMatching(String   test) {
126     super(test);
127   }
128 
129   
130   //---------------------------------------------------------------------------
131   // Fixture setup and release
132   //
133   
134   /**
135    * Sets up the fixture, for example, open a network connection.
136    * This method is called before a test is executed.
137    */
138   protected void setUp() throws Exception   {}
139 
140   
141   /**
142    * Tears down the fixture, for example, close a network connection.
143    * This method is called after a test is executed.
144    */
145   protected void tearDown() throws Exception   {}
146   
147   
148   //---------------------------------------------------------------------------
149   // test cases
150   //
151  
152   /**
153    * Test the case, when a line comment is not terminated by a newline character.
154    * This happens when the last line of a file is a line comment without a 
155    * newline on its end.
156    * This is a rather common situation.
157    */
158   public void testNumberPattern() throws Throwable   {
159     Reader                reader    = new StringReader  ("0.95 123 -1 -123123.92382 0,32 +4,001 -123,213 -0");
160     TokenizerProperties props     = new StandardTokenizerProperties();
161     StandardTokenizer   tokenizer = new StandardTokenizer(props);
162 
163     props.removeSeparators(".\\-+,");
164     props.addPattern("[+\\-]?[0-9]+\\.?[0-9]*");
165     props.addPattern("[+\\-]?[0-9]+,?[0-9]*");
166     tokenizer.setSource(reader);
167 
168     while (tokenizer.hasMoreToken()) {
169       Token token = tokenizer.nextToken();
170       
171       System.out.println(token);
172       if (token.getType() != Token.EOF) {
173         assertTrue("Wrong token type " + Token.getTypeName(token.getType()) + " for: " + token.getImage(), 
174                     token.getType() == Token.PATTERN);
175       }
176     }
177   }
178 
179   /**
180    * Test the case, when a line comment is not terminated by a newline character.
181    * This happens when the last line of a file is a line comment without a 
182    * newline on its end.
183    * This is a rather common situation.
184    */
185   public void testPatternExt() throws Throwable   {
186     Reader                reader    = new StringReader  ("// the main method\nvoid main100(100)\n{ int x1 = 0.95; int x2 = -1; int x3 = 3.0 -1.0; }");
187     int[]               expected  = { Token.NORMAL, Token.NORMAL, Token.SEPARATOR, Token.PATTERN, Token.SEPARATOR, Token.SEPARATOR, 
188                                       Token.NORMAL, Token.NORMAL, Token.SEPARATOR, Token.PATTERN, Token.SEPARATOR, 
189                                       Token.NORMAL, Token.NORMAL, Token.SEPARATOR, Token.PATTERN, Token.SEPARATOR, 
190                                       Token.NORMAL, Token.NORMAL, Token.SEPARATOR, Token.PATTERN, Token.PATTERN, Token.SEPARATOR, 
191                                       Token.SEPARATOR, Token.EOF };
192     TokenizerProperties props     = new StandardTokenizerProperties();
193     StandardTokenizer   tokenizer = new StandardTokenizer(props);
194 
195     props.removeSeparators(".\\-");
196     props.addLineComment("//");
197     props.addPattern("[+\\-]?[0-9]+\\.?[0-9]*");
198     tokenizer.setSource(reader);
199 
200     int index = 0;
201     while (tokenizer.hasMoreToken()) {
202       Token token = tokenizer.nextToken();
203       
204       System.out.println(token);
205       assertTrue("Wrong token type " + Token.getTypeName(token.getType()) + " for: " + token.getImage(), 
206                   token.getType() == expected[index]);
207       index++;
208     }
209   }
210 
211   /**
212    * A tokenizer working almost completely with pattern and whitespaces
213    */
214   public void testPatternTokenizer() throws Throwable   {
215     Reader   reader = new StringReader  (
216         "// the main method\n" 
217       + "void main(String[] argv)\n" 
218       + "{\n"
219       +   "int x1 = 0.95;     // 1. variable\n"
220       +   "int x2 = -1;       // 2. variable\n"
221       +   "int x3 = x1 - x2;  // 3. variable\n"
222       +   "int x4 = +1.01;    // 4. variable\n"
223       + "}\n");
224     
225     Object   lineComment  = new String  ("<line comment>");
226     Object   identifier   = new String  ("<identifier>");
227     Object   number       = new String  ("<number>");
228     Object   separator    = new String  ("<separator>");
229     
230     Object  [] expected = { 
231       lineComment, 
232       identifier, identifier, separator, identifier, separator, separator, identifier, separator,
233       separator, 
234       identifier, identifier, separator, number, separator, lineComment,
235       identifier, identifier, separator, number, separator, lineComment,
236       identifier, identifier, separator, identifier, separator, identifier, separator, lineComment,
237       identifier, identifier, separator, number, separator, lineComment,
238       separator
239     };
240     
241     TokenizerProperties props     = new StandardTokenizerProperties();
242     StandardTokenizer   tokenizer = new StandardTokenizer(props);
243 
244     props.addPattern("[+\\-]?[0-9]+\\.?[0-9]*", number);
245     props.addPattern("[a-z][a-z0-9]*", identifier, Flags.F_NO_CASE);
246     props.addPattern("//.*$", lineComment, Flags.F_FREE_PATTERN);
247     props.addSpecialSequence("{", separator);
248     props.addSpecialSequence("}", separator);
249     props.addSpecialSequence("(", separator);
250     props.addSpecialSequence(")", separator);
251     props.addSpecialSequence("[", separator);
252     props.addSpecialSequence("]", separator);
253     props.addSpecialSequence("=", separator);
254     props.addSpecialSequence(";", separator);
255     props.addSpecialSequence("-", separator);
256     tokenizer.setSource(reader);
257 
258     int index = 0;
259     while (tokenizer.hasMoreToken()) {
260       Token token = tokenizer.nextToken();
261       
262       if (token.getType() != Token.EOF) {
263         System.out.println(token);
264         assertTrue("Wrong token companion: " + token.getCompanion() + " for: " + token.getImage(), 
265                     token.getCompanion() == expected[index]);
266       }
267       index++;
268     }
269   }
270 
271   /**
272    * A tokenizer working almost completely with pattern and whitespaces
273    */
274   public void testFreePattern() throws Throwable   {
275     Reader   reader = new StringReader  (
276         "// the main method\n" 
277       + "void main(String[] argv)\n" 
278       + "{\n"
279       +   "int x1 =+1.01;\n"
280       +   "int x2 =- 2.02;\n"
281       +   "int x3 = + 2.02; // line comment\n"
282       +   "int x4 = - 2.02+ 3.1232;\n"
283       +   "fct(x1, x2, x3)// a call\n"
284       + "}\n");
285     
286     Object   lineComment  = new String  ("<line comment>");
287     Object   identifier   = new String  ("<identifier>");
288     Object   number       = new String  ("<number>");
289     Object   separator    = new String  ("<separator>");
290     Object   paraList     = new String  ("<parameter list>");
291     
292     Object  [] expected = { 
293       lineComment, 
294       identifier, identifier, paraList,
295       separator, 
296       identifier, identifier, separator, number, separator,
297       identifier, identifier, separator, number, separator,
298       identifier, identifier, separator, number, separator, lineComment,
299       identifier, identifier, separator, number, number, separator,
300       identifier, paraList, lineComment,
301       separator
302     };
303     
304     TokenizerProperties props     = new StandardTokenizerProperties();
305     StandardTokenizer   tokenizer = new StandardTokenizer(props);
306 
307     props.addPattern("[+\\-]?[ \t]*[0-9]+\\.?[0-9]*", number,      Flags.F_FREE_PATTERN);
308     props.addPattern("[a-z][a-z0-9]*",                identifier,  Flags.F_NO_CASE);
309     props.addPattern("//.*$",                         lineComment, Flags.F_FREE_PATTERN);
310     props.addPattern("\\(.*\\)",                      paraList,    Flags.F_FREE_PATTERN);
311     props.addSpecialSequence("{", separator);
312     props.addSpecialSequence("}", separator);
313     props.addSpecialSequence("(", separator);
314     props.addSpecialSequence(")", separator);
315     props.addSpecialSequence("[", separator);
316     props.addSpecialSequence("]", separator);
317     props.addSpecialSequence("=", separator);
318     props.addSpecialSequence(";", separator);
319     props.addSpecialSequence("-", separator);
320     tokenizer.setSource(reader);
321 
322     int index = 0;
323     while (tokenizer.hasMoreToken()) {
324       Token token = tokenizer.nextToken();
325       
326       if (token.getType() != Token.EOF) {
327         System.out.println(token);
328         assertTrue("Wrong token companion: " + token.getCompanion() + " for: " + token.getImage(), 
329                     token.getCompanion() == expected[index]);
330       }
331       index++;
332     }
333   }
334 
335   /**
336    * Comparing JTopas string and comment handling against regular expression
337    * handling
338    */
339   public void compareJTopasAgainstPattern() throws Throwable   {
340     String   data = 
341         "/* File: $FILENAME */\n"
342       + "/**\n"
343       + "* Starting with a Javadoc comment.\n" 
344       + "* This comment describes the class below.\n" 
345       + "*/\n"
346       + "public class TestClass {\n" 
347       + "  //-------------------------------------------------------------------\n" 
348       + "  // Constants\n" 
349       + "  //-------------------------------------------------------------------\n" 
350       + "\n" 
351       + "  /**\n" 
352       + "  * A constant with its own comment\n" 
353       + "  */\n" 
354       + "  public static final String URL = \"http://jtopas.sourceforge.net/jtopas/index.html\";\n"
355       + "\n" 
356       + "  /**\n" 
357       + "  * Another constant with its own comment\n" 
358       + "  */\n" 
359       + "  public static final String HELP =\n"
360       + "     \"This is the help for JTopas.\"\n"
361       + "   + \"You can obtain the software from \" + URL + \".\"\n"
362       + "   + \"It is a realy easy to use library.\";\n"
363       + "\n" 
364       + "  /**\n" 
365       + "  * The main method takes the usual array of arguments. It also accepts\n" 
366       + "  * <code>null</code>.\n" 
367       + "  *\n" 
368       + "  * @param args the arguments to the main method.\n" 
369       + "  */\n" 
370       + "  public void main(String[] argv)\n" 
371       + "  {\n"
372       + "     // a loop over all arguments\n"
373       + "     for (int ii = 0; ii < argv.length; ++ii) {\n"
374       + "       char cc1 = 'A';\n"
375       + "       char cc2 = 'B';\n"
376       // + "       char cc3 = '\\'';\n"
377       + "       System.out.println(\"String #\" + ii +\": \" + argv[ii] + \".\";\n"
378       + "     }\n"
379       + "     // ready message\n"
380       + "     System.out.println(\"Ready printing Strings.\";\n"
381       + "     /*\n"
382       + "       here we add future extensions:\n"
383       + "       for instance the exit call :-)\n"
384       + "     */\n"
385       + "  }\n"
386       + "}\n";
387     
388     Object   docComment   = new String  ("<doc comment>");
389     Object   blockComment = new String  ("<block comment>");
390     Object   lineComment  = new String  ("<line comment>");
391     Object   string       = new String  ("<string>");
392     Object   character    = new String  ("<character>");
393     
394     TokenizerProperties jtopasProps       = new StandardTokenizerProperties(Flags.F_RETURN_WHITESPACES | Flags.F_TOKEN_POS_ONLY);
395     TokenizerProperties patternProps      = new StandardTokenizerProperties(Flags.F_RETURN_WHITESPACES | Flags.F_TOKEN_POS_ONLY);
396 
397     // patternProps.addPattern("/\\*\\*.*\\*/",            docComment,   Flags.F_FREE_PATTERN);
398     patternProps.addPattern("/\\*.*?\\*/",                blockComment, Flags.F_FREE_PATTERN);
399     patternProps.addPattern("//.*?$",                     lineComment,  Flags.F_FREE_PATTERN);
400     patternProps.addPattern("\"[^$\"]*?\"",               string,       Flags.F_FREE_PATTERN);
401     // patternProps.addPattern("\"[[^$\"]|[\\\\&&\"]]*?\"", string,       Flags.F_FREE_PATTERN);
402     patternProps.addPattern("'.'",                        character,    Flags.F_FREE_PATTERN);
403     // patternProps.addPattern("'[[^']|[\\\\&&']]+?'",       character,    Flags.F_FREE_PATTERN);
404     
405     // jtopasProps.addBlockComment("/**", "*/",            docComment);
406     jtopasProps.addBlockComment("/*", "*/",             blockComment);
407     jtopasProps.addLineComment("//",                    lineComment);
408     jtopasProps.addString("\"", "\"", "\\",             string);
409     jtopasProps.addString("'", "'", "\\",               character);
410 
411     tokenize(jtopasProps, data);
412     tokenize(patternProps, data);
413   }
414 
415   
416   /**
417    * Tokenize with the given tokenizer.
418    */
419   private void tokenize(TokenizerProperties props, String   data) throws Throwable   {
420     long              startTime = System.currentTimeMillis();
421     StandardTokenizer tokenizer = new StandardTokenizer(props);
422     
423     try {
424       for (int ii = 0; ii < 100; ++ii) {
425         Reader   reader = new StringReader  (data);
426 
427         try {
428           tokenizer.setSource(reader);
429           while (tokenizer.hasMoreToken()) {
430             Token token = tokenizer.nextToken();
431             // System.out.println( + ": " + tokenizer.currentImage());
432           }
433         } finally {
434           reader.close();
435         }
436       }
437     } finally {
438       tokenizer.close();
439     }
440     
441     // print elapsed time
442     long diffTime = System.currentTimeMillis() - startTime;
443     System.out.println("  Finished after " + diffTime + " milliseconds.");
444   }
445 }  
446 
447
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags