KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > de > susebox > jtopas > TestPatternMatching


1 /*
2  * TestPatternMatching.java: JUnit test for regular expression tokenizing
3  *
4  * Copyright (C) 2003 Heiko Blau
5  *
6  * This file belongs to the JTopas test suite.
7  * The JTopas test suite is free software; you can redistribute it and/or modify it
8  * under the terms of the GNU Lesser General Public License as published by the
9  * Free Software Foundation; either version 2.1 of the License, or (at your option)
10  * any later version.
11  *
12  * This software is distributed in the hope that it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.
15  * See the GNU Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public License along
18  * with the JTopas test suite. If not, write to the
19  *
20  * Free Software Foundation, Inc.
21  * 59 Temple Place, Suite 330,
22  * Boston, MA 02111-1307
23  * USA
24  *
25  * or check the Internet: http://www.fsf.org
26  *
27  * The JTopas test suite uses the test framework JUnit by Kent Beck and Erich Gamma.
28  * You should have received a copy of their JUnit licence agreement along with
29  * the JTopas test suite.
30  *
31  * We do NOT provide the JUnit archive junit.jar necessary to compile and run
32  * our tests, since we assume, that You either have it already or would like
33  * to get the current release Yourself.
34  * Please visit either:
35  * http://sourceforge.net/projects/junit
36  * or
37  * http://junit.org
38  * to obtain JUnit.
39  *
40  * Contact:
41  * email: heiko@susebox.de
42  */

43
44 package de.susebox.jtopas;
45
46 //-----------------------------------------------------------------------------
47
// Imports
48
//
49
import java.io.Reader JavaDoc;
50 import java.io.StringReader JavaDoc;
51
52 import junit.framework.Test;
53 import junit.framework.TestCase;
54 import junit.framework.TestSuite;
55 import junit.framework.Assert;
56
57 import de.susebox.TestUtilities;
58
59
60 //-----------------------------------------------------------------------------
61
// Class TestPatternMatching
62
//
63

64 /**<p>
65  * The class contains a number of test cases related to the pattern matching
66  * facility in a {@link Tokenizer}.
67  *</p>
68  *
69  * @see Tokenizer
70  * @see StandardTokenizer
71  * @see StandardTokenizerProperties
72  * @author Heiko Blau
73  */

74 public class TestPatternMatching extends TestCase {
75   
76   //---------------------------------------------------------------------------
77
// properties
78
//
79

80   
81   //---------------------------------------------------------------------------
82
// main method
83
//
84

85   /**
86    * call this method to invoke the tests
87    */

88   public static void main(String JavaDoc[] args) {
89     String JavaDoc[] tests = { TestPatternMatching.class.getName() };
90
91     TestUtilities.run(tests, args);
92   }
93   
94
95   //---------------------------------------------------------------------------
96
// suite method
97
//
98

99   /**
100    * Implementation of the JUnit method <code>suite</code>. For each set of test
101    * properties one or more tests are instantiated.
102    *
103    * @return a test suite
104    */

105   public static Test suite() {
106     TestSuite suite = new TestSuite(TestPatternMatching.class.getName());
107     
108     suite.addTest(new TestPatternMatching("compareJTopasAgainstPattern"));
109     suite.addTest(new TestPatternMatching("testNumberPattern"));
110     suite.addTest(new TestPatternMatching("testPatternExt"));
111     suite.addTest(new TestPatternMatching("testPatternTokenizer"));
112     suite.addTest(new TestPatternMatching("testFreePattern"));
113     return suite;
114   }
115   
116   
117   //---------------------------------------------------------------------------
118
// Constructor
119
//
120

/**
 * Creates a test instance for a single test method. The name is handed
 * unchanged to the {@link TestCase} constructor.
 *
 * @param test  name of the test method this instance should run
 */
public TestPatternMatching(String test) {
  super(test);
}
128
129   
130   //---------------------------------------------------------------------------
131
// Fixture setup and release
132
//
133

134   /**
135    * Sets up the fixture, for example, open a network connection.
136    * This method is called before a test is executed.
137    */

138   protected void setUp() throws Exception JavaDoc {}
139
140   
141   /**
142    * Tears down the fixture, for example, close a network connection.
143    * This method is called after a test is executed.
144    */

145   protected void tearDown() throws Exception JavaDoc {}
146   
147   
148   //---------------------------------------------------------------------------
149
// test cases
150
//
151

152   /**
153    * Test the case, when a line comment is not terminated by a newline character.
154    * This happens when the last line of a file is a line comment without a
155    * newline on its end.
156    * This is a rather common situation.
157    */

158   public void testNumberPattern() throws Throwable JavaDoc {
159     Reader JavaDoc reader = new StringReader JavaDoc("0.95 123 -1 -123123.92382 0,32 +4,001 -123,213 -0");
160     TokenizerProperties props = new StandardTokenizerProperties();
161     StandardTokenizer tokenizer = new StandardTokenizer(props);
162
163     props.removeSeparators(".\\-+,");
164     props.addPattern("[+\\-]?[0-9]+\\.?[0-9]*");
165     props.addPattern("[+\\-]?[0-9]+,?[0-9]*");
166     tokenizer.setSource(reader);
167
168     while (tokenizer.hasMoreToken()) {
169       Token token = tokenizer.nextToken();
170       
171       System.out.println(token);
172       if (token.getType() != Token.EOF) {
173         assertTrue("Wrong token type " + Token.getTypeName(token.getType()) + " for: " + token.getImage(),
174                     token.getType() == Token.PATTERN);
175       }
176     }
177   }
178
179   /**
180    * Test the case, when a line comment is not terminated by a newline character.
181    * This happens when the last line of a file is a line comment without a
182    * newline on its end.
183    * This is a rather common situation.
184    */

185   public void testPatternExt() throws Throwable JavaDoc {
186     Reader JavaDoc reader = new StringReader JavaDoc("// the main method\nvoid main100(100)\n{ int x1 = 0.95; int x2 = -1; int x3 = 3.0 -1.0; }");
187     int[] expected = { Token.NORMAL, Token.NORMAL, Token.SEPARATOR, Token.PATTERN, Token.SEPARATOR, Token.SEPARATOR,
188                                       Token.NORMAL, Token.NORMAL, Token.SEPARATOR, Token.PATTERN, Token.SEPARATOR,
189                                       Token.NORMAL, Token.NORMAL, Token.SEPARATOR, Token.PATTERN, Token.SEPARATOR,
190                                       Token.NORMAL, Token.NORMAL, Token.SEPARATOR, Token.PATTERN, Token.PATTERN, Token.SEPARATOR,
191                                       Token.SEPARATOR, Token.EOF };
192     TokenizerProperties props = new StandardTokenizerProperties();
193     StandardTokenizer tokenizer = new StandardTokenizer(props);
194
195     props.removeSeparators(".\\-");
196     props.addLineComment("//");
197     props.addPattern("[+\\-]?[0-9]+\\.?[0-9]*");
198     tokenizer.setSource(reader);
199
200     int index = 0;
201     while (tokenizer.hasMoreToken()) {
202       Token token = tokenizer.nextToken();
203       
204       System.out.println(token);
205       assertTrue("Wrong token type " + Token.getTypeName(token.getType()) + " for: " + token.getImage(),
206                   token.getType() == expected[index]);
207       index++;
208     }
209   }
210
211   /**
212    * A tokenizer working almost completely with pattern and whitespaces
213    */

214   public void testPatternTokenizer() throws Throwable JavaDoc {
215     Reader JavaDoc reader = new StringReader JavaDoc(
216         "// the main method\n"
217       + "void main(String[] argv)\n"
218       + "{\n"
219       + "int x1 = 0.95; // 1. variable\n"
220       + "int x2 = -1; // 2. variable\n"
221       + "int x3 = x1 - x2; // 3. variable\n"
222       + "int x4 = +1.01; // 4. variable\n"
223       + "}\n");
224     
225     Object JavaDoc lineComment = new String JavaDoc("<line comment>");
226     Object JavaDoc identifier = new String JavaDoc("<identifier>");
227     Object JavaDoc number = new String JavaDoc("<number>");
228     Object JavaDoc separator = new String JavaDoc("<separator>");
229     
230     Object JavaDoc[] expected = {
231       lineComment,
232       identifier, identifier, separator, identifier, separator, separator, identifier, separator,
233       separator,
234       identifier, identifier, separator, number, separator, lineComment,
235       identifier, identifier, separator, number, separator, lineComment,
236       identifier, identifier, separator, identifier, separator, identifier, separator, lineComment,
237       identifier, identifier, separator, number, separator, lineComment,
238       separator
239     };
240     
241     TokenizerProperties props = new StandardTokenizerProperties();
242     StandardTokenizer tokenizer = new StandardTokenizer(props);
243
244     props.addPattern("[+\\-]?[0-9]+\\.?[0-9]*", number);
245     props.addPattern("[a-z][a-z0-9]*", identifier, Flags.F_NO_CASE);
246     props.addPattern("//.*$", lineComment, Flags.F_FREE_PATTERN);
247     props.addSpecialSequence("{", separator);
248     props.addSpecialSequence("}", separator);
249     props.addSpecialSequence("(", separator);
250     props.addSpecialSequence(")", separator);
251     props.addSpecialSequence("[", separator);
252     props.addSpecialSequence("]", separator);
253     props.addSpecialSequence("=", separator);
254     props.addSpecialSequence(";", separator);
255     props.addSpecialSequence("-", separator);
256     tokenizer.setSource(reader);
257
258     int index = 0;
259     while (tokenizer.hasMoreToken()) {
260       Token token = tokenizer.nextToken();
261       
262       if (token.getType() != Token.EOF) {
263         System.out.println(token);
264         assertTrue("Wrong token companion: " + token.getCompanion() + " for: " + token.getImage(),
265                     token.getCompanion() == expected[index]);
266       }
267       index++;
268     }
269   }
270
271   /**
272    * A tokenizer working almost completely with pattern and whitespaces
273    */

274   public void testFreePattern() throws Throwable JavaDoc {
275     Reader JavaDoc reader = new StringReader JavaDoc(
276         "// the main method\n"
277       + "void main(String[] argv)\n"
278       + "{\n"
279       + "int x1 =+1.01;\n"
280       + "int x2 =- 2.02;\n"
281       + "int x3 = + 2.02; // line comment\n"
282       + "int x4 = - 2.02+ 3.1232;\n"
283       + "fct(x1, x2, x3)// a call\n"
284       + "}\n");
285     
286     Object JavaDoc lineComment = new String JavaDoc("<line comment>");
287     Object JavaDoc identifier = new String JavaDoc("<identifier>");
288     Object JavaDoc number = new String JavaDoc("<number>");
289     Object JavaDoc separator = new String JavaDoc("<separator>");
290     Object JavaDoc paraList = new String JavaDoc("<parameter list>");
291     
292     Object JavaDoc[] expected = {
293       lineComment,
294       identifier, identifier, paraList,
295       separator,
296       identifier, identifier, separator, number, separator,
297       identifier, identifier, separator, number, separator,
298       identifier, identifier, separator, number, separator, lineComment,
299       identifier, identifier, separator, number, number, separator,
300       identifier, paraList, lineComment,
301       separator
302     };
303     
304     TokenizerProperties props = new StandardTokenizerProperties();
305     StandardTokenizer tokenizer = new StandardTokenizer(props);
306
307     props.addPattern("[+\\-]?[ \t]*[0-9]+\\.?[0-9]*", number, Flags.F_FREE_PATTERN);
308     props.addPattern("[a-z][a-z0-9]*", identifier, Flags.F_NO_CASE);
309     props.addPattern("//.*$", lineComment, Flags.F_FREE_PATTERN);
310     props.addPattern("\\(.*\\)", paraList, Flags.F_FREE_PATTERN);
311     props.addSpecialSequence("{", separator);
312     props.addSpecialSequence("}", separator);
313     props.addSpecialSequence("(", separator);
314     props.addSpecialSequence(")", separator);
315     props.addSpecialSequence("[", separator);
316     props.addSpecialSequence("]", separator);
317     props.addSpecialSequence("=", separator);
318     props.addSpecialSequence(";", separator);
319     props.addSpecialSequence("-", separator);
320     tokenizer.setSource(reader);
321
322     int index = 0;
323     while (tokenizer.hasMoreToken()) {
324       Token token = tokenizer.nextToken();
325       
326       if (token.getType() != Token.EOF) {
327         System.out.println(token);
328         assertTrue("Wrong token companion: " + token.getCompanion() + " for: " + token.getImage(),
329                     token.getCompanion() == expected[index]);
330       }
331       index++;
332     }
333   }
334
335   /**
336    * Comparing JTopas string and comment handling against regular expression
337    * handling
338    */

339   public void compareJTopasAgainstPattern() throws Throwable JavaDoc {
340     String JavaDoc data =
341         "/* File: $FILENAME */\n"
342       + "/**\n"
343       + "* Starting with a Javadoc comment.\n"
344       + "* This comment describes the class below.\n"
345       + "*/\n"
346       + "public class TestClass {\n"
347       + " //-------------------------------------------------------------------\n"
348       + " // Constants\n"
349       + " //-------------------------------------------------------------------\n"
350       + "\n"
351       + " /**\n"
352       + " * A constant with its own comment\n"
353       + " */\n"
354       + " public static final String URL = \"http://jtopas.sourceforge.net/jtopas/index.html\";\n"
355       + "\n"
356       + " /**\n"
357       + " * Another constant with its own comment\n"
358       + " */\n"
359       + " public static final String HELP =\n"
360       + " \"This is the help for JTopas.\"\n"
361       + " + \"You can obtain the software from \" + URL + \".\"\n"
362       + " + \"It is a realy easy to use library.\";\n"
363       + "\n"
364       + " /**\n"
365       + " * The main method takes the usual array of arguments. It also accepts\n"
366       + " * <code>null</code>.\n"
367       + " *\n"
368       + " * @param args the arguments to the main method.\n"
369       + " */\n"
370       + " public void main(String[] argv)\n"
371       + " {\n"
372       + " // a loop over all arguments\n"
373       + " for (int ii = 0; ii < argv.length; ++ii) {\n"
374       + " char cc1 = 'A';\n"
375       + " char cc2 = 'B';\n"
376       // + " char cc3 = '\\'';\n"
377
+ " System.out.println(\"String #\" + ii +\": \" + argv[ii] + \".\";\n"
378       + " }\n"
379       + " // ready message\n"
380       + " System.out.println(\"Ready printing Strings.\";\n"
381       + " /*\n"
382       + " here we add future extensions:\n"
383       + " for instance the exit call :-)\n"
384       + " */\n"
385       + " }\n"
386       + "}\n";
387     
388     Object JavaDoc docComment = new String JavaDoc("<doc comment>");
389     Object JavaDoc blockComment = new String JavaDoc("<block comment>");
390     Object JavaDoc lineComment = new String JavaDoc("<line comment>");
391     Object JavaDoc string = new String JavaDoc("<string>");
392     Object JavaDoc character = new String JavaDoc("<character>");
393     
394     TokenizerProperties jtopasProps = new StandardTokenizerProperties(Flags.F_RETURN_WHITESPACES | Flags.F_TOKEN_POS_ONLY);
395     TokenizerProperties patternProps = new StandardTokenizerProperties(Flags.F_RETURN_WHITESPACES | Flags.F_TOKEN_POS_ONLY);
396
397     // patternProps.addPattern("/\\*\\*.*\\*/", docComment, Flags.F_FREE_PATTERN);
398
patternProps.addPattern("/\\*.*?\\*/", blockComment, Flags.F_FREE_PATTERN);
399     patternProps.addPattern("//.*?$", lineComment, Flags.F_FREE_PATTERN);
400     patternProps.addPattern("\"[^$\"]*?\"", string, Flags.F_FREE_PATTERN);
401     // patternProps.addPattern("\"[[^$\"]|[\\\\&&\"]]*?\"", string, Flags.F_FREE_PATTERN);
402
patternProps.addPattern("'.'", character, Flags.F_FREE_PATTERN);
403     // patternProps.addPattern("'[[^']|[\\\\&&']]+?'", character, Flags.F_FREE_PATTERN);
404

405     // jtopasProps.addBlockComment("/**", "*/", docComment);
406
jtopasProps.addBlockComment("/*", "*/", blockComment);
407     jtopasProps.addLineComment("//", lineComment);
408     jtopasProps.addString("\"", "\"", "\\", string);
409     jtopasProps.addString("'", "'", "\\", character);
410
411     tokenize(jtopasProps, data);
412     tokenize(patternProps, data);
413   }
414
415   
416   /**
417    * Tokenize with the given tokenizer.
418    */

419   private void tokenize(TokenizerProperties props, String JavaDoc data) throws Throwable JavaDoc {
420     long startTime = System.currentTimeMillis();
421     StandardTokenizer tokenizer = new StandardTokenizer(props);
422     
423     try {
424       for (int ii = 0; ii < 100; ++ii) {
425         Reader JavaDoc reader = new StringReader JavaDoc(data);
426
427         try {
428           tokenizer.setSource(reader);
429           while (tokenizer.hasMoreToken()) {
430             Token token = tokenizer.nextToken();
431             // System.out.println( + ": " + tokenizer.currentImage());
432
}
433         } finally {
434           reader.close();
435         }
436       }
437     } finally {
438       tokenizer.close();
439     }
440     
441     // print elapsed time
442
long diffTime = System.currentTimeMillis() - startTime;
443     System.out.println(" Finished after " + diffTime + " milliseconds.");
444   }
445 }
446
447
Popular Tags