KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > de > susebox > jtopas > TestStandardTokenizer


/*
 * TestStandardTokenizer.java: JUnit test for the StandardTokenizer
 *
 * Copyright (C) 2002 Heiko Blau
 *
 * This file belongs to the JTopas test suite.
 * The JTopas test suite is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by the
 * Free Software Foundation; either version 2.1 of the License, or (at your option)
 * any later version.
 *
 * This software is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License along
 * with the JTopas test suite. If not, write to the
 *
 * Free Software Foundation, Inc.
 * 59 Temple Place, Suite 330,
 * Boston, MA 02111-1307
 * USA
 *
 * or check the Internet: http://www.fsf.org
 *
 * The JTopas test suite uses the test framework JUnit by Kent Beck and Erich Gamma.
 * You should have received a copy of their JUnit licence agreement along with
 * the JTopas test suite.
 *
 * We do NOT provide the JUnit archive junit.jar necessary to compile and run
 * our tests, since we assume that you either have it already or would like
 * to get the current release yourself.
 * Please visit either:
 * http://sourceforge.net/projects/junit
 * or
 * http://junit.org
 * to obtain JUnit.
 *
 * Contact:
 * email: heiko@susebox.de
 */

43
package de.susebox.jtopas;

//-----------------------------------------------------------------------------
// Imports
//
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.Properties;
import java.util.Vector;

import junit.framework.Assert;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;

import de.susebox.java.lang.ExtRuntimeException;

import de.susebox.TestUtilities;
65
//-----------------------------------------------------------------------------
// Class TestStandardTokenizer
//

70 /**<p>
71  * This test suite works with a test configuration file. This file contains some
72  * sets of properties, each set for one or more different test runs.
73  *</p><p>
74  * The properties are defined as class constants. In the configuration file, a
75  * property consists of the property name and a number identifying the property
76  * set.
77  *</p>
78  *
79  * @see Tokenizer
80  * @see AbstractTokenizer
81  * @see java.io.InputStreamReader
82  * @author Heiko Blau
83  */

84 public class TestStandardTokenizer extends TestCase {
85   
86   //---------------------------------------------------------------------------
87
// properties
88
//
89

90   /**
91    * The name of the test configuration file. This file will be read by
92    * {@link java.lang.Class#getResourceAsStream}.
93    */

94   public static final String JavaDoc CONFIG_FILE = "TestStandardTokenizer.conf";
95   
96   /**
97    * Property for the tests {@link #testLinkParsing} and {@link #testContentsParsing}
98    */

99   public static final String JavaDoc PROP_PATH = "Path";
100   
101   /**
102    * Property for the test {@link #testLineCounting}.
103    */

104   public static final String JavaDoc PROP_COUNTLINES_PATH = "CountLinesPath";
105   
106   
107   //---------------------------------------------------------------------------
108
// main method
109
//
110

111   /**
112    * call this method to invoke the tests
113    */

114   public static void main(String JavaDoc[] args) {
115     String JavaDoc[] tests = { TestStandardTokenizer.class.getName() };
116
117     TestUtilities.run(tests, args);
118   }
119   
120
121   //---------------------------------------------------------------------------
122
// suite method
123
//
124

125   /**
126    * Implementation of the JUnit method <code>suite</code>. For each set of test
127    * properties one or more tests are instantiated.
128    *
129    * @return a test suite
130    */

131   public static Test suite() {
132     TestSuite suite = new TestSuite(TestStandardTokenizer.class.getName());
133     Properties JavaDoc props = new Properties JavaDoc();
134     int count = 1;
135     String JavaDoc path;
136     URL JavaDoc url;
137     
138     try {
139       props.load(TestStandardTokenizer.class.getResourceAsStream(CONFIG_FILE));
140     } catch (Exception JavaDoc ex) {
141       throw new ExtRuntimeException(ex);
142     }
143     
144     while ((path = props.getProperty(PROP_PATH + count)) != null) {
145       if ((url = TestStandardTokenizer.class.getResource(path)) != null) {
146         path = url.getFile();
147       }
148       suite.addTest(new TestStandardTokenizer("testLinkParsing", path));
149       suite.addTest(new TestStandardTokenizer("testContentsParsing", path));
150       suite.addTest(new TestStandardTokenizer("testContentsFormatting", path));
151       count++;
152     }
153     count = 1;
154     while ((path = props.getProperty(PROP_COUNTLINES_PATH + count)) != null) {
155       if ((url = TestStandardTokenizer.class.getResource(path)) != null) {
156         path = url.getFile();
157       }
158       suite.addTest(new TestStandardTokenizer("testLineCounting", path));
159       count++;
160     }
161     return suite;
162   }
163   
164   
165   //---------------------------------------------------------------------------
166
// Constructor
167
//
168

169   /**
170    * Default constructor. Standard input {@link java.lang.System#in} is used
171    * to construct the input stream reader.
172    */

173   public TestStandardTokenizer(String JavaDoc test, String JavaDoc path) {
174     super(test);
175     _path = path;
176   }
177
178   
179   //---------------------------------------------------------------------------
180
// Fixture setup and release
181
//
182

183     /**
184      * Sets up the fixture, for example, open a network connection.
185      * This method is called before a test is executed.
186      */

187   protected void setUp() throws Exception JavaDoc {
188     InputStream JavaDoc stream = new FileInputStream JavaDoc(_path);
189     
190     _reader = new InputStreamReader JavaDoc(stream);
191     }
192
193   
194     /**
195      * Tears down the fixture, for example, close a network connection.
196      * This method is called after a test is executed.
197      */

198     protected void tearDown() throws Exception JavaDoc {
199     _reader.close();
200     }
201   
202   //---------------------------------------------------------------------------
203
// test cases
204
//
205

206   public void testLinkParsing() throws Throwable JavaDoc {
207     long start = System.currentTimeMillis();
208     TokenizerProperties props = new StandardTokenizerProperties();
209     Tokenizer tokenizer = new StandardTokenizer(props);
210     Vector JavaDoc links = new Vector JavaDoc();
211     Token token;
212
213     try {
214       props.setParseFlags(Flags.F_NO_CASE);
215       props.setSeparators("=");
216       props.addString("\"", "\"", "\\");
217       props.addBlockComment(">", "<"); // overread everything outside of tags
218
props.addBlockComment("SCRIPT", "/SCRIPT"); // overread script parts
219
props.addBlockComment("!--", "--"); // overread HTML comments without < and >
220
props.addKeyword("HREF");
221       tokenizer.setSource(new ReaderSource(_reader));
222
223       System.out.println("\nStart looking for links in \"" + _path + "\"");
224       while (tokenizer.hasMoreToken()) {
225         token = tokenizer.nextToken();
226         if (token.getType() == Token.KEYWORD) {
227           tokenizer.nextToken(); // should be the '=' character
228
System.out.println(" " + tokenizer.nextImage());
229           assertTrue(tokenizer.currentImage() != null);
230           assertTrue(tokenizer.currentToken().getType() == Token.STRING);
231         }
232       }
233     } finally {
234       // Cleanup
235
tokenizer.close();
236     }
237
238     long diff = System.currentTimeMillis() - start;
239     System.out.println("Finished after " + diff + " milliseconds");
240   }
241   
242   
243   /**
244    * Extracting the pure contents of a HTML stream.
245    */

246   public void testContentsParsing() throws Throwable JavaDoc {
247     long start = System.currentTimeMillis();
248     TokenizerProperties props = new StandardTokenizerProperties();
249     Tokenizer tokenizer = new StandardTokenizer(props);
250
251     try {
252       tokenizer.setSource(new ReaderSource(_reader));
253       System.out.println("\nStart extracting contents in \"" + _path + "\"");
254
255       props.setParseFlags(Flags.F_NO_CASE | Flags.F_TOKEN_POS_ONLY);
256       props.setWhitespaces(null);
257       props.setSeparators(null);
258       props.addBlockComment("<", ">"); // overread HTML tags
259
props.addBlockComment("<HEAD>", "</HEAD>"); // overread HTML header
260
props.addBlockComment("<!--", "-->"); // overread HTML comments
261

262       while (tokenizer.hasMoreToken()) {
263         tokenizer.nextToken();
264         if (tokenizer.currentToken().getType() != Token.EOF) {
265           System.out.println(tokenizer.currentImage());
266           assertTrue("Method currentImage() returned null.", tokenizer.currentImage() != null);
267         }
268         assertTrue("Found token type " + tokenizer.currentToken().getType()
269                + ", expected NORMAL (" + Token.NORMAL + ") or EOF (" + Token.EOF + ").",
270                tokenizer.currentToken().getType() == Token.NORMAL
271                || tokenizer.currentToken().getType() == Token.EOF);
272       }
273     } finally {
274       tokenizer.close();
275     }
276     
277     long diff = System.currentTimeMillis() - start;
278     System.out.println("Finished after " + diff + " milliseconds");
279   }
280   
281   
282   /**
283    * Testing the line and column counting correctness. This is done by using a
284    * specially formatted file. At a line x and a column y, the method expects
285    * the token "x/y", e.g. "0/0" at the very beginning of the file.
286    */

287   public void testLineCounting() throws Throwable JavaDoc {
288     long start = System.currentTimeMillis();
289     TokenizerProperties props = new StandardTokenizerProperties();
290     Tokenizer tokenizer = new StandardTokenizer(props);
291     Token token;
292     String JavaDoc image;
293     int delPos;
294     int line;
295     int col;
296
297     System.out.println("\nStart counting lines in \"" + _path + "\"");
298     
299     try {
300       tokenizer.setSource(new ReaderSource(_reader));
301       props.setParseFlags(Flags.F_TOKEN_POS_ONLY | Flags.F_COUNT_LINES);
302       props.setWhitespaces(TokenizerProperties.DEFAULT_WHITESPACES);
303       props.setSeparators(TokenizerProperties.DEFAULT_SEPARATORS);
304       props.addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT);
305       props.addBlockComment(TokenizerProperties.DEFAULT_BLOCK_COMMENT_START, TokenizerProperties.DEFAULT_BLOCK_COMMENT_END);
306
307       while (tokenizer.hasMoreToken()) {
308         token = tokenizer.nextToken();
309         switch (token.getType()) {
310         case Token.NORMAL:
311           image = tokenizer.currentImage();
312           line = Integer.parseInt(image);
313           assertTrue("Missing separator \"/\".", tokenizer.nextToken().getType() == Token.SEPARATOR && tokenizer.currentImage().equals("/"));
314           assertTrue("Missing column number", tokenizer.nextToken().getType() == Token.NORMAL);
315           image = tokenizer.currentImage();
316           col = Integer.parseInt(image);
317           assertTrue("Found line number " + token.getStartLine() + " does not match expected line number " + line,
318                     line == token.getStartLine());
319           assertTrue("Found column number " + token.getStartColumn() + " does not match expected column number " + col,
320                     col == token.getStartColumn());
321           assertTrue("Found token length " + tokenizer.currentToken().getLength() + " does not match expected length " + image.length(),
322                     image.length() == tokenizer.currentToken().getLength());
323           break;
324         }
325       }
326     } finally {
327       tokenizer.close();
328     }
329     
330     long diff = System.currentTimeMillis() - start;
331     System.out.println("Finished after " + diff + " milliseconds");
332   }
333   
334
335   /**
336    * Advanced contents extracting. Lines will be around 80 characters, a basic
337    * paragraph recognition takes place.
338    */

339   public void testContentsFormatting() throws Throwable JavaDoc{
340     long start = System.currentTimeMillis();
341     TokenizerProperties props = new StandardTokenizerProperties();
342     Tokenizer tokenizer = new StandardTokenizer(props);
343     Token token;
344     String JavaDoc image;
345     int len;
346     Object JavaDoc startPRE = new Object JavaDoc();
347     Object JavaDoc endPRE = new Object JavaDoc();
348     int inPRE = 0;
349     
350     // Counter for expected parts
351
int wsCount = 0;
352     int normalCount = 0;
353     int specCount = 0;
354     int commentCount = 0;
355
356     System.out.println("\nStart formatting contents in \"" + _path + "\"");
357
358     try {
359       tokenizer.setSource(new ReaderSource(_reader));
360       props.setParseFlags( Flags.F_NO_CASE
361                          | Flags.F_TOKEN_POS_ONLY
362                          | Flags.F_RETURN_WHITESPACES);
363       props.setSeparators(null);
364       props.addBlockComment("<", ">");
365       props.addBlockComment("<HEAD>", "</HEAD>");
366       props.addBlockComment("<!--", "-->");
367       props.addSpecialSequence("<b>", "");
368       props.addSpecialSequence("</b>", "");
369       props.addSpecialSequence("<i>", "");
370       props.addSpecialSequence("</i>", "");
371       props.addSpecialSequence("<code>", "");
372       props.addSpecialSequence("</code>", "");
373       props.addSpecialSequence("<pre>", startPRE);
374       props.addSpecialSequence("</pre>", endPRE);
375       props.addSpecialSequence("&auml;", "\u00E4", 0, Flags.F_NO_CASE);
376       props.addSpecialSequence("&ouml;", "\u00F6", 0, Flags.F_NO_CASE);
377       props.addSpecialSequence("&uuml;", "\u00FC", 0, Flags.F_NO_CASE);
378       props.addSpecialSequence("&szlig;", "\u00DF", 0, Flags.F_NO_CASE);
379       props.addSpecialSequence("&Auml;", "\u00C4", 0, Flags.F_NO_CASE);
380       props.addSpecialSequence("&Ouml;", "\u00D6", 0, Flags.F_NO_CASE);
381       props.addSpecialSequence("&Uuml;", "\u00DC", 0, Flags.F_NO_CASE);
382       props.addSpecialSequence("&nbsp;", " ", 0, Flags.F_NO_CASE);
383       props.addSpecialSequence("&gt;", ">", 0, Flags.F_NO_CASE);
384       props.addSpecialSequence("&lt;", "<", 0, Flags.F_NO_CASE);
385       props.addSpecialSequence("&copy;", "\u00A9");
386       props.addSpecialSequence("&euro;", "\u20AC");
387
388       len = 0;
389       while (tokenizer.hasMoreToken()) {
390         token = tokenizer.nextToken();
391         switch (token.getType()) {
392         case Token.NORMAL:
393           image = tokenizer.currentImage();
394           assertTrue("Found HTML tag in normal token: " + image, image.indexOf('<') < 0);
395           System.out.print(image);
396           if (inPRE <= 0) {
397             len += token.getLength();
398           }
399           normalCount++;
400           break;
401
402         case Token.SPECIAL_SEQUENCE:
403           image = tokenizer.currentImage();
404           assertTrue("Couldn't find special sequence in properties: " + image, props.specialSequenceExists(image));
405           if (token.getCompanion() == startPRE) {
406             System.out.println();
407             len = 0;
408             inPRE++;
409           } else if (token.getCompanion() == endPRE) {
410             System.out.println();
411             len = 0;
412             inPRE--;
413           } else {
414             System.out.print((String JavaDoc)token.getCompanion());
415           }
416           specCount++;
417           break;
418
419         case Token.BLOCK_COMMENT:
420           if (len > 0) {
421             System.out.println();
422             len = 0;
423           }
424           commentCount++;
425           break;
426
427         case Token.WHITESPACE:
428           if (inPRE > 0) {
429             System.out.print(tokenizer.currentImage());
430           } else if (len > 75) {
431             System.out.println();
432             len = 0;
433           } else if (len > 0) {
434             System.out.print(' ');
435             len++;
436           }
437           wsCount++;
438           break;
439         }
440       }
441
442       // Where should have been something of each categorie
443
assertTrue("Not one simple context part was found in file " + _path + ".", normalCount > 0);
444       assertTrue("No HTML tag found " + _path + ".", commentCount > 0);
445       assertTrue("No whitespaces found " + _path + ".", wsCount > 0);
446       
447     } finally {
448       // cleanup
449
tokenizer.close();
450     }
451
452     // Ready
453
long diff = System.currentTimeMillis() - start;
454     System.out.println("Finished after " + diff + " milliseconds");
455   }
456   
457   //---------------------------------------------------------------------------
458
// Members
459
//
460
protected InputStreamReader JavaDoc _reader = null;
461   protected String JavaDoc _path = null;
462 }
463
Popular Tags