TestEmbeddedTokenizer


1   /*
2    * TestEmbeddedTokenizer.java: JUnit test for the StandardTokenizer
3    *
4    * Copyright (C) 2001 Heiko Blau
5    *
6    * This file belongs to the Susebox Java core test suite.
7    * The Susebox Java core test suite is free software; you can redistribute it 
8    * and/or modify it under the terms of the GNU Lesser General Public License as 
9    * published by the Free Software Foundation; either version 2.1 of the License, 
10   * or (at your option) any later version.
11   *
12   * This software is distributed in the hope that it will be useful, but WITHOUT
13   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
14   * FITNESS FOR A PARTICULAR PURPOSE. 
15   * See the GNU Lesser General Public License for more details.
16   *
17   * You should have received a copy of the GNU Lesser General Public License along 
18   * with the Susebox Java core test suite. If not, write to the
19   *
20   *   Free Software Foundation, Inc.
21   *   59 Temple Place, Suite 330, 
22   *   Boston, MA 02111-1307 
23   *   USA
24   *
25   * or check the Internet: http://www.fsf.org
26   *
27   * The Susebox Java core test suite uses the test framework JUnit by Kent Beck 
28   * and Erich Gamma. You should have received a copy of their JUnit licence 
29   * agreement along with the Susebox Java test suite.
30   *
31   * We do NOT provide the JUnit archive junit.jar necessary to compile and run 
32   * our tests, since we assume, that You  either have it already or would like 
33   * to get the current release Yourself. 
34   * Please visit either:
35   *   http://sourceforge.net/projects/junit
36   * or
37   *   http://junit.org
38   * to obtain JUnit.
39   *
40   * Contact:
41   *   email: heiko@susebox.de 
42   */
43  
44  package de.susebox.jtopas;
45  
46  //-----------------------------------------------------------------------------
47  // Imports
48  //
49  import java.io.InputStream  ;
50  import java.io.FileInputStream  ;
51  import java.io.InputStreamReader  ;
52  import java.util.Vector  ;
53  import java.util.Properties  ;
54  import java.net.URL  ;
55  
56  import junit.framework.Test;
57  import junit.framework.TestCase;
58  import junit.framework.TestSuite;
59  import junit.framework.Assert;
60  
61  import de.susebox.java.lang.ExtRuntimeException;
62  
63  import de.susebox.TestUtilities;
64  
65  
66  //-----------------------------------------------------------------------------
67  // Class TestEmbeddedTokenizer
68  //
69  
70  /**<p>
71   * This unit test checks the embedded-tokenizer feature of the class {@link StandardTokenizer}.
72   * With this technique it is possible to parse multipart documents like HTML with
73   * embedded CSS and script parts, Java and javadoc comments etc.
74   *</p><p>
75   * This test suite works with a test configuration file. This file contains some
76   * sets of properties, each set for one or more different test runs.
77   *</p><p>
78   * The properties are defined as class constants. In the configuration file, a 
79   * property consists of the property name and a number identifying the property
80   * set. 
81   *</p>
82   *
83   * @see     StandardTokenizer
84   * @author  Heiko Blau
85   */
86  public class TestEmbeddedTokenizer extends TestCase {
87    
88    //---------------------------------------------------------------------------
89    // properties
90    //
91  
92    /**
93     * The name of the test configuration file. This file will be read by 
94     * {@link java.lang.Class#getResourceAsStream}.
95     */
96    public static final String   CONFIG_FILE = "TestEmbeddedTokenizer.conf";
97    
98    /**
99     * Property for the test {@link #testEmbeddedTokenizer}
100    */
101   public static final String   PROP_PATH = "Path";
102   
103   /**
104    * Property for the test {@link #testJavaTokenizer}
105    */
106   public static final String   PROP_JAVAPATH = "JavaPath";
107   
108   
109   //---------------------------------------------------------------------------
110   // main method
111   //
112   
113   /**
114    * call this method to invoke the tests.
115    *
116    * @param args  unused
117    */
118   public static void main(String  [] args) {
119     String  []   tests = { TestEmbeddedTokenizer.class.getName() };
120 
121     TestUtilities.run(tests, args);
122   }
123   
124 
125   //---------------------------------------------------------------------------
126   // suite method
127   //
128   
129   /**
130    * Implementation of the JUnit method <code>suite</code>. For each set of test
131    * properties one or more tests are instantiated.
132    *
133    * @return a test suite
134    */
135   public static Test suite() {
136     TestSuite   suite = new TestSuite(TestEmbeddedTokenizer.class.getName());
137     Properties    props = new Properties  ();
138     int         count = 1;
139     String        path;
140     URL           url;
141     
142     try {
143       props.load(TestEmbeddedTokenizer.class.getResourceAsStream(CONFIG_FILE));
144     } catch (Exception   ex) {
145       throw new ExtRuntimeException(ex);
146     }
147 
148     // test on HTML files
149     while ((path = props.getProperty(PROP_PATH + count)) != null) {
150       if ((url = TestEmbeddedTokenizer.class.getResource(path)) != null) {
151         path = url.getFile();
152       }
153       suite.addTest(new TestEmbeddedTokenizer("testEmbeddedTokenizer", path));
154       count++;
155     }
156 
157     // tests on Java files
158     count = 1;
159     while ((path = props.getProperty(PROP_JAVAPATH + count)) != null) {
160       if ((url = TestEmbeddedTokenizer.class.getResource(path)) != null) {
161         path = url.getFile();
162       }
163       suite.addTest(new TestEmbeddedTokenizer("testJavaTokenizer", path));
164       count++;
165     }
166     return suite;
167   }
168   
169   
170   //---------------------------------------------------------------------------
171   // Constructor
172   //
173   
174   /**
175    * Initializing the instance with the test file path
176    *
177    * @param test  which test method should be invoked
178    * @param path  name of test configuration file  
179    */  
180   public TestEmbeddedTokenizer(String   test, String   path) {
181     super(test);
182     _path = path;
183   }
184 
185   
186   //---------------------------------------------------------------------------
187   // Fixture setup and release
188   //
189   
190   /**
191    * Sets up the fixture, for example, open a network connection.
192    * This method is called before a test is executed.
193    *
194    * @throws Exception for anything that might go wrong
195    */
196   protected void setUp() throws Exception   {
197     InputStream    stream = new FileInputStream  (_path);
198     
199     _reader = new InputStreamReader  (stream);
200   }
201 
202   
203   /**
204    * Tears down the fixture, for example, close a network connection.
205    * This method is called after a test is executed.
206    *
207    * @throws Exception for anything that might go wrong
208    */
209   protected void tearDown() throws Exception   {
210     _reader.close();
211   }
212   
213   //---------------------------------------------------------------------------
214   // test cases
215   //
216   
217   
218   /**
219    * This method reads the given stream as a Java source. It extracts javadoc
220    * comments and source code.
221    * There should be a class or interface name in every Java source. The opening
222    * and closing brackets should match etc.
223    *
224    * @throws Throwable   for anything that might go wrong
225    * @see   #testEmbeddedTokenizer
226    */
227   public void testJavaTokenizer() throws Throwable   {
228     long                        start         = System.currentTimeMillis();
229     StandardTokenizerProperties javaProps     = new StandardTokenizerProperties();
230     StandardTokenizerProperties docProps      = new StandardTokenizerProperties();
231     StandardTokenizer           javaTokenizer = new StandardTokenizer(javaProps);
232     StandardTokenizer           docTokenizer  = new StandardTokenizer(docProps);
233     StandardTokenizer           currTokenizer = javaTokenizer;
234     Object                        openBlock     = new Object  ();
235     Object                        closeBlock    = new Object  ();
236     Object                        atSign        = new Object  ();
237     int                         blockBalance  = 0;
238     Token                       token;
239     int                         lastStartLineNo = -1;
240     int                         lastStartColNo  = -1;
241 
242     javaProps.setParseFlags(Flags.F_TOKEN_POS_ONLY | Flags.F_KEEP_DATA | Flags.F_COUNT_LINES);
243     docProps.setParseFlags(Flags.F_NO_CASE);
244     
245     javaProps.addSpecialSequence("/**", docTokenizer);
246     javaProps.addSpecialSequence("{", openBlock);
247     javaProps.addSpecialSequence("}", closeBlock);
248     javaProps.addBlockComment(TokenizerProperties.DEFAULT_BLOCK_COMMENT_START, TokenizerProperties.DEFAULT_BLOCK_COMMENT_END);
249     javaProps.addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT);
250     javaProps.addString(TokenizerProperties.DEFAULT_STRING_START, TokenizerProperties.DEFAULT_STRING_END, TokenizerProperties.DEFAULT_STRING_ESCAPE);
251     javaProps.addString("'", "'", "\\");
252     docProps.addSpecialSequence("*/", javaTokenizer);
253     docProps.addSpecialSequence("@", atSign);
254     docProps.addKeyword("param");
255     docProps.addKeyword("return");
256     docProps.addKeyword("throws");
257     docProps.addKeyword("author");
258     docProps.addKeyword("version");
259     docProps.addKeyword("link");
260     docProps.addKeyword("see");
261     docProps.addKeyword("deprecated");
262     
263     javaTokenizer.setSource(_reader);
264     javaTokenizer.addTokenizer(docTokenizer);
265 
266     System.out.println("\nStart parsing \"" + _path + "\"");
267     while (currTokenizer.hasMoreToken()) {
268       token = currTokenizer.nextToken();
269       
270       // Line counting test
271       assertTrue(token.getStartLine() >= lastStartLineNo);
272       if (token.getStartLine() == lastStartLineNo) {
273         assertTrue(token.getStartColumn() >= lastStartColNo);
274         if (token.getEndLine() == lastStartLineNo) {
275           assertTrue(token.getEndColumn() == token.getStartColumn() + token.getLength());
276         }
277       }
278       lastStartLineNo = token.getStartLine();
279       lastStartColNo  = token.getStartColumn();
280       
281       // tokenizer switching
282       switch (token.getType()) {
283         case Token.SPECIAL_SEQUENCE:
284           if (token.getCompanion() instanceof StandardTokenizer) {
285             StandardTokenizer tokenizer = (StandardTokenizer)token.getCompanion();
286             
287             currTokenizer.switchTo(tokenizer);
288             currTokenizer = tokenizer;
289           } else if (token.getCompanion() == openBlock) {
290             blockBalance++;
291           } else if (token.getCompanion() == closeBlock) {
292             blockBalance--;
293           } else if (token.getCompanion() == atSign) {
294             token = currTokenizer.nextToken();
295             assertTrue("Expected keyword after @ sign in javadoc comment, but found \"" + currTokenizer.currentImage(),
296                       token.getType() == Token.KEYWORD);
297           }
298           break;
299       }
300     }
301     
302     // some checks
303     assertTrue("Braces should be balanced in Java file \"" 
304               + _path + "\", but detected inbalance " + blockBalance,
305               blockBalance == 0);
306 
307     // print elapsed time
308     long diff = System.currentTimeMillis() - start;
309     System.out.println("Finished after " + diff + " milliseconds");
310   }
311     
312 
313   
314   /**
315    * The method takes the HTML file given in the constructor, and parses with
316    * the main HTML tokenizer and two embedded tokenizers for JavaScript and
317    * CSS.
318    *
319    * @throws Throwable   for anything that might go wrong
320    * @see   #testEmbeddedTokenizer
321    */
322   public void testEmbeddedTokenizer() throws Throwable   {
323     long                        start         = System.currentTimeMillis();
324     StandardTokenizerProperties htmlProps     = new StandardTokenizerProperties();
325     StandardTokenizerProperties jsProps       = new StandardTokenizerProperties();
326     StandardTokenizerProperties cssProps      = new StandardTokenizerProperties();
327     StandardTokenizer           htmlTokenizer = new StandardTokenizer(htmlProps);
328     StandardTokenizer           jsTokenizer   = new StandardTokenizer(jsProps);
329     StandardTokenizer           cssTokenizer  = new StandardTokenizer(cssProps);
330     String                        keywordLang   = new String  ("LANGUAGE");
331     Object                        endOfEmbedded = new Object  ();
332     Object                        startOfTag    = new Object  ();
333     Object                        endOfTag      = new Object  ();
334     Object                        endOfScript   = new Object  ();
335     Token                       token;
336     int                         lastStartLineNo = -1;
337     int                         lastStartColNo  = -1;
338 
339     htmlProps.setParseFlags(Flags.F_TOKEN_POS_ONLY 
340                           | Flags.F_KEEP_DATA 
341                           | Flags.F_COUNT_LINES);
342     cssProps.setParseFlags (Flags.F_TOKEN_POS_ONLY | Flags.F_NO_CASE);
343     jsProps.setParseFlags  (Flags.F_TOKEN_POS_ONLY);
344     
345     htmlProps.addKeyword("SCRIPT", jsTokenizer);
346     htmlProps.addKeyword("LANGUAGE", keywordLang);
347     htmlProps.addKeyword("STYLE", cssTokenizer);
348     htmlProps.addSpecialSequence("<", startOfTag);
349     htmlProps.addSpecialSequence(">", endOfTag);
350     htmlProps.addBlockComment("<!--", "-->");
351     htmlProps.addString(TokenizerProperties.DEFAULT_STRING_START, TokenizerProperties.DEFAULT_STRING_END, TokenizerProperties.DEFAULT_STRING_ESCAPE);
352     htmlProps.setSeparators(TokenizerProperties.DEFAULT_SEPARATORS);
353     
354     jsProps.addBlockComment(TokenizerProperties.DEFAULT_BLOCK_COMMENT_START, TokenizerProperties.DEFAULT_BLOCK_COMMENT_END);
355     jsProps.addSpecialSequence("<!--");
356     jsProps.addSpecialSequence("-->", endOfEmbedded);
357     jsProps.setSeparators(TokenizerProperties.DEFAULT_SEPARATORS);
358     
359     cssProps.addSpecialSequence("<!--");
360     cssProps.addSpecialSequence("-->", endOfEmbedded);
361     
362     htmlTokenizer.setSource(_reader);
363     htmlTokenizer.addTokenizer(jsTokenizer);
364     htmlTokenizer.addTokenizer(cssTokenizer);
365 
366     System.out.println("\nStart parsing \"" + _path + "\"");
367     while (htmlTokenizer.hasMoreToken()) {
368       token = htmlTokenizer.nextToken();
369       
370       // Line counting test
371       assertTrue(token.getStartLine() >= lastStartLineNo);
372       if (token.getStartLine() == lastStartLineNo) {
373         assertTrue(token.getStartColumn() >= lastStartColNo);
374         if (token.getEndLine() == lastStartLineNo) {
375           assertTrue(token.getEndColumn() == token.getStartColumn() + token.getLength());
376         }
377       }
378       lastStartLineNo = token.getStartLine();
379       lastStartColNo  = token.getStartColumn();
380       
381       // Tokenizer switching
382       switch (token.getType()) {
383       case Token.SPECIAL_SEQUENCE:
384         
385         // dealing with JavaScript
386         if (token.getCompanion() == startOfTag) {
387           token = htmlTokenizer.nextToken();
388           if (token.getType() == Token.KEYWORD && token.getCompanion() == jsTokenizer) {
389             token = htmlTokenizer.nextToken();
390             assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected \"" + keywordLang + "\".",
391                       token.getCompanion() == keywordLang);       // see above; should be the LANGUAGE token
392             token = htmlTokenizer.nextToken();
393             assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected \"=\".",
394                       htmlTokenizer.currentImage().equals("="));  // see above; should be "="
395             token = htmlTokenizer.nextToken();
396             assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected string.",
397                       token.getType() == Token.STRING);           // see above; should be "JavaScript"
398             
399             // exclude JavaScript-Includes
400             token = htmlTokenizer.nextToken();
401             if (token.getCompanion() == endOfTag) {
402               htmlTokenizer.switchTo(jsTokenizer);
403 
404               // continuing with JavaScriptTokenizer
405               while (jsTokenizer.hasMoreToken()) {
406                 token = jsTokenizer.nextToken();
407                 if (token.getType() == Token.SPECIAL_SEQUENCE && token.getCompanion() == endOfEmbedded) {
408                   jsTokenizer.switchTo(htmlTokenizer);
409                   break;
410                 }
411               }
412               
413               // now we should find the end-of script tag
414               token = htmlTokenizer.nextToken();
415               assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected start of tag.",
416                         token.getCompanion() == startOfTag);
417               token = htmlTokenizer.nextToken();
418               assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected \"/\".",
419                         htmlTokenizer.currentImage().equals("/"));
420               token = htmlTokenizer.nextToken();
421               assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected script.",
422                         token.getCompanion() == jsTokenizer);
423               token = htmlTokenizer.nextToken();
424               assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected end of tag.",
425                         token.getCompanion() == endOfTag);
426             }
427             
428           // dealing with Cascading Style Sheets (CSS
429           } else if (token.getType() == Token.KEYWORD && token.getCompanion() == jsTokenizer) {
430             token = htmlTokenizer.nextToken();
431             assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected end of tag.",
432                       token.getCompanion() == endOfTag);   // should be the end of tag
433             
434             htmlTokenizer.switchTo(cssTokenizer);
435             while (cssTokenizer.hasMoreToken()) {
436               token = cssTokenizer.nextToken();
437               if (token.getType() == Token.SPECIAL_SEQUENCE && token.getCompanion() == endOfEmbedded) {
438                 jsTokenizer.switchTo(htmlTokenizer);
439                 break;
440               }
441             }
442             
443             // now we should find the end-of-style tag
444             token = htmlTokenizer.nextToken();
445             assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected start of tag.",
446                       token.getCompanion() == startOfTag);
447             token = htmlTokenizer.nextToken();
448             assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected \"/\".",
449                       htmlTokenizer.currentImage().equals("/"));
450             token = htmlTokenizer.nextToken();
451             assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected script.",
452                       token.getCompanion() == cssTokenizer);
453             token = htmlTokenizer.nextToken();
454             assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected end of tag.",
455                       token.getCompanion() == endOfTag);
456           }
457         }
458         break;
459       }
460     }
461 
462     long diff = System.currentTimeMillis() - start;
463     System.out.println("Finished after " + diff + " milliseconds");
464   }
465   
466   
467   //---------------------------------------------------------------------------
468   // Members
469   //
470   protected InputStreamReader   _reader = null;
471   protected String              _path   = null;
472 }
473
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags