KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > de > susebox > jtopas > TestEmbeddedTokenizer


1 /*
2  * TestEmbeddedTokenizer.java: JUnit test for the StandardTokenizer
3  *
4  * Copyright (C) 2001 Heiko Blau
5  *
6  * This file belongs to the Susebox Java core test suite.
7  * The Susebox Java core test suite is free software; you can redistribute it
8  * and/or modify it under the terms of the GNU Lesser General Public License as
9  * published by the Free Software Foundation; either version 2.1 of the License,
10  * or (at your option) any later version.
11  *
12  * This software is distributed in the hope that it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.
15  * See the GNU Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public License along
18  * with the Susebox Java core test suite. If not, write to the
19  *
20  * Free Software Foundation, Inc.
21  * 59 Temple Place, Suite 330,
22  * Boston, MA 02111-1307
23  * USA
24  *
25  * or check the Internet: http://www.fsf.org
26  *
27  * The Susebox Java core test suite uses the test framework JUnit by Kent Beck
28  * and Erich Gamma. You should have received a copy of their JUnit licence
29  * agreement along with the Susebox Java test suite.
30  *
31  * We do NOT provide the JUnit archive junit.jar necessary to compile and run
32  * our tests, since we assume that you either have it already or would like
33  * to get the current release yourself.
34  * Please visit either:
35  * http://sourceforge.net/projects/junit
36  * or
37  * http://junit.org
38  * to obtain JUnit.
39  *
40  * Contact:
41  * email: heiko@susebox.de
42  */

43
44 package de.susebox.jtopas;
45
46 //-----------------------------------------------------------------------------
47
// Imports
48
//
49
import java.io.InputStream JavaDoc;
50 import java.io.FileInputStream JavaDoc;
51 import java.io.InputStreamReader JavaDoc;
52 import java.util.Vector JavaDoc;
53 import java.util.Properties JavaDoc;
54 import java.net.URL JavaDoc;
55
56 import junit.framework.Test;
57 import junit.framework.TestCase;
58 import junit.framework.TestSuite;
59 import junit.framework.Assert;
60
61 import de.susebox.java.lang.ExtRuntimeException;
62
63 import de.susebox.TestUtilities;
64
65
66 //-----------------------------------------------------------------------------
67
// Class TestEmbeddedTokenizer
68
//
69

70 /**<p>
71  * This unit test checks the embedded-tokenizer feature of the class {@link StandardTokenizer}.
72  * With this technique it is possible to parse multipart documents like HTML with
73  * embedded CSS and script parts, Java and javadoc comments etc.
74  *</p><p>
75  * This test suite works with a test configuration file. This file contains some
76  * sets of properties, each set for one or more different test runs.
77  *</p><p>
78  * The properties are defined as class constants. In the configuration file, a
79  * property consists of the property name and a number identifying the property
80  * set.
81  *</p>
82  *
83  * @see StandardTokenizer
84  * @author Heiko Blau
85  */

86 public class TestEmbeddedTokenizer extends TestCase {
87   
88   //---------------------------------------------------------------------------
89
// properties
90
//
91

92   /**
93    * The name of the test configuration file. This file will be read by
94    * {@link java.lang.Class#getResourceAsStream}.
95    */

96   public static final String JavaDoc CONFIG_FILE = "TestEmbeddedTokenizer.conf";
97   
98   /**
99    * Property for the test {@link #testEmbeddedTokenizer}
100    */

101   public static final String JavaDoc PROP_PATH = "Path";
102   
103   /**
104    * Property for the test {@link #testJavaTokenizer}
105    */

106   public static final String JavaDoc PROP_JAVAPATH = "JavaPath";
107   
108   
109   //---------------------------------------------------------------------------
110
// main method
111
//
112

113   /**
114    * call this method to invoke the tests.
115    *
116    * @param args unused
117    */

118   public static void main(String JavaDoc[] args) {
119     String JavaDoc[] tests = { TestEmbeddedTokenizer.class.getName() };
120
121     TestUtilities.run(tests, args);
122   }
123   
124
125   //---------------------------------------------------------------------------
126
// suite method
127
//
128

129   /**
130    * Implementation of the JUnit method <code>suite</code>. For each set of test
131    * properties one or more tests are instantiated.
132    *
133    * @return a test suite
134    */

135   public static Test suite() {
136     TestSuite suite = new TestSuite(TestEmbeddedTokenizer.class.getName());
137     Properties JavaDoc props = new Properties JavaDoc();
138     int count = 1;
139     String JavaDoc path;
140     URL JavaDoc url;
141     
142     try {
143       props.load(TestEmbeddedTokenizer.class.getResourceAsStream(CONFIG_FILE));
144     } catch (Exception JavaDoc ex) {
145       throw new ExtRuntimeException(ex);
146     }
147
148     // test on HTML files
149
while ((path = props.getProperty(PROP_PATH + count)) != null) {
150       if ((url = TestEmbeddedTokenizer.class.getResource(path)) != null) {
151         path = url.getFile();
152       }
153       suite.addTest(new TestEmbeddedTokenizer("testEmbeddedTokenizer", path));
154       count++;
155     }
156
157     // tests on Java files
158
count = 1;
159     while ((path = props.getProperty(PROP_JAVAPATH + count)) != null) {
160       if ((url = TestEmbeddedTokenizer.class.getResource(path)) != null) {
161         path = url.getFile();
162       }
163       suite.addTest(new TestEmbeddedTokenizer("testJavaTokenizer", path));
164       count++;
165     }
166     return suite;
167   }
168   
169   
170   //---------------------------------------------------------------------------
171
// Constructor
172
//
173

174   /**
175    * Initializing the instance with the test file path
176    *
177    * @param test which test method should be invoked
178    * @param path name of test configuration file
179    */

180   public TestEmbeddedTokenizer(String JavaDoc test, String JavaDoc path) {
181     super(test);
182     _path = path;
183   }
184
185   
186   //---------------------------------------------------------------------------
187
// Fixture setup and release
188
//
189

190   /**
191    * Sets up the fixture, for example, open a network connection.
192    * This method is called before a test is executed.
193    *
194    * @throws Exception for anything that might go wrong
195    */

196   protected void setUp() throws Exception JavaDoc {
197     InputStream JavaDoc stream = new FileInputStream JavaDoc(_path);
198     
199     _reader = new InputStreamReader JavaDoc(stream);
200   }
201
202   
203   /**
204    * Tears down the fixture, for example, close a network connection.
205    * This method is called after a test is executed.
206    *
207    * @throws Exception for anything that might go wrong
208    */

209   protected void tearDown() throws Exception JavaDoc {
210     _reader.close();
211   }
212   
213   //---------------------------------------------------------------------------
214
// test cases
215
//
216

217   
218   /**
219    * This method reads the given stream as a Java source. It extracts javadoc
220    * comments and source code.
221    * There should be a class or interface name in every Java source. The opening
222    * and closing brackets should match etc.
223    *
224    * @throws Throwable for anything that might go wrong
225    * @see #testEmbeddedTokenizer
226    */

227   public void testJavaTokenizer() throws Throwable JavaDoc {
228     long start = System.currentTimeMillis();
229     StandardTokenizerProperties javaProps = new StandardTokenizerProperties();
230     StandardTokenizerProperties docProps = new StandardTokenizerProperties();
231     StandardTokenizer javaTokenizer = new StandardTokenizer(javaProps);
232     StandardTokenizer docTokenizer = new StandardTokenizer(docProps);
233     StandardTokenizer currTokenizer = javaTokenizer;
234     Object JavaDoc openBlock = new Object JavaDoc();
235     Object JavaDoc closeBlock = new Object JavaDoc();
236     Object JavaDoc atSign = new Object JavaDoc();
237     int blockBalance = 0;
238     Token token;
239     int lastStartLineNo = -1;
240     int lastStartColNo = -1;
241
242     javaProps.setParseFlags(Flags.F_TOKEN_POS_ONLY | Flags.F_KEEP_DATA | Flags.F_COUNT_LINES);
243     docProps.setParseFlags(Flags.F_NO_CASE);
244     
245     javaProps.addSpecialSequence("/**", docTokenizer);
246     javaProps.addSpecialSequence("{", openBlock);
247     javaProps.addSpecialSequence("}", closeBlock);
248     javaProps.addBlockComment(TokenizerProperties.DEFAULT_BLOCK_COMMENT_START, TokenizerProperties.DEFAULT_BLOCK_COMMENT_END);
249     javaProps.addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT);
250     javaProps.addString(TokenizerProperties.DEFAULT_STRING_START, TokenizerProperties.DEFAULT_STRING_END, TokenizerProperties.DEFAULT_STRING_ESCAPE);
251     javaProps.addString("'", "'", "\\");
252     docProps.addSpecialSequence("*/", javaTokenizer);
253     docProps.addSpecialSequence("@", atSign);
254     docProps.addKeyword("param");
255     docProps.addKeyword("return");
256     docProps.addKeyword("throws");
257     docProps.addKeyword("author");
258     docProps.addKeyword("version");
259     docProps.addKeyword("link");
260     docProps.addKeyword("see");
261     docProps.addKeyword("deprecated");
262     
263     javaTokenizer.setSource(_reader);
264     javaTokenizer.addTokenizer(docTokenizer);
265
266     System.out.println("\nStart parsing \"" + _path + "\"");
267     while (currTokenizer.hasMoreToken()) {
268       token = currTokenizer.nextToken();
269       
270       // Line counting test
271
assertTrue(token.getStartLine() >= lastStartLineNo);
272       if (token.getStartLine() == lastStartLineNo) {
273         assertTrue(token.getStartColumn() >= lastStartColNo);
274         if (token.getEndLine() == lastStartLineNo) {
275           assertTrue(token.getEndColumn() == token.getStartColumn() + token.getLength());
276         }
277       }
278       lastStartLineNo = token.getStartLine();
279       lastStartColNo = token.getStartColumn();
280       
281       // tokenizer switching
282
switch (token.getType()) {
283         case Token.SPECIAL_SEQUENCE:
284           if (token.getCompanion() instanceof StandardTokenizer) {
285             StandardTokenizer tokenizer = (StandardTokenizer)token.getCompanion();
286             
287             currTokenizer.switchTo(tokenizer);
288             currTokenizer = tokenizer;
289           } else if (token.getCompanion() == openBlock) {
290             blockBalance++;
291           } else if (token.getCompanion() == closeBlock) {
292             blockBalance--;
293           } else if (token.getCompanion() == atSign) {
294             token = currTokenizer.nextToken();
295             assertTrue("Expected keyword after @ sign in javadoc comment, but found \"" + currTokenizer.currentImage(),
296                       token.getType() == Token.KEYWORD);
297           }
298           break;
299       }
300     }
301     
302     // some checks
303
assertTrue("Braces should be balanced in Java file \""
304               + _path + "\", but detected inbalance " + blockBalance,
305               blockBalance == 0);
306
307     // print elapsed time
308
long diff = System.currentTimeMillis() - start;
309     System.out.println("Finished after " + diff + " milliseconds");
310   }
311     
312
313   
314   /**
315    * The method takes the HTML file given in the constructor, and parses with
316    * the main HTML tokenizer and two embedded tokenizers for JavaScript and
317    * CSS.
318    *
319    * @throws Throwable for anything that might go wrong
320    * @see #testEmbeddedTokenizer
321    */

322   public void testEmbeddedTokenizer() throws Throwable JavaDoc {
323     long start = System.currentTimeMillis();
324     StandardTokenizerProperties htmlProps = new StandardTokenizerProperties();
325     StandardTokenizerProperties jsProps = new StandardTokenizerProperties();
326     StandardTokenizerProperties cssProps = new StandardTokenizerProperties();
327     StandardTokenizer htmlTokenizer = new StandardTokenizer(htmlProps);
328     StandardTokenizer jsTokenizer = new StandardTokenizer(jsProps);
329     StandardTokenizer cssTokenizer = new StandardTokenizer(cssProps);
330     String JavaDoc keywordLang = new String JavaDoc("LANGUAGE");
331     Object JavaDoc endOfEmbedded = new Object JavaDoc();
332     Object JavaDoc startOfTag = new Object JavaDoc();
333     Object JavaDoc endOfTag = new Object JavaDoc();
334     Object JavaDoc endOfScript = new Object JavaDoc();
335     Token token;
336     int lastStartLineNo = -1;
337     int lastStartColNo = -1;
338
339     htmlProps.setParseFlags(Flags.F_TOKEN_POS_ONLY
340                           | Flags.F_KEEP_DATA
341                           | Flags.F_COUNT_LINES);
342     cssProps.setParseFlags (Flags.F_TOKEN_POS_ONLY | Flags.F_NO_CASE);
343     jsProps.setParseFlags (Flags.F_TOKEN_POS_ONLY);
344     
345     htmlProps.addKeyword("SCRIPT", jsTokenizer);
346     htmlProps.addKeyword("LANGUAGE", keywordLang);
347     htmlProps.addKeyword("STYLE", cssTokenizer);
348     htmlProps.addSpecialSequence("<", startOfTag);
349     htmlProps.addSpecialSequence(">", endOfTag);
350     htmlProps.addBlockComment("<!--", "-->");
351     htmlProps.addString(TokenizerProperties.DEFAULT_STRING_START, TokenizerProperties.DEFAULT_STRING_END, TokenizerProperties.DEFAULT_STRING_ESCAPE);
352     htmlProps.setSeparators(TokenizerProperties.DEFAULT_SEPARATORS);
353     
354     jsProps.addBlockComment(TokenizerProperties.DEFAULT_BLOCK_COMMENT_START, TokenizerProperties.DEFAULT_BLOCK_COMMENT_END);
355     jsProps.addSpecialSequence("<!--");
356     jsProps.addSpecialSequence("-->", endOfEmbedded);
357     jsProps.setSeparators(TokenizerProperties.DEFAULT_SEPARATORS);
358     
359     cssProps.addSpecialSequence("<!--");
360     cssProps.addSpecialSequence("-->", endOfEmbedded);
361     
362     htmlTokenizer.setSource(_reader);
363     htmlTokenizer.addTokenizer(jsTokenizer);
364     htmlTokenizer.addTokenizer(cssTokenizer);
365
366     System.out.println("\nStart parsing \"" + _path + "\"");
367     while (htmlTokenizer.hasMoreToken()) {
368       token = htmlTokenizer.nextToken();
369       
370       // Line counting test
371
assertTrue(token.getStartLine() >= lastStartLineNo);
372       if (token.getStartLine() == lastStartLineNo) {
373         assertTrue(token.getStartColumn() >= lastStartColNo);
374         if (token.getEndLine() == lastStartLineNo) {
375           assertTrue(token.getEndColumn() == token.getStartColumn() + token.getLength());
376         }
377       }
378       lastStartLineNo = token.getStartLine();
379       lastStartColNo = token.getStartColumn();
380       
381       // Tokenizer switching
382
switch (token.getType()) {
383       case Token.SPECIAL_SEQUENCE:
384         
385         // dealing with JavaScript
386
if (token.getCompanion() == startOfTag) {
387           token = htmlTokenizer.nextToken();
388           if (token.getType() == Token.KEYWORD && token.getCompanion() == jsTokenizer) {
389             token = htmlTokenizer.nextToken();
390             assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected \"" + keywordLang + "\".",
391                       token.getCompanion() == keywordLang); // see above; should be the LANGUAGE token
392
token = htmlTokenizer.nextToken();
393             assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected \"=\".",
394                       htmlTokenizer.currentImage().equals("=")); // see above; should be "="
395
token = htmlTokenizer.nextToken();
396             assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected string.",
397                       token.getType() == Token.STRING); // see above; should be "JavaScript"
398

399             // exclude JavaScript-Includes
400
token = htmlTokenizer.nextToken();
401             if (token.getCompanion() == endOfTag) {
402               htmlTokenizer.switchTo(jsTokenizer);
403
404               // continuing with JavaScriptTokenizer
405
while (jsTokenizer.hasMoreToken()) {
406                 token = jsTokenizer.nextToken();
407                 if (token.getType() == Token.SPECIAL_SEQUENCE && token.getCompanion() == endOfEmbedded) {
408                   jsTokenizer.switchTo(htmlTokenizer);
409                   break;
410                 }
411               }
412               
413               // now we should find the end-of script tag
414
token = htmlTokenizer.nextToken();
415               assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected start of tag.",
416                         token.getCompanion() == startOfTag);
417               token = htmlTokenizer.nextToken();
418               assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected \"/\".",
419                         htmlTokenizer.currentImage().equals("/"));
420               token = htmlTokenizer.nextToken();
421               assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected script.",
422                         token.getCompanion() == jsTokenizer);
423               token = htmlTokenizer.nextToken();
424               assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected end of tag.",
425                         token.getCompanion() == endOfTag);
426             }
427             
428           // dealing with Cascading Style Sheets (CSS
429
} else if (token.getType() == Token.KEYWORD && token.getCompanion() == jsTokenizer) {
430             token = htmlTokenizer.nextToken();
431             assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected end of tag.",
432                       token.getCompanion() == endOfTag); // should be the end of tag
433

434             htmlTokenizer.switchTo(cssTokenizer);
435             while (cssTokenizer.hasMoreToken()) {
436               token = cssTokenizer.nextToken();
437               if (token.getType() == Token.SPECIAL_SEQUENCE && token.getCompanion() == endOfEmbedded) {
438                 jsTokenizer.switchTo(htmlTokenizer);
439                 break;
440               }
441             }
442             
443             // now we should find the end-of-style tag
444
token = htmlTokenizer.nextToken();
445             assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected start of tag.",
446                       token.getCompanion() == startOfTag);
447             token = htmlTokenizer.nextToken();
448             assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected \"/\".",
449                       htmlTokenizer.currentImage().equals("/"));
450             token = htmlTokenizer.nextToken();
451             assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected script.",
452                       token.getCompanion() == cssTokenizer);
453             token = htmlTokenizer.nextToken();
454             assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected end of tag.",
455                       token.getCompanion() == endOfTag);
456           }
457         }
458         break;
459       }
460     }
461
462     long diff = System.currentTimeMillis() - start;
463     System.out.println("Finished after " + diff + " milliseconds");
464   }
465   
466   
467   //---------------------------------------------------------------------------
468
// Members
469
//
470
protected InputStreamReader JavaDoc _reader = null;
471   protected String JavaDoc _path = null;
472 }
473
Popular Tags