package de.susebox.jtopas;

import java.io.InputStream;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.Vector;
import java.util.Properties;
import java.net.URL;

import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import junit.framework.Assert;

import de.susebox.java.lang.ExtRuntimeException;

import de.susebox.TestUtilities;


/**
 * JUnit test case that drives a {@link StandardTokenizer} over input files
 * named in the {@link #CONFIG_FILE} properties resource.
 *
 * <p>Each instance is bound to one test method and one input path. The file is
 * opened in {@link #setUp()} and closed in {@link #tearDown()}.
 */
public class TestStandardTokenizer extends TestCase {

  /** Name of the properties resource that lists the input files; loaded by {@link #suite()}. */
  public static final String CONFIG_FILE = "TestStandardTokenizer.conf";

  /**
   * Prefix of the numbered property keys ({@code Path1}, {@code Path2}, ...)
   * naming the files used by the link, contents and formatting tests.
   */
  public static final String PROP_PATH = "Path";

  /**
   * Prefix of the numbered property keys ({@code CountLinesPath1}, ...)
   * naming the files used by {@link #testLineCounting()}.
   */
  public static final String PROP_COUNTLINES_PATH = "CountLinesPath";


  /**
   * Command line entry point; hands this class name and the given arguments
   * to {@link TestUtilities#run}.
   *
   * @param args command line arguments, passed through unchanged
   */
  public static void main(String[] args) {
    String[] tests = { TestStandardTokenizer.class.getName() };

    TestUtilities.run(tests, args);
  }


  /**
   * Builds the suite from the configuration resource.
   *
   * <p>For every {@code Path<n>} entry (n = 1, 2, ... until the first missing
   * key) the link parsing, contents parsing and contents formatting tests are
   * added; for every {@code CountLinesPath<n>} entry the line counting test
   * is added.
   *
   * @return the assembled suite
   * @throws ExtRuntimeException if the configuration resource is missing or
   *         cannot be read
   */
  public static Test suite() {
    TestSuite suite = new TestSuite(TestStandardTokenizer.class.getName());
    Properties props = loadConfig();
    String path;

    for (int count = 1; (path = props.getProperty(PROP_PATH + count)) != null; ++count) {
      String resolved = resolvePath(path);

      suite.addTest(new TestStandardTokenizer("testLinkParsing", resolved));
      suite.addTest(new TestStandardTokenizer("testContentsParsing", resolved));
      suite.addTest(new TestStandardTokenizer("testContentsFormatting", resolved));
    }
    for (int count = 1; (path = props.getProperty(PROP_COUNTLINES_PATH + count)) != null; ++count) {
      suite.addTest(new TestStandardTokenizer("testLineCounting", resolvePath(path)));
    }
    return suite;
  }


  /**
   * Loads {@link #CONFIG_FILE} as a class resource and always closes the
   * underlying stream.
   *
   * @return the loaded properties
   * @throws ExtRuntimeException wrapping the cause if the resource is missing
   *         or unreadable
   */
  private static Properties loadConfig() {
    Properties props = new Properties();
    InputStream config = TestStandardTokenizer.class.getResourceAsStream(CONFIG_FILE);

    try {
      if (config == null) {
        // Report the missing resource by name instead of letting load(null) fail
        // with an anonymous NullPointerException.
        throw new java.io.FileNotFoundException(
            "Configuration resource \"" + CONFIG_FILE + "\" not found on the class path.");
      }
      props.load(config);
    } catch (Exception ex) {
      throw new ExtRuntimeException(ex);
    } finally {
      if (config != null) {
        try {
          config.close();
        } catch (java.io.IOException ignored) {
          // The properties are already loaded (or the load failure is already
          // being reported); a failure to close adds no useful information.
        }
      }
    }
    return props;
  }


  /**
   * Maps a configured path to a file system path.
   *
   * <p>If the path can be found as a class resource, the file part of the
   * resource URL is used; otherwise the configured path is returned unchanged.
   *
   * @param path the path as written in the configuration
   * @return the path to open in {@link #setUp()}
   */
  private static String resolvePath(String path) {
    URL url = TestStandardTokenizer.class.getResource(path);

    // NOTE(review): URL.getFile() does not decode percent-escapes, so a resource
    // directory containing e.g. spaces may yield a path that cannot be opened.
    return (url != null) ? url.getFile() : path;
  }


  /**
   * Creates a test instance bound to one test method and one input file.
   *
   * @param test name of the test method to run
   * @param path path of the file the test reads
   */
  public TestStandardTokenizer(String test, String path) {
    super(test);
    _path = path;
  }


  /**
   * Opens the input file for the test.
   *
   * @throws Exception if the file cannot be opened
   */
  protected void setUp() throws Exception {
    InputStream stream = new FileInputStream(_path);

    _reader = new InputStreamReader(stream);
  }


  /**
   * Closes the reader opened in {@link #setUp()}, if there is one.
   *
   * @throws Exception if closing fails
   */
  protected void tearDown() throws Exception {
    if (_reader != null) {
      _reader.close();
    }
  }


  /**
   * Scans the input for the keyword {@code HREF} and checks that the second
   * token after each occurrence is a string.
   *
   * @throws Throwable on any tokenizer or assertion failure
   */
  public void testLinkParsing() throws Throwable {
    long start = System.currentTimeMillis();
    TokenizerProperties props = new StandardTokenizerProperties();
    Tokenizer tokenizer = new StandardTokenizer(props);
    Token token;

    try {
      props.setParseFlags(Flags.F_NO_CASE);
      props.setSeparators("=");
      props.addString("\"", "\"", "\\");
      // Registered as block comments; presumably so that text between tags,
      // script sections and HTML comments are skipped rather than tokenized.
      props.addBlockComment(">", "<");
      props.addBlockComment("SCRIPT", "/SCRIPT");
      props.addBlockComment("!--", "--");
      props.addKeyword("HREF");
      tokenizer.setSource(new ReaderSource(_reader));

      System.out.println("\nStart looking for links in \"" + _path + "\"");
      while (tokenizer.hasMoreToken()) {
        token = tokenizer.nextToken();
        if (token.getType() == Token.KEYWORD) {
          // Step over the token that follows HREF (presumably the "=" separator),
          // then read the link itself.
          tokenizer.nextToken();
          System.out.println(" " + tokenizer.nextImage());
          assertTrue(tokenizer.currentImage() != null);
          assertTrue(tokenizer.currentToken().getType() == Token.STRING);
        }
      }
    } finally {
      tokenizer.close();
    }

    reportElapsed(start);
  }


  /**
   * Tokenizes the input with tags, the head section and HTML comments
   * registered as block comments, and checks that only {@code NORMAL} and
   * {@code EOF} tokens are returned.
   *
   * @throws Throwable on any tokenizer or assertion failure
   */
  public void testContentsParsing() throws Throwable {
    long start = System.currentTimeMillis();
    TokenizerProperties props = new StandardTokenizerProperties();
    Tokenizer tokenizer = new StandardTokenizer(props);

    try {
      tokenizer.setSource(new ReaderSource(_reader));
      System.out.println("\nStart extracting contents in \"" + _path + "\"");

      props.setParseFlags(Flags.F_NO_CASE | Flags.F_TOKEN_POS_ONLY);
      props.setWhitespaces(null);
      props.setSeparators(null);
      props.addBlockComment("<", ">");
      props.addBlockComment("<HEAD>", "</HEAD>");
      props.addBlockComment("<!--", "-->");

      while (tokenizer.hasMoreToken()) {
        tokenizer.nextToken();

        int type = tokenizer.currentToken().getType();

        if (type != Token.EOF) {
          System.out.println(tokenizer.currentImage());
          assertTrue("Method currentImage() returned null.", tokenizer.currentImage() != null);
        }
        assertTrue("Found token type " + tokenizer.currentToken().getType()
                   + ", expected NORMAL (" + Token.NORMAL + ") or EOF (" + Token.EOF + ").",
                   type == Token.NORMAL || type == Token.EOF);
      }
    } finally {
      tokenizer.close();
    }

    reportElapsed(start);
  }


  /**
   * Checks line and column counting.
   *
   * <p>Every {@code NORMAL} token is expected to start a triple
   * {@code <line> "/" <column>}. The two numbers are compared with the start
   * line and start column reported for the first token of the triple, and the
   * length of the column token is compared with the length of its image.
   *
   * @throws Throwable on any tokenizer, number format or assertion failure
   */
  public void testLineCounting() throws Throwable {
    long start = System.currentTimeMillis();
    TokenizerProperties props = new StandardTokenizerProperties();
    Tokenizer tokenizer = new StandardTokenizer(props);
    Token token;
    String image;
    int line;
    int col;

    System.out.println("\nStart counting lines in \"" + _path + "\"");

    try {
      tokenizer.setSource(new ReaderSource(_reader));
      props.setParseFlags(Flags.F_TOKEN_POS_ONLY | Flags.F_COUNT_LINES);
      props.setWhitespaces(TokenizerProperties.DEFAULT_WHITESPACES);
      props.setSeparators(TokenizerProperties.DEFAULT_SEPARATORS);
      props.addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT);
      props.addBlockComment(TokenizerProperties.DEFAULT_BLOCK_COMMENT_START,
                            TokenizerProperties.DEFAULT_BLOCK_COMMENT_END);

      while (tokenizer.hasMoreToken()) {
        token = tokenizer.nextToken();
        if (token.getType() != Token.NORMAL) {
          continue;
        }

        // "token" keeps referring to the line-number token while the tokenizer
        // advances over the separator and the column number.
        image = tokenizer.currentImage();
        line = Integer.parseInt(image);
        assertTrue("Missing separator \"/\".",
                   tokenizer.nextToken().getType() == Token.SEPARATOR
                   && tokenizer.currentImage().equals("/"));
        assertTrue("Missing column number", tokenizer.nextToken().getType() == Token.NORMAL);
        image = tokenizer.currentImage();
        col = Integer.parseInt(image);
        assertTrue("Found line number " + token.getStartLine()
                   + " does not match expected line number " + line,
                   line == token.getStartLine());
        assertTrue("Found column number " + token.getStartColumn()
                   + " does not match expected column number " + col,
                   col == token.getStartColumn());
        assertTrue("Found token length " + tokenizer.currentToken().getLength()
                   + " does not match expected length " + image.length(),
                   image.length() == tokenizer.currentToken().getLength());
      }
    } finally {
      tokenizer.close();
    }

    reportElapsed(start);
  }


  /**
   * Prints the text content of the input, re-wrapped outside of
   * {@code <pre>} sections, and checks that normal tokens, block comments
   * (tags) and whitespaces were all encountered.
   *
   * @throws Throwable on any tokenizer or assertion failure
   */
  public void testContentsFormatting() throws Throwable {
    long start = System.currentTimeMillis();
    TokenizerProperties props = new StandardTokenizerProperties();
    Tokenizer tokenizer = new StandardTokenizer(props);
    Token token;
    String image;
    int len;                            // characters printed on the current output line
    Object startPRE = new Object();     // identity markers used as companions of <pre> / </pre>
    Object endPRE = new Object();
    int inPRE = 0;                      // nesting depth of <pre> sections

    int wsCount = 0;
    int normalCount = 0;
    int specCount = 0;
    int commentCount = 0;

    System.out.println("\nStart formatting contents in \"" + _path + "\"");

    try {
      tokenizer.setSource(new ReaderSource(_reader));
      props.setParseFlags(Flags.F_NO_CASE
                          | Flags.F_TOKEN_POS_ONLY
                          | Flags.F_RETURN_WHITESPACES);
      props.setSeparators(null);
      props.addBlockComment("<", ">");
      props.addBlockComment("<HEAD>", "</HEAD>");
      props.addBlockComment("<!--", "-->");
      props.addSpecialSequence("<b>", "");
      props.addSpecialSequence("</b>", "");
      props.addSpecialSequence("<i>", "");
      props.addSpecialSequence("</i>", "");
      props.addSpecialSequence("<code>", "");
      props.addSpecialSequence("</code>", "");
      props.addSpecialSequence("<pre>", startPRE);
      props.addSpecialSequence("</pre>", endPRE);
      // The sequences are HTML entity names and the companions their replacement
      // text. They must not be the decoded characters: a bare "<" would collide
      // with the "<" block comment start registered above, and mapping a
      // character to itself would be pointless.
      props.addSpecialSequence("&auml;", "\u00E4", 0, Flags.F_NO_CASE);
      props.addSpecialSequence("&ouml;", "\u00F6", 0, Flags.F_NO_CASE);
      props.addSpecialSequence("&uuml;", "\u00FC", 0, Flags.F_NO_CASE);
      props.addSpecialSequence("&szlig;", "\u00DF", 0, Flags.F_NO_CASE);
      props.addSpecialSequence("&Auml;", "\u00C4", 0, Flags.F_NO_CASE);
      props.addSpecialSequence("&Ouml;", "\u00D6", 0, Flags.F_NO_CASE);
      props.addSpecialSequence("&Uuml;", "\u00DC", 0, Flags.F_NO_CASE);
      props.addSpecialSequence("&nbsp;", " ", 0, Flags.F_NO_CASE);
      props.addSpecialSequence("&gt;", ">", 0, Flags.F_NO_CASE);
      props.addSpecialSequence("&lt;", "<", 0, Flags.F_NO_CASE);
      props.addSpecialSequence("&copy;", "\u00A9");
      props.addSpecialSequence("&euro;", "\u20AC");

      len = 0;
      while (tokenizer.hasMoreToken()) {
        token = tokenizer.nextToken();
        switch (token.getType()) {
        case Token.NORMAL:
          image = tokenizer.currentImage();
          assertTrue("Found HTML tag in normal token: " + image, image.indexOf('<') < 0);
          System.out.print(image);
          if (inPRE <= 0) {
            len += token.getLength();
          }
          normalCount++;
          break;

        case Token.SPECIAL_SEQUENCE:
          image = tokenizer.currentImage();
          assertTrue("Couldn't find special sequence in properties: " + image,
                     props.specialSequenceExists(image));
          if (token.getCompanion() == startPRE) {
            System.out.println();
            len = 0;
            inPRE++;
          } else if (token.getCompanion() == endPRE) {
            System.out.println();
            len = 0;
            inPRE--;
          } else {
            // All remaining sequences were registered with a String companion.
            System.out.print((String) token.getCompanion());
          }
          specCount++;
          break;

        case Token.BLOCK_COMMENT:
          // A tag ends the current output line if anything was printed on it.
          if (len > 0) {
            System.out.println();
            len = 0;
          }
          commentCount++;
          break;

        case Token.WHITESPACE:
          if (inPRE > 0) {
            // Inside <pre> the whitespace is printed as found.
            System.out.print(tokenizer.currentImage());
          } else if (len > 75) {
            // Wrap once more than 75 characters are on the line.
            System.out.println();
            len = 0;
          } else if (len > 0) {
            // Otherwise collapse the whitespace to one blank; none at line start.
            System.out.print(' ');
            len++;
          }
          wsCount++;
          break;
        }
      }

      assertTrue("Not one simple context part was found in file " + _path + ".", normalCount > 0);
      assertTrue("No HTML tag found " + _path + ".", commentCount > 0);
      assertTrue("No whitespaces found " + _path + ".", wsCount > 0);

    } finally {
      tokenizer.close();
    }

    reportElapsed(start);
  }


  /**
   * Prints the time elapsed since the given start time.
   *
   * @param start start time as returned by {@link System#currentTimeMillis()}
   */
  private static void reportElapsed(long start) {
    long diff = System.currentTimeMillis() - start;

    System.out.println("Finished after " + diff + " milliseconds");
  }


  /** Reader on the input file; set in {@link #setUp()}, closed in {@link #tearDown()}. */
  protected InputStreamReader _reader = null;

  /** Path of the input file this test instance reads. */
  protected String _path = null;
}