package org.htmlparser.tests.parserHelperTests;

import java.util.HashMap;
import java.util.Map;

import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.Tag;
import org.htmlparser.tests.ParserTestCase;
import org.htmlparser.util.ParserException;

/**
 * Tests for tag parsing: attribute values containing quotes, angle brackets
 * and line breaks, empty XML-style tags, and concurrent parsing from many
 * threads.
 * <p>
 * The helpers used throughout (<code>createParser</code>,
 * <code>parseAndAssertNodeCount</code>, <code>assertType</code>,
 * <code>assertStringEquals</code> and the <code>node</code> array) are not
 * defined in this file; presumably they are inherited from
 * {@link ParserTestCase}.
 */
public class TagParserTest extends ParserTestCase
{
    /** Page fragment appended to a leading link in {@link #testThreadSafety()}; it contains exactly one anchor tag. */
    private static final String TEST_HTML =
        "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">"
            + "<!-- Server: sf-web2 -->"
            + "<html lang=\"en\">"
            + " <head><link rel=\"stylesheet\" type=\"text/css\" HREF=\"http://sourceforge.net/cssdef.php\">"
            + " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">"
            + " <TITLE>SourceForge.net: Modify: 711073 - HTMLTagParser not threadsafe as a static variable in Tag</TITLE>"
            + " <SCRIPT language=\"JavaScript\" type=\"text/javascript\">"
            + " <!--"
            + " function help_window(helpurl) {"
            + " HelpWin = window.open( 'http://sourceforge.net' + helpurl,'HelpWindow','scrollbars=yes,resizable=yes,toolbar=no,height=400,width=400');"
            + " }"
            + " // -->"
            + " </SCRIPT>"
            + " <link rel=\"SHORTCUT ICON\" HREF=\"/images/favicon.ico\">"
            + "<!-- This is temp javascript for the jump button. If we could actually have a jump script on the server side that would be ideal -->"
            + "<script language=\"JavaScript\" type=\"text/javascript\">"
            + "<!--"
            + " function jump(targ,selObj,restore){ //v3.0"
            + " if (selObj.options[selObj.selectedIndex].value) "
            + " eval(targ+\".location='\"+selObj.options[selObj.selectedIndex].value+\"'\");"
            + " if (restore) selObj.selectedIndex=0;"
            + " }"
            + " //-->"
            + "</script>"
            + "<a HREF=\"http://normallink.com/sometext.html\">"
            + "<style type=\"text/css\">"
            + "<!--"
            + "A:link { text-decoration:none }"
            + "A:visited { text-decoration:none }"
            + "A:active { text-decoration:none }"
            + "A:hover { text-decoration:underline; color:#0066FF; }"
            + "-->"
            + "</style>"
            + "</head>"
            + "<body bgcolor=\"#FFFFFF\" text=\"#000000\" leftmargin=\"0\" topmargin=\"0\" marginwidth=\"0\" marginheight=\"0\" link=\"#003399\" vlink=\"#003399\" alink=\"#003399\">";

    /** Link target that contains a '&gt;' character; first link of the documents parsed by the first half of the threads. */
    private static final String SEARCH_LINK =
        "/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html";

    /** Link target of the anchor inside {@link #TEST_HTML} and of the leading anchor used by the second half of the threads. */
    private static final String NORMAL_LINK =
        "http://normallink.com/sometext.html";

    /** Assigned in {@link #testThreadSafety()} but never read anywhere in this class. */
    private Map results;

    /** Sum of the ids of the worker threads that have finished; only updated while holding this test's monitor. */
    private int testProgress;

    /**
     * Creates the test case.
     * @param name The name handed to the superclass constructor.
     */
    public TagParserTest(String name)
    {
        super(name);
    }

    /**
     * An attribute value containing an apostrophe inside double quotes must
     * be returned intact and appear intact in the regenerated HTML.
     */
    public void testTagWithQuotes() throws Exception
    {
        String testHtml =
            "<img SRC=\"http://g-images.amazon.com/images/G/01/merchants/logos/marshall-fields-logo-20.gif\" width=87 height=20 border=0 alt=\"Marshall Field's\">";

        createParser(testHtml);
        parseAndAssertNodeCount(1);
        assertType("should be Tag", Tag.class, node[0]);
        Tag tag = (Tag) node[0];
        assertStringEquals("alt", "Marshall Field's", tag.getAttribute("ALT"));
        assertStringEquals(
            "html",
            "<IMG BORDER=\"0\" ALT=\"Marshall Field's\" WIDTH=\"87\" SRC=\"http://g-images.amazon.com/images/G/01/merchants/logos/marshall-fields-logo-20.gif\" HEIGHT=\"20\">",
            tag.toHtml());
    }

    /**
     * A tag of the form <code>&lt;custom/&gt;</code> must be reported as an
     * empty XML tag and keep its trailing slash in the regenerated HTML.
     */
    public void testEmptyTag() throws Exception
    {
        createParser("<custom/>");
        parseAndAssertNodeCount(1);
        assertType("should be Tag", Tag.class, node[0]);
        Tag tag = (Tag) node[0];
        assertStringEquals("tag name", "CUSTOM", tag.getTagName());
        assertTrue("empty tag", tag.isEmptyXmlTag());
        assertStringEquals("html", "<CUSTOM/>", tag.toHtml());
    }

    /**
     * A '&gt;' inside a double-quoted attribute value must not end the tag.
     */
    public void testTagWithCloseTagSymbolInAttribute() throws ParserException
    {
        createParser("<tag att=\"a>b\">");
        parseAndAssertNodeCount(1);
        assertType("should be Tag", Tag.class, node[0]);
        Tag tag = (Tag) node[0];
        assertStringEquals("attribute", "a>b", tag.getAttribute("att"));
    }

    /**
     * A '&lt;' inside a double-quoted attribute value must not start a new tag.
     */
    public void testTagWithOpenTagSymbolInAttribute() throws ParserException
    {
        createParser("<tag att=\"a<b\">");
        parseAndAssertNodeCount(1);
        assertType("should be Tag", Tag.class, node[0]);
        Tag tag = (Tag) node[0];
        assertStringEquals("attribute", "a<b", tag.getAttribute("att"));
    }

    /**
     * A single-quoted attribute value containing '&lt;' is returned intact;
     * the expected regenerated HTML uses double quotes around the value.
     */
    public void testTagWithSingleQuote() throws ParserException
    {
        createParser("<tag att=\'a<b\'>");
        parseAndAssertNodeCount(1);
        assertType("should be Tag", Tag.class, node[0]);
        Tag tag = (Tag) node[0];
        assertStringEquals("html", "<TAG ATT=\"a<b\">", tag.toHtml());
        assertStringEquals("attribute", "a<b", tag.getAttribute("att"));
    }

    /**
     * Attribute value containing a complete <code>&lt;bar&gt;</code> sequence.
     */
    public void testMultiLine1() throws ParserException
    {
        createParser("<meta name=\"foo\" content=\"foo<bar>\">");
        parseAndAssertNodeCount(1);
        assertType("should be Tag", Tag.class, node[0]);
        Tag tag = (Tag) node[0];
        String html = tag.toHtml();
        assertStringEquals(
            "html",
            "<META CONTENT=\"foo<bar>\" NAME=\"foo\">",
            html);
        String attribute1 = tag.getAttribute("NAME");
        assertStringEquals("attribute 1", "foo", attribute1);
        String attribute2 = tag.getAttribute("CONTENT");
        assertStringEquals("attribute 2", "foo<bar>", attribute2);
    }

    /**
     * Attribute value containing an unmatched '&lt;'.
     */
    public void testMultiLine2() throws ParserException
    {
        createParser("<meta name=\"foo\" content=\"foo<bar\">");
        parseAndAssertNodeCount(1);
        assertType("should be Tag", Tag.class, node[0]);
        Tag tag = (Tag) node[0];
        String html = tag.toHtml();
        assertStringEquals(
            "html",
            "<META CONTENT=\"foo<bar\" NAME=\"foo\">",
            html);
        String attribute1 = tag.getAttribute("NAME");
        assertStringEquals("attribute 1", "foo", attribute1);
        String attribute2 = tag.getAttribute("CONTENT");
        assertStringEquals("attribute 2", "foo<bar", attribute2);
    }

    /**
     * Attribute value containing an unmatched '&gt;'.
     */
    public void testMultiLine3() throws ParserException
    {
        createParser("<meta name=\"foo\" content=\"foobar>\">");
        parseAndAssertNodeCount(1);
        assertType("should be Tag", Tag.class, node[0]);
        Tag tag = (Tag) node[0];
        String html = tag.toHtml();
        assertStringEquals(
            "html",
            "<META CONTENT=\"foobar>\" NAME=\"foo\">",
            html);
        String attribute1 = tag.getAttribute("NAME");
        assertStringEquals("attribute 1", "foo", attribute1);
        String attribute2 = tag.getAttribute("CONTENT");
        assertStringEquals("attribute 2", "foobar>", attribute2);
    }

    /**
     * Attribute value containing a line break followed by text ending in
     * '&gt;'. The input uses "\n"; the expected value and HTML use "\r\n".
     */
    public void testMultiLine4() throws ParserException
    {
        createParser("<meta name=\"foo\" content=\"foo\nbar>\">");
        parseAndAssertNodeCount(1);
        assertType("should be Tag", Tag.class, node[0]);
        Tag tag = (Tag) node[0];
        String html = tag.toHtml();
        assertStringEquals(
            "html",
            "<META CONTENT=\"foo\r\nbar>\" NAME=\"foo\">",
            html);
        String attribute1 = tag.getAttribute("NAME");
        assertStringEquals("attribute 1", "foo", attribute1);
        String attribute2 = tag.getAttribute("CONTENT");
        assertStringEquals("attribute 2", "foo\r\nbar>", attribute2);
    }

    /**
     * Attribute value with a complete <code>&lt;foo&gt;</code> sequence
     * before the line break. The assertions only run when the parser
     * reports version 1.4 or later.
     */
    public void testMultiLine5() throws ParserException
    {
        createParser("<meta name=\"foo\" content=\"<foo>\nbar\">");
        if (1.4 <= Parser.getVersionNumber())
        {
            parseAndAssertNodeCount(1);
            assertType("should be Tag", Tag.class, node[0]);
            Tag tag = (Tag) node[0];
            String html = tag.toHtml();
            assertStringEquals(
                "html",
                "<META CONTENT=\"<foo>\r\nbar\" NAME=\"foo\">",
                html);
            String attribute1 = tag.getAttribute("NAME");
            assertStringEquals("attribute 1", "foo", attribute1);
            String attribute2 = tag.getAttribute("CONTENT");
            assertStringEquals("attribute 2", "<foo>\r\nbar", attribute2);
        }
    }

    /**
     * Attribute value with a '&gt;' immediately before the line break. The
     * assertions only run when the parser reports version 1.4 or later.
     */
    public void testMultiLine6() throws ParserException
    {
        createParser("<meta name=\"foo\" content=\"foo>\nbar\">");
        if (1.4 <= Parser.getVersionNumber())
        {
            parseAndAssertNodeCount(1);
            assertType("should be Tag", Tag.class, node[0]);
            Tag tag = (Tag) node[0];
            String html = tag.toHtml();
            assertStringEquals(
                "html",
                "<META CONTENT=\"foo>\r\nbar\" NAME=\"foo\">",
                html);
            String attribute1 = tag.getAttribute("NAME");
            assertStringEquals("attribute 1", "foo", attribute1);
            String attribute2 = tag.getAttribute("CONTENT");
            assertStringEquals("attribute 2", "foo>\r\nbar", attribute2);
        }
    }

    /**
     * Attribute value with a '&lt;' before the line break, in input that
     * ends after the closing quote without a '&gt;'. The expected HTML
     * nevertheless ends with '&gt;'. The assertions only run when the
     * parser reports version 1.4 or later.
     */
    public void testMultiLine7() throws ParserException
    {
        createParser("<meta name=\"foo\" content=\"<foo\nbar\"");
        if (1.4 <= Parser.getVersionNumber())
        {
            parseAndAssertNodeCount(1);
            assertType("should be Tag", Tag.class, node[0]);
            Tag tag = (Tag) node[0];
            String html = tag.toHtml();
            assertStringEquals(
                "html",
                "<META CONTENT=\"<foo\r\nbar\" NAME=\"foo\">",
                html);
            String attribute1 = tag.getAttribute("NAME");
            assertStringEquals("attribute 1", "foo", attribute1);
            String attribute2 = tag.getAttribute("CONTENT");
            assertStringEquals("attribute 2", "<foo\r\nbar", attribute2);
        }
    }

    /**
     * Parses two different documents concurrently on 100 threads and checks
     * that every thread extracted the two links of its own document.
     * <p>
     * The first half of the threads parse a document whose first link is
     * {@link #SEARCH_LINK}; the second half parse one whose first link is
     * {@link #NORMAL_LINK}. In both documents the second link is the anchor
     * inside {@link #TEST_HTML}. The body only runs when the parser reports
     * version 1.4 or later.
     *
     * @throws Exception If the test thread is interrupted while waiting for
     * the workers, or on any other unexpected failure.
     */
    public void testThreadSafety() throws Exception
    {
        // NOTE(review): the parser created here is not referenced again in
        // this method; presumably the base class expects one to exist - confirm.
        createParser("<html></html>");
        if (1.4 <= Parser.getVersionNumber())
        {
            String testHtml1 =
                "<a HREF=\"/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html\">20020702 Report 1</A>"
                    + TEST_HTML;

            String testHtml2 =
                "<a HREF=\"http://normallink.com/sometext.html\">" + TEST_HTML;

            ParsingThread parsingThread[] = new ParsingThread[100];
            Thread workers[] = new Thread[parsingThread.length];
            int half = parsingThread.length / 2;
            results = new HashMap();
            testProgress = 0;
            for (int i = 0; i < parsingThread.length; i++)
            {
                String html = (i < half) ? testHtml1 : testHtml2;
                parsingThread[i] =
                    new ParsingThread(i, html, parsingThread.length);
                workers[i] = new Thread(parsingThread[i]);
                workers[i].start();
            }

            // join() both waits for completion and makes everything the
            // worker wrote (result, link1, link2) visible to this thread,
            // which polling an unsynchronized counter did not guarantee.
            for (int i = 0; i < workers.length; i++)
                workers[i].join();

            assertEquals(
                "every worker should have reported completion",
                computeCompletionValue(parsingThread.length),
                testProgress);

            for (int i = 0; i < parsingThread.length; i++)
            {
                if (!parsingThread[i].passed())
                {
                    // Pinpoint which expectation the failed worker missed.
                    assertNotNull(
                        "Thread " + i + " link 1",
                        parsingThread[i].getLink1());
                    assertNotNull(
                        "Thread " + i + " link 2",
                        parsingThread[i].getLink2());
                    String expectedLink1 =
                        (i < half) ? SEARCH_LINK : NORMAL_LINK;
                    assertStringEquals(
                        "Thread " + i + ", link 1:",
                        expectedLink1,
                        parsingThread[i].getLink1().getLink());
                    // Both documents end with TEST_HTML, so the second link
                    // is always the normal link.
                    assertStringEquals(
                        "Thread " + i + ", link 2:",
                        NORMAL_LINK,
                        parsingThread[i].getLink2().getLink());
                    // A worker that did not pass must never go unreported.
                    fail("Thread " + i + " did not pass");
                }
            }
        }
    }

    /**
     * Computes the value {@link #testProgress} reaches once every worker
     * has added its id, i.e. the sum 0 + 1 + ... + (numThreads - 1).
     * @param numThreads The number of worker threads, with ids starting at 0.
     * @return The sum of all worker ids.
     */
    private int computeCompletionValue(int numThreads)
    {
        return numThreads * (numThreads - 1) / 2;
    }

    /**
     * Worker that extracts the link tags from one document and records
     * whether the first two are the links expected for its id.
     * The results are meant to be read only after the thread running this
     * worker has been joined.
     */
    class ParsingThread implements Runnable
    {
        Parser parser;
        int id;
        LinkTag link1, link2;
        boolean result;
        int max;

        /**
         * @param id Index of this worker; ids below <code>max / 2</code>
         * expect {@link #SEARCH_LINK} as the first link.
         * @param testHtml The document this worker parses.
         * @param max The total number of workers.
         */
        ParsingThread(int id, String testHtml, int max)
        {
            this.id = id;
            this.max = max;
            this.parser = Parser.createParser(testHtml);
            parser.registerScanners();
        }

        /**
         * Parses the document, stores the first two link tags (if present)
         * and the pass/fail verdict, then adds this worker's id to the
         * enclosing test's progress counter.
         */
        public void run()
        {
            try
            {
                result = false;
                Node linkTag[] = parser.extractAllNodesThatAre(LinkTag.class);
                // Guard the indexes: a mis-parse yielding fewer than two
                // links should be reported by the test, not kill this
                // thread with an ArrayIndexOutOfBoundsException.
                if (linkTag.length > 0)
                    link1 = (LinkTag) linkTag[0];
                if (linkTag.length > 1)
                    link2 = (LinkTag) linkTag[1];
                if (link1 != null && link2 != null)
                {
                    String expectedLink1 =
                        (id < max / 2) ? SEARCH_LINK : NORMAL_LINK;
                    result =
                        expectedLink1.equals(link1.getLink())
                            && NORMAL_LINK.equals(link2.getLink());
                }
            }
            catch (ParserException e)
            {
                System.err.println("Parser Exception");
                e.printStackTrace();
            }
            finally
            {
                // '+=' is a read-modify-write; without the lock concurrent
                // workers can lose updates.
                synchronized (TagParserTest.this)
                {
                    testProgress += id;
                }
            }
        }

        /** @return The first link tag found, or <code>null</code> if none was stored. */
        public LinkTag getLink1()
        {
            return link1;
        }

        /** @return The second link tag found, or <code>null</code> if none was stored. */
        public LinkTag getLink2()
        {
            return link2;
        }

        /** @return <code>true</code> if both links matched the values expected for this worker's id. */
        public boolean passed()
        {
            return result;
        }
    }
}