1 27 package org.htmlparser.tests.lexerTests; 28 29 import org.htmlparser.Node; 30 import org.htmlparser.Parser; 31 import org.htmlparser.PrototypicalNodeFactory; 32 import org.htmlparser.Tag; 33 import org.htmlparser.tags.LinkTag; 34 import org.htmlparser.tags.MetaTag; 35 import org.htmlparser.tests.ParserTestCase; 36 import org.htmlparser.util.ParserException; 37 38 public class TagTests extends ParserTestCase { 39 static 40 { 41 System.setProperty ("org.htmlparser.tests.lexerTests.TagTests", "TagTests"); 42 } 43 44 private static final String TEST_HTML = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">" + 45 "<!-- Server: sf-web2 -->\n" + 46 "<html lang=\"en\">\n" + 47 " <head><link rel=\"stylesheet\" type=\"text/css\" HREF=\"http://sourceforge.net/cssdef.php\">\n" + 48 " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">\n" + 49 " <TITLE>SourceForge.net: Modify: 711073 - HTMLTagParser not threadsafe as a static variable in Tag</TITLE>\n" + 50 " <SCRIPT language=\"JavaScript\" type=\"text/javascript\">\n" + 51 " <!--\n" + 52 " function help_window(helpurl) {\n" + 53 " HelpWin = window.open( 'http://sourceforge.net' + helpurl,'HelpWindow','scrollbars=yes,resizable=yes,toolbar=no,height=400,width=400');\n" + 54 " }\n" + 55 " // -->\n" + 56 " </SCRIPT>\n" + 57 " <link rel=\"SHORTCUT ICON\" HREF=\"/images/favicon.ico\">\n" + 58 "<!-- This is temp javascript for the jump button. If we could actually have a jump script on the server side that would be ideal -->\n" + 59 "<script language=\"JavaScript\" type=\"text/javascript\">\n" + 60 "<!--\n" + 61 " function jump(targ,selObj,restore){ //v3.0\n" + 62 " if (selObj.options[selObj.selectedIndex].value)\n" + 63 " eval(targ+\".location='\"+selObj.options[selObj.selectedIndex].value+\"'\");\n" + 64 " if (restore) selObj.selectedIndex=0;\n" + 65 " }\n" + 66 " //-->\n" + 67 "</script>\n" + 68 "<a HREF=\"http://normallink.com/sometext.html\">\n" + 69 "<style type=\"text/css\">\n" + 70 "<!--\n" + 71 "A:link { text-decoration:none }\n" + 72 "A:visited { text-decoration:none }\n" + 73 "A:active { text-decoration:none }\n" + 74 "A:hover { text-decoration:underline; color:#0066FF; }\n" + 75 "-->\n" + 76 "</style>\n" + 77 "</head>\n" + 78 "<body bgcolor=\"#FFFFFF\" text=\"#000000\" leftmargin=\"0\" topmargin=\"0\" marginwidth=\"0\" marginheight=\"0\" link=\"#003399\" vlink=\"#003399\" alink=\"#003399\">\n"; 79 private int testProgress; 80 81 public TagTests (String name) { 82 super(name); 83 } 84 85 public void testTagWithQuotes() throws Exception { 86 String testHtml = 87 "<img SRC=\"http://g-images.amazon.com/images/G/01/merchants/logos/marshall-fields-logo-20.gif\" width=87 height=20 border=0 alt=\"Marshall Field's\">"; 88 89 createParser(testHtml); 90 parser.setNodeFactory (new PrototypicalNodeFactory (true)); 91 parseAndAssertNodeCount(1); 92 assertType("should be Tag",Tag.class,node[0]); 93 Tag tag = (Tag)node[0]; 94 assertStringEquals("alt","Marshall Field's",tag.getAttribute("ALT")); 95 assertStringEquals( 96 "html", 97 testHtml, 98 tag.toHtml() 99 ); 100 } 101 102 public void testEmptyTag() throws Exception 103 { 104 String html = "<custom/>"; 105 createParser(html); 106 parser.setNodeFactory (new PrototypicalNodeFactory (true)); 107 parseAndAssertNodeCount(1); 108 assertType("should be Tag",Tag.class,node[0]); 109 Tag tag = (Tag)node[0]; 110 assertStringEquals("tag name","CUSTOM",tag.getTagName()); 111 assertTrue("empty tag",tag.isEmptyXmlTag()); 112 assertStringEquals( 113 "html", 114 html, 115 tag.toHtml() 116 ); 117 } 118 119 public void testTagWithCloseTagSymbolInAttribute() throws ParserException { 120 createParser("<tag att=\"a>b\">"); 121 parser.setNodeFactory (new PrototypicalNodeFactory (true)); 122 parseAndAssertNodeCount(1); 123 assertType("should be Tag",Tag.class,node[0]); 124 Tag tag = (Tag)node[0]; 125 assertStringEquals("attribute","a>b",tag.getAttribute("att")); 126 } 127 128 public void testTagWithOpenTagSymbolInAttribute() throws ParserException { 129 createParser("<tag att=\"a<b\">"); 130 parser.setNodeFactory (new PrototypicalNodeFactory (true)); 131 parseAndAssertNodeCount(1); 132 assertType("should be Tag",Tag.class,node[0]); 133 Tag tag = (Tag)node[0]; 134 assertStringEquals("attribute","a<b",tag.getAttribute("att")); 135 } 136 137 public void testTagWithSingleQuote() throws ParserException { 138 String html = "<tag att=\'a<b\'>"; 139 createParser(html); 140 parser.setNodeFactory (new PrototypicalNodeFactory (true)); 141 parseAndAssertNodeCount(1); 142 assertType("should be Tag",Tag.class,node[0]); 143 Tag tag = (Tag)node[0]; 144 assertStringEquals("html",html,tag.toHtml()); 145 assertStringEquals("attribute","a<b",tag.getAttribute("att")); 146 } 147 148 153 public void testMultiLine1 () throws ParserException 154 { 155 String html = "<meta name=\"foo\" content=\"foo<bar>\">"; 156 createParser(html); 157 parseAndAssertNodeCount (1); 158 assertType ("should be MetaTag", MetaTag.class, node[0]); 159 Tag tag = (Tag)node[0]; 160 assertStringEquals ("html",html, tag.toHtml ()); 161 String attribute1 = tag.getAttribute ("NAME"); 162 assertStringEquals ("attribute 1","foo", attribute1); 163 String attribute2 = tag.getAttribute ("CONTENT"); 164 assertStringEquals ("attribute 2","foo<bar>", attribute2); 165 } 166 167 public void testMultiLine2 () throws ParserException 168 { 169 String html = "<meta name=\"foo\" content=\"foo<bar\">"; 170 createParser(html); 171 parseAndAssertNodeCount (1); 172 assertType ("should be MetaTag", MetaTag.class, node[0]); 173 Tag tag = (Tag)node[0]; 174 assertStringEquals ("html",html, tag.toHtml ()); 175 String attribute1 = tag.getAttribute ("NAME"); 176 assertStringEquals ("attribute 1","foo", attribute1); 177 String attribute2 = tag.getAttribute ("CONTENT"); 178 assertStringEquals ("attribute 2","foo<bar", attribute2); 179 } 180 181 public void testMultiLine3 () throws ParserException 182 { 183 String html = "<meta name=\"foo\" content=\"foobar>\">"; 184 createParser(html); 185 parseAndAssertNodeCount (1); 186 assertType ("should be MetaTag", MetaTag.class, node[0]); 187 Tag tag = (Tag)node[0]; 188 assertStringEquals ("html",html, tag.toHtml ()); 189 String attribute1 = tag.getAttribute ("NAME"); 190 assertStringEquals ("attribute 1","foo", attribute1); 191 String attribute2 = tag.getAttribute ("CONTENT"); 192 assertStringEquals ("attribute 2","foobar>", attribute2); 193 } 194 195 public void testMultiLine4 () throws ParserException 196 { 197 String html = "<meta name=\"foo\" content=\"foo\nbar>\">"; 198 createParser(html); 199 parseAndAssertNodeCount (1); 200 assertType ("should be MetaTag", MetaTag.class, node[0]); 201 Tag tag = (Tag)node[0]; 202 assertStringEquals ("html",html, tag.toHtml ()); 203 String attribute1 = tag.getAttribute ("NAME"); 204 assertStringEquals ("attribute 1","foo", attribute1); 205 String attribute2 = tag.getAttribute ("CONTENT"); 206 assertStringEquals ("attribute 2","foo\nbar>", attribute2); 207 } 208 209 213 public void testMultiLine5 () throws ParserException 214 { 215 String html = "<meta name=\"foo\" content=\"<foo>\nbar\">"; 218 createParser(html); 219 parseAndAssertNodeCount (1); 220 assertType ("should be MetaTag", MetaTag.class, node[0]); 221 Tag tag = (Tag)node[0]; 222 assertStringEquals ("html",html, tag.toHtml ()); 223 String attribute1 = tag.getAttribute ("NAME"); 224 assertStringEquals ("attribute 1","foo", attribute1); 225 String attribute2 = tag.getAttribute ("CONTENT"); 226 assertStringEquals ("attribute 2","<foo>\nbar", attribute2); 227 } 228 229 233 public void testMultiLine6 () throws ParserException 234 { 235 String html = "<meta name=\"foo\" content=\"foo>\nbar\">"; 238 createParser(html); 239 parseAndAssertNodeCount (1); 240 assertType ("should be MetaTag", MetaTag.class, node[0]); 241 Tag tag = (Tag)node[0]; 242 assertStringEquals ("html",html, tag.toHtml ()); 243 String attribute1 = tag.getAttribute ("NAME"); 244 assertStringEquals ("attribute 1","foo", attribute1); 245 String attribute2 = tag.getAttribute ("CONTENT"); 246 assertStringEquals ("attribute 2","foo>\nbar", attribute2); 247 } 248 249 253 public void testMultiLine7 () throws ParserException 254 { 255 String html = "<meta name=\"foo\" content=\"<foo\nbar\""; 258 createParser(html); 259 parseAndAssertNodeCount (1); 260 assertType ("should be MetaTag", MetaTag.class, node[0]); 261 Tag tag = (Tag)node[0]; 262 assertStringEquals ("html",html + ">", tag.toHtml ()); 263 String attribute1 = tag.getAttribute ("NAME"); 264 assertStringEquals ("attribute 1","foo", attribute1); 265 String attribute2 = tag.getAttribute ("CONTENT"); 266 assertStringEquals ("attribute 2","<foo\nbar", attribute2); 267 } 268 269 272 273 277 public void testThreadSafety() throws Exception 278 { 279 createParser("<html></html>"); 280 parser.setNodeFactory (new PrototypicalNodeFactory (true)); 281 String testHtml1 = "<a HREF=\"/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html\">20020702 Report 1</A>" + 282 TEST_HTML; 283 284 String testHtml2 = "<a HREF=\"http://normallink.com/sometext.html\">" + 285 TEST_HTML; 286 ParsingThread parsingThread [] = 287 new ParsingThread[100]; 288 testProgress = 0; 289 for (int i=0;i<parsingThread.length;i++) { 290 if (i<parsingThread.length/2) 291 parsingThread[i] = 292 new ParsingThread(i,testHtml1,parsingThread.length); 293 else 294 parsingThread[i] = 295 new ParsingThread(i,testHtml2,parsingThread.length); 296 297 Thread thread = new Thread (parsingThread[i]); 298 thread.start(); 299 } 300 301 int completionValue = computeCompletionValue(parsingThread.length); 302 303 do { 304 try { 305 Thread.sleep(500); 306 } 307 catch (InterruptedException e) { 308 } 309 } 310 while (testProgress!=completionValue); 311 for (int i=0;i<parsingThread.length;i++) 312 { 313 if (!parsingThread[i].passed()) 314 { 315 assertNotNull("Thread "+i+" link 1",parsingThread[i].getLink1()); 316 assertNotNull("Thread "+i+" link 2",parsingThread[i].getLink2()); 317 if (i<parsingThread.length/2) { 318 assertStringEquals( 319 "Thread "+i+", link 1:", 320 "/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html", 321 parsingThread[i].getLink1().getLink() 322 ); 323 assertStringEquals( 324 "Thread "+i+", link 2:", 325 "http://normallink.com/sometext.html", 326 parsingThread[i].getLink2().getLink() 327 ); 328 } else { 329 assertStringEquals( 330 "Thread "+i+", link 1:", 331 "http://normallink.com/sometext.html", 332 parsingThread[i].getLink1().getLink() 333 ); 334 assertNotNull("Thread "+i+" link 2",parsingThread[i].getLink2()); 335 assertStringEquals( 336 "Thread "+i+", link 2:", 337 "/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html", 338 parsingThread[i].getLink2().getLink() 339 ); 340 } 341 } 342 } 343 } 344 345 private int computeCompletionValue(int numThreads) { 346 return numThreads * (numThreads - 1) / 2; 347 } 348 349 class ParsingThread implements Runnable { 350 Parser mParser; 351 int mId; 352 LinkTag mLink1; 353 LinkTag mLink2; 354 boolean mResult; 355 int mMax; 356 357 ParsingThread(int id, String testHtml, int max) { 358 mId = id; 359 mMax = max; 360 mParser = Parser.createParser(testHtml, null); 361 } 362 363 public void run() { 364 try { 365 mResult = false; 366 Node linkTag [] = mParser.extractAllNodesThatAre(LinkTag.class); 367 mLink1 = (LinkTag)linkTag[0]; 368 mLink2 = (LinkTag)linkTag[1]; 369 if (mId < mMax / 2) { 370 if (mLink1.getLink().equals("/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html") && 371 mLink2.getLink().equals("http://normallink.com/sometext.html")) 372 mResult = true; 373 } else { 374 if (mLink1.getLink().equals("http://normallink.com/sometext.html") && 375 mLink2.getLink().equals("http://normallink.com/sometext.html")) 376 mResult = true; 377 } 378 } 379 catch (ParserException e) { 380 System.err.println("Parser Exception"); 381 e.printStackTrace(); 382 } 383 finally { 384 testProgress += mId; 385 } 386 } 387 388 public LinkTag getLink1() { 389 return (mLink1); 390 } 391 392 public LinkTag getLink2() { 393 return (mLink2); 394 } 395 396 public boolean passed() { 397 return (mResult); 398 } 399 } 400 401 404 public void testStandAloneToHTML () throws ParserException 405 { 406 String html = "<input disabled>"; 407 createParser(html); 408 parser.setNodeFactory (new PrototypicalNodeFactory (true)); 409 parseAndAssertNodeCount (1); 410 assertType ("should be Tag", Tag.class, node[0]); 411 Tag tag = (Tag)node[0]; 412 assertStringEquals ("html", html, tag.toHtml ()); 413 } 414 415 418 public void testMissingValueToHTML () throws ParserException 419 { 420 String html = "<input disabled=>"; 421 createParser(html); 422 parser.setNodeFactory (new PrototypicalNodeFactory (true)); 423 parseAndAssertNodeCount (1); 424 assertType ("should be Tag", Tag.class, node[0]); 425 Tag tag = (Tag)node[0]; 426 assertStringEquals ("html", html, tag.toHtml ()); 427 } 428 } | Popular Tags |