package org.htmlparser.tests;

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.PrintWriter;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Locale;

import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.PrototypicalNodeFactory;
import org.htmlparser.Remark;
import org.htmlparser.Tag;
import org.htmlparser.Text;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.lexer.InputStreamSource;
import org.htmlparser.tags.BodyTag;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.MetaTag;
import org.htmlparser.util.DefaultParserFeedback;
import org.htmlparser.util.NodeIterator;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.htmlparser.util.SimpleNodeIterator;

/**
 * Tests that drive {@link Parser} against in-memory strings, scratch files
 * and live URLs.
 * Several tests open HTTP connections and therefore need network access.
 * Tests that use a scratch file write <code>delete_me.html</code> into the
 * directory named by the <code>user.dir</code> system property and delete it
 * again in a <code>finally</code> clause.
 */
public class ParserTest extends ParserTestCase
{
    static
    {
        // NOTE(review): nothing in this file reads this property; presumably
        // the test harness does - confirm before removing.
        System.setProperty ("org.htmlparser.tests.ParserTest", "ParserTest");
    }

    /**
     * Creates the named test case.
     * @param name The test name, passed on to the superclass.
     */
    public ParserTest (String name)
    {
        super (name);
    }

    /**
     * A run of 5001 identical characters should come back as a single node.
     */
    public void testElements () throws Exception
    {
        StringBuffer hugeData = new StringBuffer ();
        for (int i = 0; i < 5001; i++)
            hugeData.append ('a');
        createParser (hugeData.toString ());
        int i = 0;
        // 'node' and 'parser' are inherited from ParserTestCase
        for (NodeIterator e = parser.elements (); e.hasMoreNodes ();)
        {
            node[i++] = e.nextNode ();
        }
        assertEquals ("There should be 1 node identified", 1, i);
    }

    /**
     * Parses a live page twice, with a reset in between, and expects the
     * same number of top level nodes both times.
     * Requires network access.
     */
    public void testElementsFromWeb () throws Exception
    {
        Parser parser;
        try
        {
            parser = new Parser ("http://www.google.com");
        }
        catch (Exception e)
        {
            throw new ParserException ("You must be offline! This test needs you to be connected to the internet.", e);
        }

        // Only the counts matter, so the nodes are not stored; this avoids
        // overflowing a fixed size array when the page is large.
        int cnt = countTopLevelNodes (parser);
        parser.reset ();
        int i = countTopLevelNodes (parser);
        assertEquals ("There should be " + cnt + " nodes identified (second call to parser.elements())", cnt, i);
    }

    /**
     * Posts an address lookup form over an HttpURLConnection, parses the
     * response from the connection and expects the postal code to appear in
     * one of the top level text nodes.
     * Requires network access.
     */
    public void testPOST () throws Exception
    {
        final String number = "2708";
        final String street = "Kelly";
        final String type = "Avenue";
        final String city = "Ottawa";
        final String province = "ON";
        final String postal_code = "K2B 7V4";

        Parser parser;
        URL url;
        HttpURLConnection connection;
        StringBuffer buffer;
        PrintWriter out;
        boolean pass;
        NodeIterator enumeration;
        Node node;
        Text string;

        try
        {
            url = new URL ("http://www.canadapost.ca/tools/pcl/bin/cp_search_response-e.asp");
            connection = (HttpURLConnection)url.openConnection ();
            connection.setRequestMethod ("POST");
            connection.setRequestProperty ("Referer", "http://www.canadapost.ca/tools/pcl/bin/default-e.asp");
            connection.setDoOutput (true);
            connection.setDoInput (true);
            connection.setUseCaches (false);
            // The form body; field names and their order are sent exactly
            // as listed here.
            buffer = new StringBuffer (1024);
            buffer.append ("app_language=english&");
            buffer.append ("app_response_start_row_number=1&");
            buffer.append ("app_response_rows_max=9&");
            buffer.append ("app_source=quick&");
            buffer.append ("query_source=q&");
            buffer.append ("name=&");
            buffer.append ("postal_code=&");
            buffer.append ("directory_area_name=&");
            buffer.append ("delivery_mode=&");
            buffer.append ("Suffix=&");
            buffer.append ("street_direction=&");
            buffer.append ("installation_type=&");
            buffer.append ("delivery_number=&");
            buffer.append ("installation_name=&");
            buffer.append ("unit_numbere=&");
            buffer.append ("app_state=production&");
            buffer.append ("street_number=").append (number).append ("&");
            buffer.append ("street_name=").append (street).append ("&");
            buffer.append ("street_type=").append (type).append ("&");
            buffer.append ("test=&");
            buffer.append ("city=").append (city).append ("&");
            buffer.append ("prov=").append (province).append ("&");
            buffer.append ("Search=");
            out = new PrintWriter (connection.getOutputStream ());
            out.print (buffer.toString ());
            out.close ();
            parser = new Parser (connection);
            parser.setNodeFactory (new PrototypicalNodeFactory (true));
        }
        catch (Exception e)
        {
            throw new ParserException ("You must be offline! This test needs you to be connected to the internet.", e);
        }

        pass = false;
        for (enumeration = parser.elements (); enumeration.hasMoreNodes ();)
        {
            node = enumeration.nextNode ();
            if (node instanceof Text)
            {
                string = (Text)node;
                if (-1 != string.getText ().indexOf (postal_code))
                    pass = true;
            }
        }
        assertTrue ("POST operation failed.", pass);
    }

    /**
     * Writes a small page to a scratch file, parses it by file name and
     * expects 20 top level nodes.
     */
    public void testFile ()
    {
        File file;
        PrintWriter out;
        Parser parser;

        file = scratchFile ();
        try
        {
            out = new PrintWriter (new FileWriter (file));
            printSamplePage (out);
            out.close ();
            parser = new Parser (file.getAbsolutePath (), new DefaultParserFeedback (DefaultParserFeedback.QUIET));
            parser.setNodeFactory (new PrototypicalNodeFactory (true));
            assertEquals ("Expected nodes", 20, countTopLevelNodes (parser));
        }
        catch (Exception e)
        {
            fail (e.toString ());
        }
        finally
        {
            file.delete ();
        }
    }

    /**
     * Checks when the scratch file behind a parser can be deleted.
     * The sample page is padded with InputStreamSource.BUFFER_SIZE blank
     * lines and one node is read. On Windows the delete is then expected to
     * fail; after the parser and iterator references are dropped and
     * garbage collection and finalization have been requested, the delete
     * is expected to succeed on every platform.
     */
    public void testFileDelete ()
    {
        File file;
        PrintWriter out;
        Parser parser;
        NodeIterator enumeration;

        file = scratchFile ();
        try
        {
            out = new PrintWriter (new FileWriter (file));
            printSamplePage (out);
            // NOTE(review): presumably the padding keeps the file from being
            // consumed in a single read - confirm against InputStreamSource.
            for (int i = 0; i < InputStreamSource.BUFFER_SIZE; i++)
                out.println ();
            out.close ();
            parser = new Parser (file.getAbsolutePath (), new DefaultParserFeedback (DefaultParserFeedback.QUIET));
            parser.setNodeFactory (new PrototypicalNodeFactory (true));
            enumeration = parser.elements ();
            enumeration.nextNode ();
            if (-1 != System.getProperty ("os.name").indexOf ("Windows"))
                assertTrue ("file deleted with more available", !file.delete ());
            // drop every reference so finalization can release the file
            parser = null;
            enumeration = null;
            System.gc ();
            System.runFinalization ();
            assertTrue ("file not deleted after destroy", file.delete ());
        }
        catch (Exception e)
        {
            fail (e.toString ());
        }
        finally
        {
            file.delete ();
        }
    }

    /**
     * Expects the parser to report Shift_JIS for a page immediately after
     * the connection is opened, before any node is read.
     * Requires network access.
     */
    public void testHTTPCharset ()
    {
        Parser parser;
        try
        {
            parser = new Parser ("http://www.ibm.com/jp/", Parser.noFeedback);
            assertTrue ("Character set should be Shift_JIS", parser.getEncoding ().equalsIgnoreCase ("Shift_JIS"));
        }
        catch (ParserException e)
        {
            fail ("could not open http://www.ibm.com/jp/: " + e.getMessage ());
        }
    }

    /**
     * Expects the encoding to be ISO-8859-1 when the page is opened and
     * Shift_JIS once the nodes up to the body tag have been read.
     * Requires network access.
     */
    public void testHTMLCharset ()
    {
        Parser parser;
        NodeIterator enumeration;

        try
        {
            parser = new Parser ("http://www.sony.co.jp", Parser.noFeedback);
            assertEquals ("Character set by default is ISO-8859-1", "ISO-8859-1", parser.getEncoding ());
            enumeration = parser.elements ();
            // read nodes only until the body tag shows up
            while (enumeration.hasMoreNodes ())
                if (enumeration.nextNode () instanceof BodyTag)
                    break;
            assertTrue ("Character set should be Shift_JIS", parser.getEncoding ().equalsIgnoreCase ("Shift_JIS"));
        }
        catch (ParserException e)
        {
            fail ("could not open http://www.sony.co.jp: " + e.getMessage ());
        }
    }

    /**
     * Parses the gb2312 test page with only MetaTag registered in the node
     * factory and expects 23 top level nodes.
     * Requires network access.
     */
    public void testSwitchCharset () throws ParserException
    {
        Parser parser;
        String url = "http://htmlparser.sourceforge.net/test/gb2312Charset.html";

        parser = new Parser (url);
        parser.setNodeFactory (new PrototypicalNodeFactory (new MetaTag ()));
        assertEquals ("Expected nodes", 23, countTopLevelNodes (parser));
    }

    /**
     * After the DoublequotedCharset test page has been read completely the
     * parser should report UTF-8.
     * Requires network access.
     */
    public void testDoubleQuotedCharset () throws ParserException
    {
        Parser parser;
        String url = "http://htmlparser.sourceforge.net/test/DoublequotedCharset.html";

        parser = new Parser (url);
        for (NodeIterator e = parser.elements (); e.hasMoreNodes ();)
            e.nextNode ();
        assertEquals ("Wrong encoding", "UTF-8", parser.getEncoding ());
    }

    /**
     * After the SinglequotedCharset test page has been read completely the
     * parser should report UTF-8.
     * Requires network access.
     */
    public void testSingleQuotedCharset () throws ParserException
    {
        Parser parser;
        String url = "http://htmlparser.sourceforge.net/test/SinglequotedCharset.html";

        parser = new Parser (url);
        for (NodeIterator e = parser.elements (); e.hasMoreNodes ();)
            e.nextNode ();
        assertEquals ("Wrong encoding", "UTF-8", parser.getEncoding ());
    }

    /**
     * Constructing a parser on a host that cannot be reached should throw
     * a ParserException.
     */
    public void testNullUrl ()
    {
        try
        {
            new Parser ("http://none.existant.url.org", Parser.noFeedback);
            fail ("Should have thrown an exception!");
        }
        catch (ParserException expected)
        {
            // the exception is the outcome this test is looking for
        }
    }

    /**
     * Parses a page whose URL contains spaces and expects 20 top level
     * nodes.
     * Requires network access.
     */
    public void testURLWithSpaces () throws ParserException
    {
        Parser parser;
        String url = "http://htmlparser.sourceforge.net/test/This is a Test Page.html";

        parser = new Parser (url);
        parser.setNodeFactory (new PrototypicalNodeFactory (true));
        assertEquals ("Expected nodes", 20, countTopLevelNodes (parser));
    }

    /**
     * Collects every LinkTag from a saved search page and expects exactly
     * 11 of them, all instances of LinkTag.
     */
    public void testLinkCollection () throws ParserException
    {
        createParser (
            "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\"><title>Google</title><style><!--\n"+
            "body,td,a,p,.h{font-family:arial,sans-serif;} .h{font-size: 20px;} .h{color:} .q{text-decoration:none; color:#0000cc;}\n"+
            "//--></style>\n"+
            "<script>\n"+
            "<!--\n"+
            "function sf(){document.f.q.focus();}\n"+
            "function c(p){var f=document.f;if (f.action) {f.action = 'http://'+p;f.submit();return false;}return true;}\n"+
            "// -->\n"+
            "</script>\n"+
            "</head><body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onLoad=sf()><center><table border=0 cellspacing=0 cellpadding=0><tr><td><img SRC=\"images/logo.gif\" width=276 height=110 alt=\"Google\"></td></tr></table><br>\n"+
            "<table border=0 cellspacing=0 cellpadding=0>" +
            "<tr>" +
            "<td width=15> </td>" +
            "<td id=0 bgcolor=#3366cc align=center width=95 nowrap>" +
            "<font color=#ffffff size=-1><b>Web</b></font>" +
            "</td>" +
            "<td width=15> </td>" +
            "<td id=1 bgcolor=#efefef align=center width=95 nowrap onClick=\"return c('www.google.com/imghp');\" style=cursor:pointer;cursor:hand;><a id=1a class=q HREF=\"/imghp?hl=en&ie=UTF-8&oe=UTF-8\" onClick=\"return c('www.google.com/imghp');\"><font size=-1>Images</font></a></td><td width=15> </td><td id=2 bgcolor=#efefef align=center width=95 nowrap onClick=\"return c('www.google.com/grphp');\" style=cursor:pointer;cursor:hand;><a id=2a class=q HREF=\"/grphp?hl=en&ie=UTF-8&oe=UTF-8\" onClick=\"return c('www.google.com/grphp');\"><font size=-1>Groups</font></a></td><td width=15> </td><td id=3 bgcolor=#efefef align=center width=95 nowrap onClick=\"return c('www.google.com/dirhp');\" style=cursor:pointer;cursor:hand;><a id=3a class=q HREF=\"/dirhp?hl=en&ie=UTF-8&oe=UTF-8\" onClick=\"return c('www.google.com/dirhp');\"><font size=-1>Directory</font></a></td><td width=15> </td><td id=4 bgcolor=#efefef align=center width=95 nowrap onClick=\"return c('www.google.com/nwshp');\" style=cursor:pointer;cursor:hand;><a id=4a class=q HREF=\"/nwshp?hl=en&ie=UTF-8&oe=UTF-8\" onClick=\"return c('www.google.com/nwshp');\"><font size=-1><nobr>News-<font color=red>New!</font></nobr></font></a></td><td width=15> </td></tr><tr><td colspan=12 bgcolor=#3366cc><img width=1 height=1 alt=\"\">" +
            "</td>" +
            "</tr>" +
            "</table>" +
            "<br>" +
            "<form action=\"/search\" name=f>" +
            "<table cellspacing=0 cellpadding=0>" +
            "<tr>" +
            "<td width=75> </td>" +
            "<td align=center>" +
            "<input type=hidden name=hl value=en>" +
            "<input type=hidden name=ie value=\"UTF-8\">" +
            "<input type=hidden name=oe value=\"UTF-8\">" +
            "<input maxLength=256 size=55 name=q value=\"\"><br>" +
            "<input type=submit value=\"Google Search\" name=btnG>" +
            "<input type=submit value=\"I'm Feeling Lucky\" name=btnI>" +
            "</td>" +
            "<td valign=top nowrap>" +
            "<font size=-2> • <a HREF=/advanced_search?hl=en>Advanced Search</a>" +
            "<br> • <a HREF=/preferences?hl=en>Preferences</a>" +
            "<br> • <a HREF=/language_tools?hl=en>Language Tools</a>" +
            "</font>" +
            "</td>" +
            "</tr>" +
            "</table>" +
            "</form><br>\n"+
            "<br><font size=-1><a HREF=\"/ads/\">Advertise with Us</a> - <a HREF=\"/services/\">Search Solutions</a> - <a HREF=\"/options/\">Services & Tools</a> - <a HREF=/about.html>Jobs, Press, & Help</a><span id=hp style=\"behavior:url(#default#homepage)\"></span>\n"+
            "<script>\n"+
            "if (!hp.isHomePage('http://www.google.com/')) {document.write(\"<p><a HREF=\"/mgyhp.html\" onClick=\"style.behavior='url(#default#homepage)';setHomePage('http://www.google.com/');\">Make Google Your Homepage!</a>\");}\n"+
            "</script></font>\n"+
            "<p><font size=-2>©2002 Google</font><font size=-2> - Searching 3,083,324,652 web pages</font></center></body></html>\n"
        );
        NodeList collectionList = new NodeList ();
        NodeClassFilter filter = new NodeClassFilter (LinkTag.class);
        for (NodeIterator e = parser.elements (); e.hasMoreNodes ();)
            e.nextNode ().collectInto (collectionList, filter);
        assertEquals ("Size of collection vector should be 11", 11, collectionList.size ());
        for (SimpleNodeIterator e = collectionList.elements (); e.hasMoreNodes ();)
        {
            Node node = e.nextNode ();
            assertTrue ("Only links should have been parsed", node instanceof LinkTag);
        }
    }

    /**
     * Collects every IMG tag from a saved page and expects exactly 5 of
     * them, all instances of ImageTag.
     */
    public void testImageCollection () throws ParserException
    {
        createParser (
            "<html>\n"+
            "<head>\n"+
            "<meta name=\"generator\" content=\"Created Using Yahoo! PageBuilder 2.60.24\">\n"+
            "</head>\n"+
            "<body bgcolor=\"#FFFFFF\" link=\"#0000FF\" vlink=\"#FF0000\" text=\"#000000\"\n"+
            " onLoad=\"window.onresize=new Function('if (navigator.appVersion==\'Netscape\') history.go(0);');\">\n"+
            "<div id=\"layer0\" style=\"position:absolute;left:218;top:40;width:240;height:26;\">\n"+
            "<table width=240 height=26 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+
            "<td><b><font size=\"+2\"><span style=\"font-size:24\">NISHI-HONGWAN-JI</span></font></b></td>\n"+
            "</tr></table></div>\n"+
            "<div id=\"layer1\" style=\"position:absolute;left:75;top:88;width:542;height:83;\">\n"+
            "<table width=542 height=83 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+
            "<td><span style=\"font-size:14\">The Nihi Hongwanj-ji temple is very traditional, very old, and very beautiful. This is the place that we stayed on our first night in Kyoto. We then attended the morning prayer ceremony, at 6:30 am. Staying here costed us 7,500 yen, which was inclusive of dinner and breakfast, and usage of the o-furo (public bath). Felt more like a luxury hotel than a temple.</span></td>\n"+
            "</tr></table></div>\n"+
            "<div id=\"layer2\" style=\"position:absolute;left:144;top:287;width:128;height:96;\">\n"+
            "<table width=128 height=96 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+
            "<td><a HREF=\"nishi-hongwanji1.html\"><img height=96 width=128 SRC=\"nishi-hongwanji1-thumb.jpg\" border=0 ></a></td>\n"+
            "</tr></table></div>\n"+
            "<div id=\"layer3\" style=\"position:absolute;left:415;top:285;width:128;height:96;\">\n"+
            "<table width=128 height=96 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+
            "<td><a HREF=\"nishi-hongwanji3.html\"><img height=96 width=128 SRC=\"nishi-hongwanji2-thumb.jpg\" border=0 ></a></td>\n"+
            "</tr></table></div>\n"+
            "<div id=\"layer4\" style=\"position:absolute;left:414;top:182;width:128;height:96;\">\n"+
            "<table width=128 height=96 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+
            "<td><a HREF=\"higashi-hongwanji.html\"><img height=96 width=128 SRC=\"higashi-hongwanji-thumb.jpg\" border=0 ></a></td>\n"+
            "</tr></table></div>\n"+
            "<div id=\"layer5\" style=\"position:absolute;left:78;top:396;width:530;height:49;\">\n"+
            "<table width=530 height=49 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+
            "<td><span style=\"font-size:14\">Click on the pictures to see the full-sized versions. The picture at the top right corner is taken in Higashi-Hongwanji. Nishi means west, and Higashi means east. These two temples are adjacent to each other and represent two different Buddhist sects.</span></td>\n"+
            "</tr></table></div>\n"+
            "<div id=\"layer6\" style=\"position:absolute;left:143;top:180;width:128;height:102;\">\n"+
            "<table width=128 height=102 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+
            "<td><a HREF=\"nishi-hongwanji4.html\"><img height=102 width=128 SRC=\"nishi-hongwanji4-thumb.jpg\" border=0 ></a></td>\n"+
            "</tr></table></div>\n"+
            "<div id=\"layer7\" style=\"position:absolute;left:280;top:235;width:124;height:99;\">\n"+
            "<table width=124 height=99 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+
            "<td><a HREF=\"nishi-hongwanji-lodging.html\"><img height=99 width=124 SRC=\"nishi-hongwanji-lodging-thumb.jpg\" border=0 ></a></td>\n"+
            "</tr></table></div>\n"+
            "</body>\n"+
            "</html>");
        NodeList collectionList = new NodeList ();
        TagNameFilter filter = new TagNameFilter ("IMG");
        for (NodeIterator e = parser.elements (); e.hasMoreNodes ();)
            e.nextNode ().collectInto (collectionList, filter);
        assertEquals ("Size of collection vector should be 5", 5, collectionList.size ());
        for (SimpleNodeIterator e = collectionList.elements (); e.hasMoreNodes ();)
        {
            Node node = e.nextNode ();
            assertTrue ("Only images should have been parsed", node instanceof ImageTag);
        }
    }

    /**
     * Parses a page containing an attribute value with an embedded single
     * quote followed by a tag split over two lines.
     * The test makes no assertions; it passes when iteration completes.
     */
    public void testOutOfMemory () throws Exception
    {
        createParser (
            "<html><head></head>\n"
            + "<body>\n"
            + "<table>\n"
            + "<tr>\n"
            + " <td><img SRC=\"foo.jpg\" alt=\"f's b\"><font\n"
            + " size=1>blah</font>\n"
            + "</td>\n"
            + "</tr>\n"
            + "</table>\n"
            + "</body></html>\n");
        for (NodeIterator e = parser.elements (); e.hasMoreNodes ();)
        {
            e.nextNode ();
        }
    }

    /**
     * Expects 21 top level nodes from a page with an embedded single quote
     * in an attribute value, and expects the node at index 10 to be a tag
     * whose ALT attribute is "f's b".
     */
    public void testEmbeddedQuoteSplit () throws Exception
    {
        createParser (
            "<html><head></head>\n"
            + "<body>\n"
            + "<table>\n"
            + "<tr><td><img SRC=\"x\" alt=\"f's b\"><font\n"
            + "size=1>blah</font></td></tr>\n"
            + "</table>\n"
            + "</body></html>");
        parser.setNodeFactory (new PrototypicalNodeFactory (true));
        int i = 0;
        for (NodeIterator e = parser.elements (); e.hasMoreNodes ();)
        {
            Node node = e.nextNode ();
            if (10 == i)
            {
                assertTrue ("not a tag", node instanceof Tag);
                assertEquals ("ALT attribute incorrect", "f's b", ((Tag)node).getAttribute ("ALT"));
            }
            i++;
        }
        assertEquals ("Expected nodes", 21, i);
    }

    /**
     * Replaces the input of an existing parser with text read back from a
     * scratch file and expects three link tags to be extracted from it.
     */
    public void testSetInputHTML () throws Exception
    {
        String html;
        File file;
        PrintWriter out;
        Node[] nodes;

        html = "<html></html>";
        createParser (html);
        file = scratchFile ();
        try
        {
            out = new PrintWriter (new FileWriter (file));
            out.print ("<html>\r\n");
            out.print ("<head>\r\n");
            out.print ("<!-- BEGIN TYPE -->\r\n");
            out.print ("<!-- NAVIGATION -->\r\n");
            out.print ("<!-- END TYPE -->\r\n");
            out.print ("<!-- BEGIN TITLE -->\r\n");
            out.print ("<title>Einstiegsseite</title>\r\n");
            out.print ("<!-- END TITLE -->\r\n");
            out.print ("</head>\r\n");
            out.print ("<body>\r\n");
            out.print ("<ul>\r\n");
            out.print ("<li>\r\n");
            out.print ("<!-- BEGIN ITEM -->\r\n");
            out.print ("<!-- BEGIN REF -->\r\n");
            out.print ("<a HREF=\"kapitel1/index.html\">\r\n");
            out.print ("<!-- END REF -->\r\n");
            out.print ("<!-- BEGIN REFTITLE -->\r\n");
            out.print ("Kapitel 1\r\n");
            out.print ("<!-- END REFTITLE -->\r\n");
            out.print ("</a>\r\n");
            out.print ("<!-- END ITEM -->\r\n");
            out.print ("</li>\r\n");
            out.print ("<li>\r\n");
            out.print ("<!-- BEGIN ITEM -->\r\n");
            out.print ("<!-- BEGIN REF -->\r\n");
            out.print ("<a HREF=\"kapitel2/index.html\">\r\n");
            out.print ("<!-- END REF -->\r\n");
            out.print ("<!-- BEGIN REFTITLE -->\r\n");
            out.print ("Kapitel 2\r\n");
            out.print ("<!-- END REFTITLE -->\r\n");
            out.print ("</a>\r\n");
            out.print ("<!-- END ITEM -->\r\n");
            out.print ("</li>\r\n");
            out.print ("<li>\r\n");
            out.print ("<!-- BEGIN ITEM -->\r\n");
            out.print ("<!-- BEGIN REF -->\r\n");
            out.print ("<a HREF=\"kapitel3/index.html\">\r\n");
            out.print ("<!-- END REF -->\r\n");
            out.print ("<!-- BEGIN REFTITLE -->\r\n");
            out.print ("Kapitel 3\r\n");
            out.print ("<!-- END REFTITLE -->\r\n");
            out.print ("</a>\r\n");
            out.print ("<!-- END ITEM -->\r\n");
            out.print ("</li>\r\n");
            out.print ("</ul>\r\n");
            out.print ("</body>\r\n");
            out.print ("</html>");
            out.close ();

            byte[] buffer = new byte[(int)file.length ()];
            DataInputStream stream = new DataInputStream (
                new BufferedInputStream (new FileInputStream (file)));
            try
            {
                stream.readFully (buffer);
            }
            finally
            {
                // release the handle so the delete below is not blocked
                stream.close ();
            }
            html = new String (buffer);

            // a ParserException here reaches the catch below and fails the
            // test with the real cause
            parser.setInputHTML (html);
            nodes = parser.extractAllNodesThatAre (LinkTag.class);
            assertEquals ("node count", 3, nodes.length);
        }
        catch (Exception e)
        {
            fail (e.toString ());
        }
        finally
        {
            file.delete ();
        }
    }

    /**
     * The toString() of a tag written with a trailing slash should contain
     * the tag contents as they were written.
     */
    public void testXMLTypeToString () throws Exception
    {
        String guts;
        String output;

        guts = "TD width=\"69\"/";
        createParser ("<" + guts + ">");
        parseAndAssertNodeCount (1);
        output = node[0].toString ();
        assertTrue ("bad toString()", -1 != output.indexOf (guts));
    }

    /**
     * Parses a title tag while the default locale is set to "tr" and
     * expects toHtml() to return the input unchanged.
     * The original default locale is restored afterwards.
     */
    public void testDifferentLocale () throws Exception
    {
        String html;
        Locale original;

        html = "<title>This is supposedly Turkish.</title>";
        original = Locale.getDefault ();
        try
        {
            Locale.setDefault (new Locale ("tr"));
            createParser (html);
            parseAndAssertNodeCount (1);
            assertStringEquals ("html", html, node[0].toHtml ());
        }
        finally
        {
            Locale.setDefault (original);
        }
    }

    /**
     * Calling setText() on a text node should change the node's own
     * toHtml(), getText() and toString() as well as the toHtml() of its
     * parent.
     */
    public void testSetStringText () throws Exception
    {
        String text;
        String html;
        String newtext;
        String newhtml;
        Node txt;

        text = "This is just text.";
        html = "<body>" + text + "</body>";
        newtext = "This is different text.";
        newhtml = "<body>" + newtext + "</body>";
        createParser (html);
        parseAndAssertNodeCount (1);
        assertStringEquals ("html wrong", html, node[0].toHtml ());
        assertEquals ("wrong number of children", 1, node[0].getChildren ().size ());
        assertTrue ("string node expected", node[0].getChildren ().elementAt (0) instanceof Text);
        txt = node[0].getChildren ().elementAt (0);
        assertStringEquals ("string html wrong", text, txt.toHtml ());
        assertStringEquals ("string contents wrong", text, txt.getText ());
        assertTrue ("toString wrong", txt.toString ().endsWith (text));
        txt.setText (newtext);
        assertStringEquals ("html wrong", newhtml, node[0].toHtml ());
        assertStringEquals ("new string html wrong", newtext, txt.toHtml ());
        assertStringEquals ("new string contents wrong", newtext, txt.getText ());
        assertTrue ("toString wrong", txt.toString ().endsWith (newtext));
    }

    /**
     * Calling setText() on a remark node should change the node's own
     * toHtml(), getText() and toString() as well as the toHtml() of its
     * parent.
     * The same results are expected whether the new text is passed bare or
     * wrapped in the comment delimiters.
     */
    public void testSetRemarkText () throws Exception
    {
        String text;
        String remark;
        String html;
        String newtext;
        String newremark;
        String newhtml;
        Node rem;

        text = " This is a remark. ";
        remark = "<!--" + text + "-->";
        html = "<body>" + remark + "</body>";
        newtext = " This is a different remark. ";
        newremark = "<!--" + newtext + "-->";
        newhtml = "<body>" + newremark + "</body>";
        createParser (html);
        parseAndAssertNodeCount (1);
        assertStringEquals ("html wrong", html, node[0].toHtml ());
        assertEquals ("wrong number of children", 1, node[0].getChildren ().size ());
        assertTrue ("remark node expected", node[0].getChildren ().elementAt (0) instanceof Remark);
        rem = node[0].getChildren ().elementAt (0);
        assertStringEquals ("remark html wrong", remark, rem.toHtml ());
        assertStringEquals ("remark contents wrong", text, rem.getText ());
        assertTrue ("toString wrong", rem.toString ().endsWith (text));
        // bare text
        rem.setText (newtext);
        assertStringEquals ("html wrong", newhtml, node[0].toHtml ());
        assertStringEquals ("new remark html wrong", newremark, rem.toHtml ());
        assertStringEquals ("new remark contents wrong", newtext, rem.getText ());
        assertTrue ("toString wrong", rem.toString ().endsWith (newtext));
        // text already wrapped in the comment delimiters
        rem.setText (newremark);
        assertStringEquals ("html wrong", newhtml, node[0].toHtml ());
        assertStringEquals ("new remark html wrong", newremark, rem.toHtml ());
        assertStringEquals ("new remark contents wrong", newtext, rem.getText ());
        assertTrue ("toString wrong", rem.toString ().endsWith (newtext));
    }

    /**
     * Spaces in a URL handed to the parser should be reported by getURL()
     * as %20.
     * Requires network access.
     */
    public void testFixSpaces () throws ParserException
    {
        String url = "http://htmlparser.sourceforge.net/test/This is a Test Page.html";
        parser = new Parser (url);
        assertEquals ("Expected", "http://htmlparser.sourceforge.net/test/This%20is%20a%20Test%20Page.html", parser.getURL ());
    }

    /**
     * Iterates over the top level nodes of the given parser and counts them.
     * @param source The parser to read from.
     * @return The number of nodes returned by the parser's iterator.
     * @exception ParserException If the iterator fails.
     */
    private static int countTopLevelNodes (Parser source) throws ParserException
    {
        int count = 0;
        for (NodeIterator e = source.elements (); e.hasMoreNodes ();)
        {
            e.nextNode ();
            count++;
        }
        return (count);
    }

    /**
     * Names the scratch file used by the file based tests.
     * @return <code>delete_me.html</code> in the <code>user.dir</code>
     * directory; the file itself is not created.
     */
    private static File scratchFile ()
    {
        String path = System.getProperty ("user.dir");
        if (!path.endsWith (File.separator))
            path += File.separator;
        return (new File (path + "delete_me.html"));
    }

    /**
     * Prints the ten line sample page shared by the file based tests.
     * @param out The writer to print to; it is left open.
     */
    private static void printSamplePage (PrintWriter out)
    {
        out.println ("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">");
        out.println ("<html>");
        out.println ("<head>");
        out.println ("<title>test</title>");
        out.println ("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">");
        out.println ("</head>");
        out.println ("<body>");
        out.println ("This is a test page ");
        out.println ("</body>");
        out.println ("</html>");
    }
}