TagTest


1   // HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
2   // http://sourceforge.org/projects/htmlparser
3   // Copyright (C) 2004 Somik Raha
4   //
5   // Revision Control Information
6   //
7   // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/TagTest.java,v $
8   // $Author: derrickoswald $
9   // $Date: 2004/09/02 02:28:14 $
10  // $Revision: 1.62 $
11  //
12  // This library is free software; you can redistribute it and/or
13  // modify it under the terms of the GNU Lesser General Public
14  // License as published by the Free Software Foundation; either
15  // version 2.1 of the License, or (at your option) any later version.
16  //
17  // This library is distributed in the hope that it will be useful,
18  // but WITHOUT ANY WARRANTY; without even the implied warranty of
19  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  // Lesser General Public License for more details.
21  //
22  // You should have received a copy of the GNU Lesser General Public
23  // License along with this library; if not, write to the Free Software
24  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25  //
26  
27  package org.htmlparser.tests.tagTests;
28  
29  import org.htmlparser.Attribute;
30  
31  import org.htmlparser.Node;
32  import org.htmlparser.PrototypicalNodeFactory;
33  import org.htmlparser.Tag;
34  import org.htmlparser.Text;
35  import org.htmlparser.tags.BodyTag;
36  import org.htmlparser.tags.Div;
37  import org.htmlparser.tags.Html;
38  import org.htmlparser.tags.LinkTag;
39  import org.htmlparser.tests.ParserTestCase;
40  import org.htmlparser.util.NodeIterator;
41  import org.htmlparser.util.ParserException;
42  
43  public class TagTest extends ParserTestCase
44  {
45      static
46      {
47          System.setProperty ("org.htmlparser.tests.tagTests.TagTest", "TagTest");
48      }
49  
50      private static final boolean JSP_TESTS_ENABLED = false;
51  
52      public TagTest(String   name) {
53          super(name);
54      }
55  
56      /**
57       * The bug being reproduced is this : <BR>
58       * &lt;BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000 <BR>
59       * vLink=#551a8b&gt;
60       * The above line is incorrectly parsed in that, the BODY tag is not identified.
61       */
62      public void testBodyTagBug1() throws ParserException {
63          String   body = "<BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc "
64              + "onload=setfocus() text=#000000\nvLink=#551a8b>";
65          createParser(body);
66          parseAndAssertNodeCount(1);
67          // The node should be a body Tag
68          assertTrue("Node should be a BodyTag",node[0] instanceof BodyTag);
69          BodyTag tag = (BodyTag)node[0];
70          String   text = tag.toHtml ();
71          assertEquals("Contents of the tag",body + "</BODY>",text);
72      }
73  
74      /**
75       * The following should be identified as a tag : <BR>
76       *  &lt;MYTAG abcd\n"+
77       *      "efgh\n"+
78       *      "ijkl\n"+
79       *      "mnop&gt;
80       * Creation date: (6/17/2001 5:27:42 PM)
81       */
82      public void testLargeTagBug() throws ParserException {
83          String   mytag = "MYTAG abcd\n"+
84              "efgh\n"+
85              "ijkl\n"+
86              "mnop";
87          createParser(
88              "<" + mytag + ">"
89          );
90          parseAndAssertNodeCount(1);
91          // The node should be an Tag
92          assertTrue("Node should be a Tag",node[0] instanceof Tag);
93          Tag tag = (Tag)node[0];
94          assertEquals("Contents of the tag",mytag,tag.getText());
95  
96  
97      }
98      /**
99       * Bug reported by Gordon Deudney 2002-03-15
100      * Nested JSP Tags were not working
101      */
102     public void testNestedTags() throws ParserException
103     {
104         if (JSP_TESTS_ENABLED)
105         {
106             String   s = "input type=\"text\" value=\"<%=\"test\"%>\" name=\"text\"";
107             String   line = "<"+s+">";
108             createParser(line);
109             parseAndAssertNodeCount(1);
110             assertTrue("The node found should have been an Tag",node[0] instanceof Tag);
111             Tag tag = (Tag) node[0];
112             assertEquals("Tag Contents",s,tag.getText());
113         }
114     }
115 
116     /**
117      * Test parseParameter method
118      * Created by Kaarle Kaila (august 2001)
119      * the tag name is here G
120      */
121     public void testParseParameter3() throws ParserException {
122         Tag tag;
123         Node node=null;
124         String   lin1 = "<DIV class=\"userData\" id=\"oLayout\" name=\"oLayout\"></DIV>";
125         createParser(lin1);
126         NodeIterator en = parser.elements();
127 
128         try {
129 
130             if (en.hasMoreNodes()) {
131                 node = en.nextNode();
132 
133                 tag = (Tag)node;
134                 String   classValue= tag.getAttribute ("CLASS");
135                 assertEquals ("The class value should be ","userData",classValue);
136             }
137 
138         }
139         catch (ClassCastException   ce) {
140             fail("Bad class element = " + node.getClass().getName());
141         }
142     }
143 
144     /**
145      * Test parseParameter method
146      * Created by Kaarle Kaila (august 2001)
147      * the tag name is here A (and should be eaten up by linkScanner)
148      */
149     public void testParseParameterA() throws ParserException {
150         Tag tag;
151         Tag etag;
152         Text snode;
153         Node node=null;
154         String   lin1 = "<A HREF=\"http://www.iki.fi/kaila\" myParameter yourParameter=\"Kaarle Kaaila\">Kaarle's homepage</A><p>Paragraph</p>";
155         createParser(lin1);
156         NodeIterator en = parser.elements();
157         String   a,href,myValue,nice;
158 
159         try {
160 
161             if (en.hasMoreNodes()) {
162                 node = en.nextNode();
163 
164                 tag = (Tag)node;
165                 a = ((Attribute)(tag.getAttributesEx ().elementAt (0))).getName ();
166                 href = tag.getAttribute ("HREF");
167                 myValue = tag.getAttribute ("MYPARAMETER");
168                 nice = tag.getAttribute ("YOURPARAMETER");
169                 assertEquals ("Link tag (A)","A",a);
170                 assertEquals ("href value","http://www.iki.fi/kaila",href);
171                 assertEquals ("myparameter value",null,myValue);
172                 assertEquals ("yourparameter value","Kaarle Kaaila",nice);
173             }
174             if (!(node instanceof LinkTag)) {
175                 // linkscanner has eaten up this piece
176                 if ( en.hasMoreNodes()) {
177                     node = en.nextNode();
178                     snode = (Text)node;
179                     assertEquals("Value of element","Kaarle's homepage",snode.getText());
180                 }
181 
182                 if (en.hasMoreNodes()) {
183                     node = en.nextNode();
184                     etag = (Tag)node;
185                     assertEquals("endtag of link","/A", etag.getText());
186                 }
187             }
188             // testing rest
189             if (en.hasMoreNodes()) {
190                 node = en.nextNode();
191 
192                 tag = (Tag)node;
193                 assertEquals("following paragraph begins",tag.getText(),"p");
194             }
195             if (en.hasMoreNodes()) {
196                 node = en.nextNode();
197                 snode = (Text)node;
198                 assertEquals("paragraph contents","Paragraph",snode.getText());
199             }
200             if (en.hasMoreNodes()) {
201                 node = en.nextNode();
202                 etag = (Tag)node;
203                 assertEquals("paragrapg endtag","/p",etag.getText());
204             }
205 
206         }
207         catch (ClassCastException   ce) {
208             fail("Bad class element = " + node.getClass().getName());
209         }
210     }
211 
212     /**
213      * Test parseParameter method
214      * Created by Kaarle Kaila (august 2001)
215      * the tag name is here G
216      */
217     public void testParseParameterG() throws ParserException{
218         Tag tag;
219         Tag etag;
220         Text snode;
221         Node node=null;
222         String   lin1 = "<G HREF=\"http://www.iki.fi/kaila\" myParameter yourParameter=\"Kaila\">Kaarle's homepage</G><p>Paragraph</p>";
223         createParser(lin1);
224         NodeIterator en = parser.elements();
225         String   a,href,myValue,nice;
226 
227         try {
228 
229             if (en.hasMoreNodes()) {
230                 node = en.nextNode();
231 
232                 tag = (Tag)node;
233                 a = ((Attribute)(tag.getAttributesEx ().elementAt (0))).getName ();
234                 href = tag.getAttribute ("HREF");
235                 myValue = tag.getAttribute ("MYPARAMETER");
236                 nice = tag.getAttribute ("YOURPARAMETER");
237                 assertEquals ("The tagname should be G",a,"G");
238                 assertEquals ("Check the http address",href,"http://www.iki.fi/kaila");
239                 assertEquals ("myValue is not null",myValue,null);
240                 assertEquals ("The second parameter value",nice,"Kaila");
241             }
242             if (en.hasMoreNodes()) {
243                 node = en.nextNode();
244                 snode = (Text)node;
245                 assertEquals("The text of the element",snode.getText(),"Kaarle's homepage");
246             }
247 
248             if (en.hasMoreNodes()) {
249                 node = en.nextNode();
250                 etag = (Tag)node;
251                 assertEquals("Endtag is G","/G", etag.getText());
252             }
253             // testing rest
254             if (en.hasMoreNodes()) {
255                 node = en.nextNode();
256 
257                 tag = (Tag)node;
258                 assertEquals("Follow up by p-tag","p", tag.getText());
259             }
260             if (en.hasMoreNodes()) {
261                 node = en.nextNode();
262                 snode = (Text)node;
263                 assertEquals("Verify the paragraph text","Paragraph", snode.getText());
264             }
265             if (en.hasMoreNodes()) {
266                 node = en.nextNode();
267                 etag = (Tag)node;
268                 assertEquals("Still patragraph endtag","/p", etag.getText());
269             }
270 
271         } catch (ClassCastException   ce) {
272             fail("Bad class element = " + node.getClass().getName());
273         }
274     }
275 
276 
277    /**
278     * Test parseParameter method
279     * Created by Kaarle Kaila (august 2002)
280     * the tag name is here A (and should be eaten up by linkScanner)
281     * Tests elements where = sign is surrounded by spaces
282     */
283     public void testParseParameterSpace() throws ParserException{
284         Tag tag;
285         Tag etag;
286         Text snode;
287         Node node=null;
288         String   lin1 = "<A yourParameter = \"Kaarle\">Kaarle's homepage</A>";
289         createParser(lin1);
290         NodeIterator en = parser.elements();
291         String   a,nice;
292 
293         try {
294 
295             if (en.hasMoreNodes()) {
296                 node = en.nextNode();
297 
298                 tag = (Tag)node;
299                 a = ((Attribute)(tag.getAttributesEx ().elementAt (0))).getName ();
300                 nice = tag.getAttribute ("YOURPARAMETER");
301                 assertEquals ("Link tag (A)",a,"A");
302                 assertEquals ("yourParameter value","Kaarle",nice);
303             }
304             if (!(node instanceof LinkTag)) {
305                 // linkscanner has eaten up this piece
306                 if ( en.hasMoreNodes()) {
307                     node = en.nextNode();
308                     snode = (Text)node;
309                     assertEquals("Value of element","Kaarle's homepage",snode.getText());
310                 }
311 
312                 if (en.hasMoreNodes()) {
313                     node = en.nextNode();
314                     etag = (Tag)node;
315                     assertEquals("Still patragraph endtag","/A",etag.getText());
316                 }
317             }
318             // testing rest
319 
320         } catch (ClassCastException   ce) {
321             fail("Bad class element = " + node.getClass().getName());
322         }
323     }
324 
325     /**
326      * Reproduction of a bug reported by Annette Doyle
327      * This is actually a pretty good example of dirty html - we are in a fix
328      * here, bcos the font tag (the first one) has an erroneous inverted comma. In Tag,
329      * we ignore anything in inverted commas, and dont if its outside. This kind of messes
330      * up our parsing almost completely.
331      */
332     public void testStrictParsing() throws ParserException {
333         String   testHTML =
334         "<div align=\"center\">" +
335             "<font face=\"Arial,\"helvetica,\" sans-serif=\"sans-serif\" size=\"2\" color=\"#FFFFFF\">" +
336                 "<a HREF=\"/index.html\" link=\"#000000\" vlink=\"#000000\"><font color=\"#FFFFFF\">Home</font></a>\n"+
337                 "<a HREF=\"/cia/notices.html\" link=\"#000000\" vlink=\"#000000\"><font color=\"#FFFFFF\">Notices</font></a>\n"+
338                 "<a HREF=\"/cia/notices.html#priv\" link=\"#000000\" vlink=\"#000000\"><font color=\"#FFFFFF\">Privacy</font></a>\n"+
339                 "<a HREF=\"/cia/notices.html#sec\" link=\"#000000\" vlink=\"#000000\"><font color=\"#FFFFFF\">Security</font></a>\n"+
340                 "<a HREF=\"/cia/contact.htm\" link=\"#000000\" vlink=\"#000000\"><font color=\"#FFFFFF\">Contact Us</font></a>\n"+
341                 "<a HREF=\"/cia/sitemap.html\" link=\"#000000\" vlink=\"#000000\"><font color=\"#FFFFFF\">Site Map</font></a>\n"+
342                 "<a HREF=\"/cia/siteindex.html\" link=\"#000000\" vlink=\"#000000\"><font color=\"#FFFFFF\">Index</font></a>\n"+
343                 "<a HREF=\"/search\" link=\"#000000\" vlink=\"#000000\"><font color=\"#FFFFFF\">Search</font></a>\n"+
344             "</font>" +
345         "</div>";
346 
347         createParser(testHTML,"http://www.cia.gov");
348         parseAndAssertNodeCount(1);
349         // Check the tags
350         assertType("node",Div.class,node[0]);
351         Div div = (Div)node[0];
352         Tag fontTag = (Tag)div.children().nextNode();
353         // an alternate interpretation: assertEquals("Second tag should be corrected","font face=\"Arial,helvetica,\" sans-serif=\"sans-serif\" size=\"2\" color=\"#FFFFFF\"",fontTag.getText());
354         assertEquals("Second tag should be corrected","font face=\"Arial,\"helvetica,\" sans-serif=\"sans-serif\" size=\"2\" color=\"#FFFFFF\"",fontTag.getText());
355         assertEquals("font sans-serif parameter","sans-serif",fontTag.getAttribute("SANS-SERIF"));
356         // an alternate interpretation: assertEquals("font face parameter","Arial,helvetica,",table.get("FACE"));
357         // another: assertEquals("font face parameter","Arial,\"helvetica,",table.get("FACE"));
358         assertEquals("font face parameter","Arial,",fontTag.getAttribute("FACE"));
359     }
360 
361     public void testToHTML() throws ParserException {
362         String   tag1 = "<MYTAG abcd\n"+
363             "efgh\n"+
364             "ijkl\n"+
365             "mnop>";
366         String   testHTML = tag1 +
367             "\n"+
368             "<TITLE>Hello</TITLE>\n"+
369             "<A HREF=\"Hello.html\">Hey</A>";
370         createParser(testHTML);
371         parser.setNodeFactory (new PrototypicalNodeFactory (true));
372         parseAndAssertNodeCount(9);
373         // The node should be an Tag
374         assertTrue("1st Node should be a Tag",node[0] instanceof Tag);
375         Tag tag = (Tag)node[0];
376         assertStringEquals("toHTML()",tag1,tag.toHtml());
377         assertTrue("3rd Node should be a Tag",node[2] instanceof Tag);
378         assertTrue("5th Node should be a Tag",node[6] instanceof Tag);
379         tag = (Tag)node[2];
380         assertEquals("Raw String of the tag","<TITLE>",tag.toHtml());
381         tag = (Tag)node[6];
382         assertEquals("Raw String of the tag","<A HREF=\"Hello.html\">",tag.toHtml());
383     }
384 
385 
386     /**
387      * Test parseParameter method
388      * Created by Kaarle Kaila (22 Oct 2001)
389      * This test just wants the text in the element
390      */
391     public void testWithoutParseParameter() throws ParserException{
392         Node node;
393         String   testHTML = "<A HREF=\"http://www.iki.fi/kaila\" myParameter yourParameter=\"Kaarle\">Kaarle's homepage</A><p>Paragraph</p>";
394         createParser(testHTML);
395         NodeIterator en = parser.elements();
396         String   result="";
397         while (en.hasMoreNodes()) {
398             node = en.nextNode();
399             result += node.toHtml();
400         }
401         assertStringEquals("Check collected contents to original", testHTML, result);
402     }
403 
404     /**
405     * Test parseParameter method
406     * Created by Kaarle Kaila (09 Jan 2003)
407     * This test just wants the text in the element
408     */
409    public void testEmptyTagParseParameter() throws ParserException{
410        Node node;
411        String   testHTML = "<INPUT name=\"foo\" value=\"foobar\" type=\"text\" />";
412 
413        createParser(testHTML);
414        NodeIterator en = parser.elements();
415        String   result="";
416        while (en.hasMoreNodes()) {
417            node = en.nextNode();
418            result = node.toHtml();
419        }
420        assertStringEquals("Check collected contents to original", testHTML, result);
421     }
422 
423 
424     public void testStyleSheetTag() throws ParserException{
425         String   testHTML1 = new String  ("<link rel SRC=\"af.css\"/>");
426         createParser(testHTML1,"http://www.google.com/test/index.html");
427         parseAndAssertNodeCount(1);
428         assertTrue("Node should be a tag",node[0] instanceof Tag);
429         Tag tag = (Tag)node[0];
430         assertEquals("StyleSheet Source","af.css",tag.getAttribute("src"));
431     }
432 
433     /**
434      * Bug report by Cedric Rosa, causing null pointer exceptions when encountering a broken tag,
435      * and if this has no further lines to parse
436      */
437     public void testBrokenTag() throws ParserException{
438         String   testHTML1 = new String  ("<br");
439         createParser(testHTML1);
440         parseAndAssertNodeCount(1);
441         assertTrue("Node should be a tag",node[0] instanceof Tag);
442         Tag tag = (Tag)node[0];
443         assertEquals("Node contents","br",tag.getText());
444     }
445 
446     public void testTagInsideTag() throws ParserException {
447         String   testHTML = new String  ("<META name=\"Hello\" value=\"World </I>\">");
448         createParser(testHTML);
449         parseAndAssertNodeCount(1);
450         assertTrue("Node should be a tag",node[0] instanceof Tag);
451         Tag tag = (Tag)node[0];
452         assertEquals("Node contents","META name=\"Hello\" value=\"World </I>\"",tag.getText());
453         assertEquals("Meta Content","World </I>",tag.getAttribute("value"));
454 
455     }
456 
457     public void testIncorrectInvertedCommas() throws ParserException {
458         String   content = "DORIER-APPRILL E., GERVAIS-LAMBONY P., MORICONI-EBRARD F., NAVEZ-BOUCHANINE F.";
459         String   author = "Author";
460         String   guts = "META NAME=\"" + author + "\" CONTENT = \"" + content + "\"";
461         String   testHTML = "<" + guts + ">";
462         createParser(testHTML);
463         parseAndAssertNodeCount(1);
464         assertTrue("Node should be a tag",node[0] instanceof Tag);
465         Tag tag = (Tag)node[0];
466         assertStringEquals("Node contents",guts,tag.getText());
467         assertEquals("Meta Content",author,tag.getAttribute("NAME"));
468         
469         //
470         // Big todo here:
471         // This involves a change in the lexer state machine from
472         // six states to probably 8, or perhaps a half dozen 'substates'
473         // on state zero...
474         // we shy away from this at the moment:
475 //        assertEquals("Meta Content",content,tag.getAttribute("CONTENT"));
476     }
477 
478     public void testIncorrectInvertedCommas2() throws ParserException {
479         String   guts = "META NAME=\"Keywords\" CONTENT=Moscou, modernisation, politique urbaine, sp\u00e9cificit\u00e9s culturelles, municipalit\u00e9, Moscou, modernisation, urban politics, cultural specificities, municipality\"";
480         String   testHTML = "<" + guts + ">";
481         createParser(testHTML);
482         parseAndAssertNodeCount(1);
483         assertTrue("Node should be a tag",node[0] instanceof Tag);
484         Tag tag = (Tag)node[0];
485         assertStringEquals("Node contents",guts,tag.getText());
486     }
487 
488     public void testIncorrectInvertedCommas3() throws ParserException {
489         String   testHTML = new String  ("<meta name=\"description\" content=\"Une base de donn\u00e9es sur les th\u00e8ses de g\"ographie soutenues en France \">");
490         createParser(testHTML);
491         parseAndAssertNodeCount(1);
492         assertTrue("Node should be a tag",node[0] instanceof Tag);
493         Tag tag = (Tag)node[0];
494         assertEquals("Node contents","meta name=\"description\" content=\"Une base de donn\u00e9es sur les th\u00e8ses de g\"ographie soutenues en France \"",tag.getText());
495     }
496 
497     /**
498      * Ignore empty tags.
499      */
500     public void testEmptyTag() throws ParserException {
501         String   testHTML = "<html><body><>text</body></html>";
502         createParser(testHTML);
503         parser.setNodeFactory (new PrototypicalNodeFactory (true));
504         parseAndAssertNodeCount(5);
505         assertTrue("Third node should be a string node",node[2] instanceof Text);
506         Text stringNode = (Text)node[2];
507         assertEquals("Third node has incorrect text","<>text",stringNode.getText());
508     }
509 
510     /**
511      * Ignore empty tags.
512      */
513     public void testEmptyTag2() throws ParserException {
514         String   testHTML = "<html><body>text<></body></html>";
515         createParser(testHTML);
516         parser.setNodeFactory (new PrototypicalNodeFactory (true));
517         parseAndAssertNodeCount(5);
518         assertTrue("Third node should be a string node",node[2] instanceof Text);
519         Text stringNode = (Text)node[2];
520         assertEquals("Third node has incorrect text","text<>",stringNode.getText());
521     }
522 
523     /**
524      * Ignore empty tags.
525      */
526     public void testEmptyTag3() throws ParserException {
527         String   testHTML = "<html><body>text<>text</body></html>";
528         createParser(testHTML);
529         parseAndAssertNodeCount(1);
530         assertTrue("Only node should be an HTML node",node[0] instanceof Html);
531         Html html = (Html)node[0];
532         assertTrue("HTML node should have one child",1 == html.getChildCount ());
533         assertTrue("Only node should be an BODY node",html.getChild(0) instanceof BodyTag);
534         BodyTag body = (BodyTag)html.getChild(0);
535         assertTrue("BODY node should have one child",1 == body.getChildCount ());
536         assertTrue("Only node should be a string node",body.getChild(0) instanceof Text);
537         Text stringNode = (Text)body.getChild(0);
538         assertEquals("Third node has incorrect text","text<>text",stringNode.getText());
539     }
540 
541     /**
542      * Ignore empty tags.
543      */
544     public void testEmptyTag4() throws ParserException {
545         String   testHTML = "<html><body>text\n<>text</body></html>";
546         createParser(testHTML);
547         parseAndAssertNodeCount(1);
548         assertTrue("Only node should be an HTML node",node[0] instanceof Html);
549         Html html = (Html)node[0];
550         assertTrue("HTML node should have one child",1 == html.getChildCount ());
551         assertTrue("Only node should be an BODY node",html.getChild(0) instanceof BodyTag);
552         BodyTag body = (BodyTag)html.getChild(0);
553         assertTrue("BODY node should have one child",1 == body.getChildCount ());
554         assertTrue("Only node should be a string node",body.getChild(0) instanceof Text);
555         Text stringNode = (Text)body.getChild(0);
556         String   actual = stringNode.getText();
557         assertEquals("Third node has incorrect text","text\n<>text",actual);
558     }
559 
560     /**
561      * Ignore empty tags.
562      */
563     public void testEmptyTag5() throws ParserException {
564         String   testHTML = "<html><body>text<\n>text</body></html>";
565         createParser(testHTML);
566         parseAndAssertNodeCount(1);
567         assertTrue("Only node should be an HTML node",node[0] instanceof Html);
568         Html html = (Html)node[0];
569         assertTrue("HTML node should have one child",1 == html.getChildCount ());
570         assertTrue("Only node should be an BODY node",html.getChild(0) instanceof BodyTag);
571         BodyTag body = (BodyTag)html.getChild(0);
572         assertTrue("BODY node should have one child",1 == body.getChildCount ());
573         assertTrue("Only node should be a string node",body.getChild(0) instanceof Text);
574         Text stringNode = (Text)body.getChild(0);
575         String   actual = stringNode.getText();
576         assertEquals("Third node has incorrect text","text<\n>text",actual);
577     }
578 
579     /**
580      * Ignore empty tags.
581      */
582     public void testEmptyTag6() throws ParserException {
583         String   testHTML = "<html><body>text<>\ntext</body></html>";
584         createParser(testHTML);
585         parseAndAssertNodeCount(1);
586         assertTrue("Only node should be an HTML node",node[0] instanceof Html);
587         Html html = (Html)node[0];
588         assertTrue("HTML node should have one child",1 == html.getChildCount ());
589         assertTrue("Only node should be an BODY node",html.getChild(0) instanceof BodyTag);
590         BodyTag body = (BodyTag)html.getChild(0);
591         assertTrue("BODY node should have one child",1 == body.getChildCount ());
592         assertTrue("Only node should be a string node",body.getChild(0) instanceof Text);
593         Text stringNode = (Text)body.getChild(0);
594         String   actual = stringNode.getText();
595         assertEquals("Third node has incorrect text","text<>\ntext",actual);
596     }
597 
598     public void testAttributesReconstruction() throws ParserException {
599         String   expectedHTML = "<TEXTAREA name=\"JohnDoe\" >";
600         String   testHTML = expectedHTML + "</TEXTAREA>";
601         createParser(testHTML);
602         parser.setNodeFactory (new PrototypicalNodeFactory (true));
603         parseAndAssertNodeCount(2);
604         assertTrue("First node should be an HTMLtag",node[0] instanceof Tag);
605         Tag htmlTag = (Tag)node[0];
606         assertStringEquals("Expected HTML",expectedHTML,htmlTag.toHtml());
607     }
608 
609     public void testIgnoreState() throws ParserException
610     {
611         String   testHTML = "<A \n"+
612         "HREF=\"/a?b=c>d&e=f&g=h&i=http://localhost/Testing/Report1.html\">20020702 Report 1</A>";
613         createParser(testHTML);
614         parseAndAssertNodeCount(1);
615         assertTrue("Node should be a tag",node[0] instanceof Tag);
616         Tag tag = (Tag)node[0];
617         String   href = tag.getAttribute("HREF");
618         assertStringEquals("Resolved Link","/a?b=c>d&e=f&g=h&i=http://localhost/Testing/Report1.html",href);
619     }
620 
621     /**
622      * See bug #726913 toHtml() method incomplete
623      */
624     public void testSetText() throws ParserException
625     {
626         String   testHTML = "<LABEL ID=\"JohnDoe\">John Doe</LABEL>";
627         createParser(testHTML);
628         parseAndAssertNodeCount(1);
629         org.htmlparser.tags.LabelTag htmlTag = (org.htmlparser.tags.LabelTag)node[0];
630         String   expectedHTML = "<LABEL ID=\"JohnDoe\">John Doe</LABEL>";
631         assertStringEquals("Expected HTML",expectedHTML,htmlTag.toHtml());
632         assertStringEquals("Expected HTML","John Doe",htmlTag.getLabel());
633 
634         ((org.htmlparser.Text)((org.htmlparser.tags.CompositeTag)htmlTag).getChild(0)).setText("Jane Doe");
635         expectedHTML = "<LABEL ID=\"JohnDoe\">Jane Doe</LABEL>";
636         assertStringEquals("Expected HTML",expectedHTML,htmlTag.toHtml());
637         assertStringEquals("Expected HTML","Jane Doe",htmlTag.getLabel());
638     }
639 
640     /**
641      * From oyoaha
642      */
643     public void testTabText () throws ParserException
644     {
645         String   testHTML = "<a\thref=\"http://cbc.ca\">";
646         createParser (testHTML);
647         parseAndAssertNodeCount (1);
648         assertTrue("Node should be a LinkTag", node[0] instanceof LinkTag);
649         LinkTag tag = (LinkTag)node[0];
650         String   href = tag.getAttribute ("HREF");
651         assertStringEquals("Resolved Link","http://cbc.ca", href);
652     }
653 
654     /**
655      * See bug #741026 registerScanners() mangles output HTML badly.
656      */
657     public void testHTMLOutputOfDifficultLinksWithRegisterScanners () throws ParserException
658     {
659         // straight out of a real world example
660         String   html = "<a HREF=http://www.google.com/webhp?hl=en>";
661         createParser (html);
662         String   temp = null;
663         for (NodeIterator e = parser.elements (); e.hasMoreNodes ();)
664         {
665             Node newNode = e.nextNode ();  // Get the next HTML Node
666             temp = newNode.toHtml();
667         }
668         assertNotNull ("No nodes", temp);
669         assertStringEquals ("Incorrect HTML output: ", html + "</a>", temp);
670     }
671 
672     /**
673      * See bug #740411 setParsed() has no effect on output.
674      */
675     public void testParameterChange() throws ParserException
676     {
677         createParser("<TABLE BORDER=0>");
678         parser.setNodeFactory (new PrototypicalNodeFactory (true));
679         parseAndAssertNodeCount(1);
680         // the node should be a Tag
681         assertTrue("Node should be a Tag",node[0] instanceof Tag);
682         Tag tag = (Tag)node[0];
683         assertEquals("Initial text should be","TABLE BORDER=0",tag.getText ());
684         tag.setAttribute ("BORDER","\"1\"");
685         assertEquals("HTML should be","<TABLE BORDER=\"1\">", tag.toHtml ());
686     }
687 }
688
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Free Books Free Magazines
Popular Tags