StringParserTest


1   // HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
2   // http://sourceforge.org/projects/htmlparser
3   // Copyright (C) 2004 Somik Raha
4   //
5   // Revision Control Information
6   //
7   // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/StringParserTest.java,v $
8   // $Author: derrickoswald $
9   // $Date: 2004/09/02 02:28:15 $
10  // $Revision: 1.50 $
11  //
12  // This library is free software; you can redistribute it and/or
13  // modify it under the terms of the GNU Lesser General Public
14  // License as published by the Free Software Foundation; either
15  // version 2.1 of the License, or (at your option) any later version.
16  //
17  // This library is distributed in the hope that it will be useful,
18  // but WITHOUT ANY WARRANTY; without even the implied warranty of
19  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  // Lesser General Public License for more details.
21  //
22  // You should have received a copy of the GNU Lesser General Public
23  // License along with this library; if not, write to the Free Software
24  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25  //
26  
27  package org.htmlparser.tests.parserHelperTests;
28  
29  import org.htmlparser.PrototypicalNodeFactory;
30  import org.htmlparser.Remark;
31  import org.htmlparser.Text;
32  import org.htmlparser.tags.HeadTag;
33  import org.htmlparser.tags.Html;
34  import org.htmlparser.tags.LinkTag;
35  import org.htmlparser.tags.MetaTag;
36  import org.htmlparser.tests.ParserTestCase;
37  import org.htmlparser.util.ParserException;
38  
39  public class StringParserTest extends ParserTestCase {
40  
41      static
42      {
43          System.setProperty ("org.htmlparser.tests.parserHelperTests.StringParserTest", "StringParserTest");
44      }
45  
46      public StringParserTest(String   name) {
47          super(name);
48      }
49  
50      /**
51       * The bug being reproduced is this : <BR>
52       * &lt;HTML&gt;&lt;HEAD&gt;&lt;TITLE&gt;Google&lt;/TITLE&gt; <BR>
53       * The above line is incorrectly parsed in that, the text Google is missed.
54       * The presence of this bug is typically when some tag is identified before the string node is. (usually seen
55       * with the end tag). The bug lies in NodeReader.readElement().
56       * Creation date: (6/17/2001 4:01:06 PM)
57       */
58      public void testTextBug1() throws ParserException {
59          createParser("<HTML><HEAD><TITLE>Google</TITLE>");
60          parser.setNodeFactory (new PrototypicalNodeFactory (true));
61          parseAndAssertNodeCount(5);
62          // The fourth node should be a Text-  with the text - Google
63          assertTrue("Fourth node should be a Text",node[3] instanceof Text);
64          Text stringNode = (Text)node[3];
65          assertEquals("Text of the Text","Google",stringNode.getText());
66      }
67  
68      /**
69       * Test string containing link.
70       * Bug reported by Kaarle Kaila of Nokia<br>
71       * For the following HTML :
72       * view these documents, you must have &lt;A HREF='http://www.adobe.com'&gt;Adobe <br>
73       * Acrobat Reader&lt;/A&gt; installed on your computer.<br>
74       * The first string before the link is not identified, and the space after the link is also not identified
75       * Creation date: (8/2/2001 2:07:32 AM)
76       */
77      public void testTextBug2() throws ParserException {
78          // Register the link scanner
79  
80          createParser("view these documents, you must have <A HREF='http://www.adobe.com'>Adobe \n"+
81              "Acrobat Reader</A> installed on your computer.");
82          parseAndAssertNodeCount(3);
83          // The first node should be a Text-  with the text - view these documents, you must have
84          assertTrue("First node should be a Text",node[0] instanceof Text);
85          Text stringNode = (Text)node[0];
86          assertEquals("Text of the Text","view these documents, you must have ",stringNode.getText());
87          assertTrue("Second node should be a link node",node[1] instanceof LinkTag);
88          LinkTag linkNode = (LinkTag)node[1];
89          assertEquals("Link is","http://www.adobe.com",linkNode.getLink());
90          assertEquals("Link text is","Adobe \nAcrobat Reader",linkNode.getLinkText());
91  
92          assertTrue("Third node should be a string node",node[2] instanceof Text);
93          Text stringNode2 = (Text)node[2];
94          assertEquals("Contents of third node"," installed on your computer.",stringNode2.getText());
95      }
96  
97      /**
98       * Bug reported by Roger Sollberger<br>
99       * For the following HTML :
100      * &lt;a HREF="http://asgard.ch"&gt;[&lt; ASGARD &gt;&lt;/a&gt;&lt;br&gt;
101      * The string node is not correctly identified
102      */
103     public void testTagCharsInText() throws ParserException {
104         createParser("<a HREF=\"http://asgard.ch\">[> ASGARD <]</a>");
105         parseAndAssertNodeCount(1);
106         assertTrue("Node identified must be a link tag",node[0] instanceof LinkTag);
107         LinkTag linkTag = (LinkTag) node[0];
108         assertEquals("[> ASGARD <]",linkTag.getLinkText());
109         assertEquals("http://asgard.ch",linkTag.getLink());
110     }
111 
112     public void testToPlainTextString() throws ParserException {
113         createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>");
114         parser.setNodeFactory (new PrototypicalNodeFactory (true));
115         parseAndAssertNodeCount(10);
116         assertTrue("Fourth Node identified must be a string node",node[3] instanceof Text);
117         Text stringNode = (Text)node[3];
118         assertEquals("First String Node","This is the Title",stringNode.toPlainTextString());
119         assertTrue("Eighth Node identified must be a string node",node[7] instanceof Text);
120         stringNode = (Text)node[7];
121         assertEquals("Second string node","Hello World, this is the HTML Parser",stringNode.toPlainTextString());
122     }
123 
124     public void testToHTML() throws ParserException {
125         createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>");
126         parser.setNodeFactory (new PrototypicalNodeFactory (true));
127         parseAndAssertNodeCount(10);
128         assertTrue("Fourth Node identified must be a string node",node[3] instanceof Text);
129         Text stringNode = (Text)node[3];
130         assertEquals("First String Node","This is the Title",stringNode.toHtml());
131         assertTrue("Eighth Node identified must be a string node",node[7] instanceof Text);
132         stringNode = (Text)node[7];
133         assertEquals("Second string node","Hello World, this is the HTML Parser",stringNode.toHtml());
134     }
135 
136     public void testEmptyLines() throws ParserException {
137         createParser(
138         "David Nirenberg (Center for Advanced Study in the Behavorial Sciences, Stanford).<br>\n"+
139         "                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      \n"+
140         "<br>"
141         );
142         parser.setNodeFactory (new PrototypicalNodeFactory (true));
143         parseAndAssertNodeCount(4);
144         assertTrue("Third Node identified must be a string node",node[2] instanceof Text);
145     }
146 
147     /**
148      * This is a bug reported by John Zook (586222), where the first few chars
149      * before a remark is being missed, if its on the same line.
150      */
151     public void testStringBeingMissedBug() throws ParserException {
152         createParser(
153         "Before Comment <!-- Comment --> After Comment"
154         );
155         parser.setNodeFactory (new PrototypicalNodeFactory (true));
156         parseAndAssertNodeCount(3);
157         assertTrue("First node should be Text",node[0] instanceof Text);
158         assertTrue("Second node should be Remark",node[1] instanceof Remark);
159         assertTrue("Third node should be Text",node[2] instanceof Text);
160         Text stringNode = (Text)node[0];
161         assertEquals("First String node contents","Before Comment ",stringNode.getText());
162         Text stringNode2 = (Text)node[2];
163         assertEquals("Second String node contents"," After Comment",stringNode2.getText());
164         Remark remarkNode = (Remark)node[1];
165         assertEquals("Remark Node contents"," Comment ",remarkNode.getText());
166 
167     }
168 
169     /**
170      * Based on a bug report submitted by Cedric Rosa, if the last line contains a single character,
171      * Text does not return the string node correctly.
172      */
173     public void testLastLineWithOneChar() throws ParserException {
174         createParser("a");
175         parser.setNodeFactory (new PrototypicalNodeFactory (true));
176         parseAndAssertNodeCount(1);
177         assertTrue("First node should be Text",node[0] instanceof Text);
178         Text stringNode = (Text)node[0];
179         assertEquals("First String node contents","a",stringNode.getText());
180     }
181 
182     public void testStringWithEmptyLine() throws ParserException {
183         String   text = "a\n\nb";
184         createParser(text);
185         parser.setNodeFactory (new PrototypicalNodeFactory (true));
186         parseAndAssertNodeCount(1);
187         assertTrue("First node should be Text",node[0] instanceof Text);
188         Text stringNode = (Text)node[0];
189         assertStringEquals("First String node contents",text,stringNode.getText());
190     }
191 
192     /**
193      * An attempt to reproduce bug 677176, which passes.
194      * @throws Exception
195      */
196     public void testStringParserBug() throws Exception   {
197         createParser(
198             "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 " +
199             "Transitional//EN\">" +
200             "<html>" +
201             "<head>" +
202             "<title>Untitled Document</title>" +
203             "<meta http-equiv=\"Content-Type\" content=\"text/html; " +
204             "charset=iso-8859-1\">" +
205             "</head>" +
206             "<script language=\"JavaScript\" type=\"text/JavaScript\">" +
207             "// if this fails, output a 'hello' \n" +
208             "if (true) " +
209             "{ " +
210             "//something good...\n" +
211             "} " +
212             "</script>" +
213             "<body>" +
214             "</body>" +
215             "</html>"
216         );
217         parseAndAssertNodeCount(2);
218         assertTrue(node[1] instanceof Html);
219         Html htmlTag = (Html)node[1];
220         assertTrue("The HTML tag should have 3 nodes", 3 == htmlTag.getChildCount ());
221         assertTrue("The first child should be a HEAD tag",htmlTag.getChild(0) instanceof HeadTag);
222         HeadTag headTag = (HeadTag)htmlTag.getChild(0);
223         assertTrue("The HEAD tag should have 2 nodes", 2 == headTag.getChildCount ());
224         assertTrue("The second child should be a META tag",headTag.getChild(1) instanceof MetaTag);
225         MetaTag metaTag = (MetaTag)headTag.getChild(1);
226 
227         assertStringEquals(
228             "content",
229             "text/html; charset=iso-8859-1",
230             metaTag.getAttribute("CONTENT")
231         );
232     }
233 
234     public void testStringWithLineBreaks() throws Exception   {
235         String   text = "Testing &\nRefactoring";
236         createParser(text);
237         parser.setNodeFactory (new PrototypicalNodeFactory (true));
238         parseAndAssertNodeCount(1);
239         assertType("first node",Text.class,node[0]);
240         Text stringNode = (Text)node[0];
241         assertStringEquals("text",text,stringNode.toPlainTextString());
242     }
243 
244 }
245
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags