KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > tests > parserHelperTests > StringParserTest


// HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Somik Raha
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/StringParserTest.java,v $
// $Author: derrickoswald $
// $Date: 2004/09/02 02:28:15 $
// $Revision: 1.50 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

package org.htmlparser.tests.parserHelperTests;

import org.htmlparser.PrototypicalNodeFactory;
import org.htmlparser.Remark;
import org.htmlparser.Text;
import org.htmlparser.tags.HeadTag;
import org.htmlparser.tags.Html;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.MetaTag;
import org.htmlparser.tests.ParserTestCase;
import org.htmlparser.util.ParserException;

39 public class StringParserTest extends ParserTestCase {
40
41     static
42     {
43         System.setProperty ("org.htmlparser.tests.parserHelperTests.StringParserTest", "StringParserTest");
44     }
45
46     public StringParserTest(String JavaDoc name) {
47         super(name);
48     }
49
50     /**
51      * The bug being reproduced is this : <BR>
52      * &lt;HTML&gt;&lt;HEAD&gt;&lt;TITLE&gt;Google&lt;/TITLE&gt; <BR>
53      * The above line is incorrectly parsed in that, the text Google is missed.
54      * The presence of this bug is typically when some tag is identified before the string node is. (usually seen
55      * with the end tag). The bug lies in NodeReader.readElement().
56      * Creation date: (6/17/2001 4:01:06 PM)
57      */

58     public void testTextBug1() throws ParserException {
59         createParser("<HTML><HEAD><TITLE>Google</TITLE>");
60         parser.setNodeFactory (new PrototypicalNodeFactory (true));
61         parseAndAssertNodeCount(5);
62         // The fourth node should be a Text- with the text - Google
63
assertTrue("Fourth node should be a Text",node[3] instanceof Text);
64         Text stringNode = (Text)node[3];
65         assertEquals("Text of the Text","Google",stringNode.getText());
66     }
67
68     /**
69      * Test string containing link.
70      * Bug reported by Kaarle Kaila of Nokia<br>
71      * For the following HTML :
72      * view these documents, you must have &lt;A HREF='http://www.adobe.com'&gt;Adobe <br>
73      * Acrobat Reader&lt;/A&gt; installed on your computer.<br>
74      * The first string before the link is not identified, and the space after the link is also not identified
75      * Creation date: (8/2/2001 2:07:32 AM)
76      */

77     public void testTextBug2() throws ParserException {
78         // Register the link scanner
79

80         createParser("view these documents, you must have <A HREF='http://www.adobe.com'>Adobe \n"+
81             "Acrobat Reader</A> installed on your computer.");
82         parseAndAssertNodeCount(3);
83         // The first node should be a Text- with the text - view these documents, you must have
84
assertTrue("First node should be a Text",node[0] instanceof Text);
85         Text stringNode = (Text)node[0];
86         assertEquals("Text of the Text","view these documents, you must have ",stringNode.getText());
87         assertTrue("Second node should be a link node",node[1] instanceof LinkTag);
88         LinkTag linkNode = (LinkTag)node[1];
89         assertEquals("Link is","http://www.adobe.com",linkNode.getLink());
90         assertEquals("Link text is","Adobe \nAcrobat Reader",linkNode.getLinkText());
91
92         assertTrue("Third node should be a string node",node[2] instanceof Text);
93         Text stringNode2 = (Text)node[2];
94         assertEquals("Contents of third node"," installed on your computer.",stringNode2.getText());
95     }
96
97     /**
98      * Bug reported by Roger Sollberger<br>
99      * For the following HTML :
100      * &lt;a HREF="http://asgard.ch"&gt;[&lt; ASGARD &gt;&lt;/a&gt;&lt;br&gt;
101      * The string node is not correctly identified
102      */

103     public void testTagCharsInText() throws ParserException {
104         createParser("<a HREF=\"http://asgard.ch\">[> ASGARD <]</a>");
105         parseAndAssertNodeCount(1);
106         assertTrue("Node identified must be a link tag",node[0] instanceof LinkTag);
107         LinkTag linkTag = (LinkTag) node[0];
108         assertEquals("[> ASGARD <]",linkTag.getLinkText());
109         assertEquals("http://asgard.ch",linkTag.getLink());
110     }
111
112     public void testToPlainTextString() throws ParserException {
113         createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>");
114         parser.setNodeFactory (new PrototypicalNodeFactory (true));
115         parseAndAssertNodeCount(10);
116         assertTrue("Fourth Node identified must be a string node",node[3] instanceof Text);
117         Text stringNode = (Text)node[3];
118         assertEquals("First String Node","This is the Title",stringNode.toPlainTextString());
119         assertTrue("Eighth Node identified must be a string node",node[7] instanceof Text);
120         stringNode = (Text)node[7];
121         assertEquals("Second string node","Hello World, this is the HTML Parser",stringNode.toPlainTextString());
122     }
123
124     public void testToHTML() throws ParserException {
125         createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>");
126         parser.setNodeFactory (new PrototypicalNodeFactory (true));
127         parseAndAssertNodeCount(10);
128         assertTrue("Fourth Node identified must be a string node",node[3] instanceof Text);
129         Text stringNode = (Text)node[3];
130         assertEquals("First String Node","This is the Title",stringNode.toHtml());
131         assertTrue("Eighth Node identified must be a string node",node[7] instanceof Text);
132         stringNode = (Text)node[7];
133         assertEquals("Second string node","Hello World, this is the HTML Parser",stringNode.toHtml());
134     }
135
136     public void testEmptyLines() throws ParserException {
137         createParser(
138         "David Nirenberg (Center for Advanced Study in the Behavorial Sciences, Stanford).<br>\n"+
139         " \n"+
140         "<br>"
141         );
142         parser.setNodeFactory (new PrototypicalNodeFactory (true));
143         parseAndAssertNodeCount(4);
144         assertTrue("Third Node identified must be a string node",node[2] instanceof Text);
145     }
146
147     /**
148      * This is a bug reported by John Zook (586222), where the first few chars
149      * before a remark is being missed, if its on the same line.
150      */

151     public void testStringBeingMissedBug() throws ParserException {
152         createParser(
153         "Before Comment <!-- Comment --> After Comment"
154         );
155         parser.setNodeFactory (new PrototypicalNodeFactory (true));
156         parseAndAssertNodeCount(3);
157         assertTrue("First node should be Text",node[0] instanceof Text);
158         assertTrue("Second node should be Remark",node[1] instanceof Remark);
159         assertTrue("Third node should be Text",node[2] instanceof Text);
160         Text stringNode = (Text)node[0];
161         assertEquals("First String node contents","Before Comment ",stringNode.getText());
162         Text stringNode2 = (Text)node[2];
163         assertEquals("Second String node contents"," After Comment",stringNode2.getText());
164         Remark remarkNode = (Remark)node[1];
165         assertEquals("Remark Node contents"," Comment ",remarkNode.getText());
166
167     }
168
169     /**
170      * Based on a bug report submitted by Cedric Rosa, if the last line contains a single character,
171      * Text does not return the string node correctly.
172      */

173     public void testLastLineWithOneChar() throws ParserException {
174         createParser("a");
175         parser.setNodeFactory (new PrototypicalNodeFactory (true));
176         parseAndAssertNodeCount(1);
177         assertTrue("First node should be Text",node[0] instanceof Text);
178         Text stringNode = (Text)node[0];
179         assertEquals("First String node contents","a",stringNode.getText());
180     }
181
182     public void testStringWithEmptyLine() throws ParserException {
183         String JavaDoc text = "a\n\nb";
184         createParser(text);
185         parser.setNodeFactory (new PrototypicalNodeFactory (true));
186         parseAndAssertNodeCount(1);
187         assertTrue("First node should be Text",node[0] instanceof Text);
188         Text stringNode = (Text)node[0];
189         assertStringEquals("First String node contents",text,stringNode.getText());
190     }
191
192     /**
193      * An attempt to reproduce bug 677176, which passes.
194      * @throws Exception
195      */

196     public void testStringParserBug() throws Exception JavaDoc {
197         createParser(
198             "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 " +
199             "Transitional//EN\">" +
200             "<html>" +
201             "<head>" +
202             "<title>Untitled Document</title>" +
203             "<meta http-equiv=\"Content-Type\" content=\"text/html; " +
204             "charset=iso-8859-1\">" +
205             "</head>" +
206             "<script language=\"JavaScript\" type=\"text/JavaScript\">" +
207             "// if this fails, output a 'hello' \n" +
208             "if (true) " +
209             "{ " +
210             "//something good...\n" +
211             "} " +
212             "</script>" +
213             "<body>" +
214             "</body>" +
215             "</html>"
216         );
217         parseAndAssertNodeCount(2);
218         assertTrue(node[1] instanceof Html);
219         Html htmlTag = (Html)node[1];
220         assertTrue("The HTML tag should have 3 nodes", 3 == htmlTag.getChildCount ());
221         assertTrue("The first child should be a HEAD tag",htmlTag.getChild(0) instanceof HeadTag);
222         HeadTag headTag = (HeadTag)htmlTag.getChild(0);
223         assertTrue("The HEAD tag should have 2 nodes", 2 == headTag.getChildCount ());
224         assertTrue("The second child should be a META tag",headTag.getChild(1) instanceof MetaTag);
225         MetaTag metaTag = (MetaTag)headTag.getChild(1);
226
227         assertStringEquals(
228             "content",
229             "text/html; charset=iso-8859-1",
230             metaTag.getAttribute("CONTENT")
231         );
232     }
233
234     public void testStringWithLineBreaks() throws Exception JavaDoc {
235         String JavaDoc text = "Testing &\nRefactoring";
236         createParser(text);
237         parser.setNodeFactory (new PrototypicalNodeFactory (true));
238         parseAndAssertNodeCount(1);
239         assertType("first node",Text.class,node[0]);
240         Text stringNode = (Text)node[0];
241         assertStringEquals("text",text,stringNode.toPlainTextString());
242     }
243
244 }
245
Popular Tags