KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > tests > parserHelperTests > RemarkNodeParserTest


// HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Somik Raha
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/RemarkNodeParserTest.java,v $
// $Author: derrickoswald $
// $Date: 2004/09/02 02:28:16 $
// $Revision: 1.48 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

27 package org.htmlparser.tests.parserHelperTests;
28
29 import org.htmlparser.PrototypicalNodeFactory;
30 import org.htmlparser.Remark;
31 import org.htmlparser.Tag;
32 import org.htmlparser.Text;
33 import org.htmlparser.tests.ParserTestCase;
34 import org.htmlparser.util.ParserException;
35
36 public class RemarkNodeParserTest extends ParserTestCase
37 {
38     static
39     {
40         System.setProperty ("org.htmlparser.tests.parserHelperTests.RemarkParserTest", "RemarkParserTest");
41     }
42
43     public RemarkNodeParserTest (String JavaDoc name) {
44         super(name);
45     }
46
47     /**
48      * Test unparsed remark node.
49      * The bug being reproduced is this : <BR>
50      * &lt;!-- saved from url=(0022)http://internet.e-mail --&gt;
51      * &lt;HTML&gt;
52      * &lt;HEAD&gt;&lt;META name="title" content="Training Introduction"&gt;
53      * &lt;META name="subject" content=""&gt;
54      * &lt;!--
55          Whats gonna happen now ?
56      * --&gt;
57      * &lt;TEST&gt;
58      * &lt;/TEST&gt;
59      *
60      * The above line is incorrectly parsed - the remark is not correctly identified.
61      * This bug was reported by Serge Kruppa (2002-Feb-08).
62      */

63     public void testRemarkBug() throws ParserException
64     {
65         createParser(
66             "<!-- saved from url=(0022)http://internet.e-mail -->\n"+
67             "<HTML>\n"+
68             "<HEAD><META name=\"title\" content=\"Training Introduction\">\n"+
69             "<META name=\"subject\" content=\"\">\n"+
70             "<!--\n"+
71             " Whats gonna happen now ?\n"+
72             "-->\n"+
73             "<TEST>\n"+
74             "</TEST>\n");
75         parser.setNodeFactory (new PrototypicalNodeFactory (true));
76         parseAndAssertNodeCount(15);
77         // The first node should be a Remark
78
assertTrue("First node should be a Remark",node[0] instanceof Remark);
79         Remark Remark = (Remark)node[0];
80         assertEquals("Text of the Remark #1"," saved from url=(0022)http://internet.e-mail ",Remark.getText());
81         // The tenth node should be a Remark
82
assertTrue("Tenth node should be a Remark",node[9] instanceof Remark);
83         Remark = (Remark)node[9];
84         assertEquals("Text of the Remark #10","\n Whats gonna happen now ?\n",Remark.getText());
85     }
86
87     public void testToPlainTextString() throws ParserException {
88         createParser(
89             "<!-- saved from url=(0022)http://internet.e-mail -->\n"+
90             "<HTML>\n"+
91             "<HEAD><META name=\"title\" content=\"Training Introduction\">\n"+
92             "<META name=\"subject\" content=\"\">\n"+
93             "<!--\n"+
94             " Whats gonna happen now ?\n"+
95             "-->\n"+
96             "<TEST>\n"+
97             "</TEST>\n");
98         parser.setNodeFactory (new PrototypicalNodeFactory (true));
99         parseAndAssertNodeCount(15);
100         // The first node should be a Remark
101
assertTrue("First node should be a Remark",node[0] instanceof Remark);
102         Remark Remark = (Remark)node[0];
103         assertEquals("Plain Text of the Remark #1"," saved from url=(0022)http://internet.e-mail ",Remark.toPlainTextString());
104         // The tenth node should be a Remark
105
assertTrue("Tenth node should be a Remark",node[9] instanceof Remark);
106         Remark = (Remark)node[9];
107         assertEquals("Plain Text of the Remark #10","\n Whats gonna happen now ?\n",Remark.getText());
108
109     }
110
111     public void testToRawString() throws ParserException {
112         createParser(
113             "<!-- saved from url=(0022)http://internet.e-mail -->\n"+
114             "<HTML>\n"+
115             "<HEAD><META name=\"title\" content=\"Training Introduction\">\n"+
116             "<META name=\"subject\" content=\"\">\n"+
117             "<!--\n"+
118             " Whats gonna happen now ?\n"+
119             "-->\n"+
120             "<TEST>\n"+
121             "</TEST>\n");
122         parser.setNodeFactory (new PrototypicalNodeFactory (true));
123         parseAndAssertNodeCount(15);
124         // The first node should be a Remark
125
assertTrue("First node should be a Remark",node[0] instanceof Remark);
126         Remark Remark = (Remark)node[0];
127         assertStringEquals("Raw String of the Remark #1","<!-- saved from url=(0022)http://internet.e-mail -->",Remark.toHtml());
128         // The tenth node should be a Remark
129
assertTrue("Tenth node should be a Remark",node[9] instanceof Remark);
130         Remark = (Remark)node[9];
131         assertStringEquals("Raw String of the Remark #6","<!--\n Whats gonna happen now ?\n-->",Remark.toHtml());
132     }
133
134     public void testNonRemark() throws ParserException {
135         createParser("&nbsp;<![endif]>");
136         parseAndAssertNodeCount(2);
137         // The first node should be a Remark
138
assertTrue("First node should be a string node",node[0] instanceof Text);
139         assertTrue("Second node should be a Tag",node[1] instanceof Tag);
140         Text stringNode = (Text)node[0];
141         Tag tag = (Tag)node[1];
142         assertEquals("Text contents","&nbsp;",stringNode.getText());
143         assertEquals("Tag Contents","![endif]",tag.getText());
144
145     }
146
147     /**
148      * This is the simulation of bug report 586756, submitted
149      * by John Zook.
150      * If all the comment contains is a blank line, it breaks
151      * the state
152      */

153     public void testRemarkWithBlankLine() throws ParserException {
154         createParser("<!--\n"+
155         "\n"+
156         "-->");
157         parser.setNodeFactory (new PrototypicalNodeFactory (true));
158         parseAndAssertNodeCount(1);
159         assertTrue("Node should be a Remark",node[0] instanceof Remark);
160         Remark Remark = (Remark)node[0];
161         assertEquals("Expected contents","\n\n",Remark.getText());
162
163     }
164
165     /**
166      * This is the simulation of a bug report submitted
167      * by Claude Duguay.
168      * If it is a comment with nothing in it, parser crashes
169      */

170     public void testRemarkWithNothing() throws ParserException {
171         createParser("<!-->");
172         parser.setNodeFactory (new PrototypicalNodeFactory (true));
173         parseAndAssertNodeCount(1);
174         assertTrue("Node should be a Remark",node[0] instanceof Remark);
175         Remark Remark = (Remark)node[0];
176         assertEquals("Expected contents","",Remark.getText());
177
178     }
179
180     /**
181      * Test tag within remark.
182      * Reproduction of bug reported by John Zook [594301]
183      * When we have tags like :
184      * &lt;!-- &lt;A&gt; --&gt;
185      * it doesent get parsed correctly
186      */

187     public void testTagWithinRemark() throws ParserException {
188         createParser("<!-- \n"+
189         "<A>\n"+
190         "bcd -->");
191         parser.setNodeFactory (new PrototypicalNodeFactory (true));
192         parseAndAssertNodeCount(1);
193         assertTrue("Node should be a Remark",node[0] instanceof Remark);
194         Remark Remark = (Remark)node[0];
195         assertStringEquals("Expected contents"," \n<A>\nbcd ",Remark.getText());
196
197     }
198
199     /**
200      * Bug reported by John Zook [594301], invalid remark nodes are accepted as remark nodes.
201      * &lt;<br>
202      * -<br>
203      * -<br>
204      * ssd --&gt;<br>
205      * This is not supposed to be a Remark
206      */

207     public void testInvalidTag() throws ParserException {
208         createParser("<!\n"+
209         "-\n"+
210         "-\n"+
211         "ssd -->");
212         parser.setNodeFactory (new PrototypicalNodeFactory (true));
213         parseAndAssertNodeCount(1);
214         assertTrue("Node should be a Tag but was "+node[0],node[0] instanceof Tag);
215         Tag tag = (Tag)node[0];
216         assertStringEquals("Expected contents","!\n"+
217         "-\n"+
218         "-\n"+
219         "ssd --",tag.getText());
220     }
221
222     /**
223      * Bug reported by John Zook [594301]
224      * If dashes exist in a comment, they dont get added to the comment text
225      */

226     public void testDashesInComment() throws ParserException{
227         createParser("<!-- -- -->");
228         parser.setNodeFactory (new PrototypicalNodeFactory (true));
229         parseAndAssertNodeCount(1);
230         assertTrue("Node should be a Remark but was "+node[0],node[0] instanceof Remark);
231         Remark Remark = (Remark)node[0];
232         assertEquals("Remark Node contents"," -- ",Remark.getText());
233     }
234
235
236     // from http://www.w3.org/MarkUp/html-spec/html-spec_3.html
237
//Comments
238
//
239
//To include comments in an HTML document, use a comment declaration.
240
//A comment declaration consists of `<!' followed by zero or more comments
241
//followed by `>'. Each comment starts with `--' and includes all text up to
242
//and including the next occurrence of `--'. In a comment declaration, white
243
//space is allowed after each comment, but not before the first comment. The
244
//entire comment declaration is ignored. (10)
245
//
246
//For example:
247
//
248
//<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
249
//<HEAD>
250
//<TITLE>HTML Comment Example</TITLE>
251
//<!-- Id: html-sgml.sgm,v 1.5 1995/05/26 21:29:50 connolly Exp -->
252
//<!-- another -- -- comment -->
253
//<!>
254
//</HEAD>
255
//<BODY>
256
//<p> <!- not a comment, just regular old data characters ->
257

258     /**
259      * Test a comment declaration with a comment.
260      */

261     public void testSingleComment ()
262         throws
263             ParserException
264     {
265         createParser(
266               "<HTML>\n"
267             + "<HEAD>\n"
268             + "<TITLE>HTML Comment Test</TITLE>\n"
269             + "</HEAD>\n"
270             + "<BODY>\n"
271             + "<!-- Id: html-sgml.sgm,v 1.5 1995/05/26 21:29:50 connolly Exp -->\n"
272             + "</BODY>\n"
273             + "</HTML>\n"
274             );
275         parser.setNodeFactory (new PrototypicalNodeFactory (true));
276         parseAndAssertNodeCount(18);
277         assertTrue("Node should be a Remark but was "+node[12],node[12] instanceof Remark);
278         Remark Remark = (Remark)node[12];
279         assertEquals("Remark Node contents"," Id: html-sgml.sgm,v 1.5 1995/05/26 21:29:50 connolly Exp ",Remark.getText());
280     }
281
282     /**
283      * Test a comment declaration with two comments.
284      */

285     public void testDoubleComment ()
286         throws
287             ParserException
288     {
289         createParser(
290               "<HTML>\n"
291             + "<HEAD>\n"
292             + "<TITLE>HTML Comment Test</TITLE>\n"
293             + "</HEAD>\n"
294             + "<BODY>\n"
295             + "<!-- another -- -- comment -->\n"
296             + "</BODY>\n"
297             + "</HTML>\n"
298             );
299         parser.setNodeFactory (new PrototypicalNodeFactory (true));
300         parseAndAssertNodeCount(18);
301         assertTrue("Node should be a Remark but was "+node[12],node[12] instanceof Remark);
302         Remark Remark = (Remark)node[12];
303         assertEquals("Remark Node contents"," another -- -- comment ",Remark.getText());
304     }
305
306     /**
307      * Test a comment declaration without any comments.
308      */

309     public void testEmptyComment ()
310         throws
311             ParserException
312     {
313         createParser(
314               "<HTML>\n"
315             + "<HEAD>\n"
316             + "<TITLE>HTML Comment Test 'testEmptyComment'</TITLE>\n"
317             + "</HEAD>\n"
318             + "<BODY>\n"
319             + "<!>\n"
320             + "</BODY>\n"
321             + "</HTML>\n"
322             );
323         parser.setNodeFactory (new PrototypicalNodeFactory (true));
324         parseAndAssertNodeCount(18);
325         assertTrue("Node should be a Remark but was "+node[12],node[12] instanceof Remark);
326         Remark Remark = (Remark)node[12];
327         assertEquals("Remark Node contents","",Remark.getText());
328     }
329
330 // /**
331
// * Test what the specification calls data characters.
332
// * Actually, no browser I've tried handles this correctly (as text).
333
// * Some handle it as a comment and others handle it as a tag.
334
// * So for now we leave this test case out.
335
// */
336
// public void testNotAComment ()
337
// throws
338
// HTMLParserException
339
// {
340
// createParser(
341
// "<HTML>\n"
342
// + "<HEAD>\n"
343
// + "<TITLE>HTML Comment Test 'testNotAComment'</TITLE>\n"
344
// + "</HEAD>\n"
345
// + "<BODY>\n"
346
// + "<!- not a comment, just regular old data characters ->\n"
347
// + "</BODY>\n"
348
// + "</HTML>\n"
349
// );
350
// parseAndAssertNodeCount(10);
351
// assertTrue("Node should not be a Remark",!(node[7] instanceof Remark));
352
// assertTrue("Node should be a HTMLText but was "+node[7],node[7].getType()==HTMLText.TYPE);
353
// HTMLText stringNode = (HTMLText)node[7];
354
// assertEquals("String Node contents","<!- not a comment, just regular old data characters ->\n",stringNode.getText());
355
// }
356

357     /**
358      * Test exclamation mark ending.
359      * Test a comment ending with !--.
360      * See bug #788746 parser crashes on comments like <!-- foobar --!>
361      */

362     public void testExclamationComment ()
363         throws
364             ParserException
365     {
366         createParser (
367               "<html>\n"
368             + "<head>\n"
369             + "<title>foobar</title>\n"
370             + "</head>\n"
371             + "<body>\n"
372             + "<!-- foobar --!>\n"
373             + "</body>\n"
374             + "</html>\n"
375             );
376         parser.setNodeFactory (new PrototypicalNodeFactory (true));
377         parseAndAssertNodeCount (18);
378         assertTrue("Node should be a Remark but was " + node[12], node[12] instanceof Remark);
379         assertStringEquals ("remark text", "<!-- foobar --!>", node[12].toHtml ());
380     }
381
382 }
383
Popular Tags