KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > tests > lexerTests > TagTests


// HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/TagTests.java,v $
// $Author: derrickoswald $
// $Date: 2005/03/13 14:51:46 $
// $Revision: 1.13 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

27 package org.htmlparser.tests.lexerTests;
28
29 import org.htmlparser.Node;
30 import org.htmlparser.Parser;
31 import org.htmlparser.PrototypicalNodeFactory;
32 import org.htmlparser.Tag;
33 import org.htmlparser.tags.LinkTag;
34 import org.htmlparser.tags.MetaTag;
35 import org.htmlparser.tests.ParserTestCase;
36 import org.htmlparser.util.ParserException;
37
38 public class TagTests extends ParserTestCase {
39     static
40     {
41         System.setProperty ("org.htmlparser.tests.lexerTests.TagTests", "TagTests");
42     }
43
44     private static final String JavaDoc TEST_HTML = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">" +
45         "<!-- Server: sf-web2 -->\n" +
46         "<html lang=\"en\">\n" +
47         " <head><link rel=\"stylesheet\" type=\"text/css\" HREF=\"http://sourceforge.net/cssdef.php\">\n" +
48         " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">\n" +
49         " <TITLE>SourceForge.net: Modify: 711073 - HTMLTagParser not threadsafe as a static variable in Tag</TITLE>\n" +
50         " <SCRIPT language=\"JavaScript\" type=\"text/javascript\">\n" +
51         " <!--\n" +
52         " function help_window(helpurl) {\n" +
53         " HelpWin = window.open( 'http://sourceforge.net' + helpurl,'HelpWindow','scrollbars=yes,resizable=yes,toolbar=no,height=400,width=400');\n" +
54         " }\n" +
55         " // -->\n" +
56         " </SCRIPT>\n" +
57         " <link rel=\"SHORTCUT ICON\" HREF=\"/images/favicon.ico\">\n" +
58         "<!-- This is temp javascript for the jump button. If we could actually have a jump script on the server side that would be ideal -->\n" +
59         "<script language=\"JavaScript\" type=\"text/javascript\">\n" +
60         "<!--\n" +
61         " function jump(targ,selObj,restore){ //v3.0\n" +
62         " if (selObj.options[selObj.selectedIndex].value)\n" +
63         " eval(targ+\".location='\"+selObj.options[selObj.selectedIndex].value+\"'\");\n" +
64         " if (restore) selObj.selectedIndex=0;\n" +
65         " }\n" +
66         " //-->\n" +
67         "</script>\n" +
68         "<a HREF=\"http://normallink.com/sometext.html\">\n" +
69         "<style type=\"text/css\">\n" +
70         "<!--\n" +
71         "A:link { text-decoration:none }\n" +
72         "A:visited { text-decoration:none }\n" +
73         "A:active { text-decoration:none }\n" +
74         "A:hover { text-decoration:underline; color:#0066FF; }\n" +
75         "-->\n" +
76         "</style>\n" +
77         "</head>\n" +
78         "<body bgcolor=\"#FFFFFF\" text=\"#000000\" leftmargin=\"0\" topmargin=\"0\" marginwidth=\"0\" marginheight=\"0\" link=\"#003399\" vlink=\"#003399\" alink=\"#003399\">\n";
79     private int testProgress;
80
81     public TagTests (String JavaDoc name) {
82         super(name);
83     }
84
85     public void testTagWithQuotes() throws Exception JavaDoc {
86         String JavaDoc testHtml =
87         "<img SRC=\"http://g-images.amazon.com/images/G/01/merchants/logos/marshall-fields-logo-20.gif\" width=87 height=20 border=0 alt=\"Marshall Field's\">";
88
89         createParser(testHtml);
90         parser.setNodeFactory (new PrototypicalNodeFactory (true));
91         parseAndAssertNodeCount(1);
92         assertType("should be Tag",Tag.class,node[0]);
93         Tag tag = (Tag)node[0];
94         assertStringEquals("alt","Marshall Field's",tag.getAttribute("ALT"));
95         assertStringEquals(
96             "html",
97             testHtml,
98             tag.toHtml()
99         );
100     }
101
102     public void testEmptyTag() throws Exception JavaDoc
103     {
104         String JavaDoc html = "<custom/>";
105         createParser(html);
106         parser.setNodeFactory (new PrototypicalNodeFactory (true));
107         parseAndAssertNodeCount(1);
108         assertType("should be Tag",Tag.class,node[0]);
109         Tag tag = (Tag)node[0];
110         assertStringEquals("tag name","CUSTOM",tag.getTagName());
111         assertTrue("empty tag",tag.isEmptyXmlTag());
112         assertStringEquals(
113             "html",
114             html,
115             tag.toHtml()
116         );
117     }
118
119     public void testTagWithCloseTagSymbolInAttribute() throws ParserException {
120         createParser("<tag att=\"a>b\">");
121         parser.setNodeFactory (new PrototypicalNodeFactory (true));
122         parseAndAssertNodeCount(1);
123         assertType("should be Tag",Tag.class,node[0]);
124         Tag tag = (Tag)node[0];
125         assertStringEquals("attribute","a>b",tag.getAttribute("att"));
126     }
127
128     public void testTagWithOpenTagSymbolInAttribute() throws ParserException {
129         createParser("<tag att=\"a<b\">");
130         parser.setNodeFactory (new PrototypicalNodeFactory (true));
131         parseAndAssertNodeCount(1);
132         assertType("should be Tag",Tag.class,node[0]);
133         Tag tag = (Tag)node[0];
134         assertStringEquals("attribute","a<b",tag.getAttribute("att"));
135     }
136
137     public void testTagWithSingleQuote() throws ParserException {
138         String JavaDoc html = "<tag att=\'a<b\'>";
139         createParser(html);
140         parser.setNodeFactory (new PrototypicalNodeFactory (true));
141         parseAndAssertNodeCount(1);
142         assertType("should be Tag",Tag.class,node[0]);
143         Tag tag = (Tag)node[0];
144         assertStringEquals("html",html,tag.toHtml());
145         assertStringEquals("attribute","a<b",tag.getAttribute("att"));
146     }
147
148     /**
149      * The following multi line test cases are from
150      * bug #725749 Parser does not handle < and > in multi-line attributes
151      * submitted by Joe Robins (zorblak)
152      */

153     public void testMultiLine1 () throws ParserException
154     {
155         String JavaDoc html = "<meta name=\"foo\" content=\"foo<bar>\">";
156         createParser(html);
157         parseAndAssertNodeCount (1);
158         assertType ("should be MetaTag", MetaTag.class, node[0]);
159         Tag tag = (Tag)node[0];
160         assertStringEquals ("html",html, tag.toHtml ());
161         String JavaDoc attribute1 = tag.getAttribute ("NAME");
162         assertStringEquals ("attribute 1","foo", attribute1);
163         String JavaDoc attribute2 = tag.getAttribute ("CONTENT");
164         assertStringEquals ("attribute 2","foo<bar>", attribute2);
165     }
166
167     public void testMultiLine2 () throws ParserException
168     {
169         String JavaDoc html = "<meta name=\"foo\" content=\"foo<bar\">";
170         createParser(html);
171         parseAndAssertNodeCount (1);
172         assertType ("should be MetaTag", MetaTag.class, node[0]);
173         Tag tag = (Tag)node[0];
174         assertStringEquals ("html",html, tag.toHtml ());
175         String JavaDoc attribute1 = tag.getAttribute ("NAME");
176         assertStringEquals ("attribute 1","foo", attribute1);
177         String JavaDoc attribute2 = tag.getAttribute ("CONTENT");
178         assertStringEquals ("attribute 2","foo<bar", attribute2);
179     }
180
181     public void testMultiLine3 () throws ParserException
182     {
183         String JavaDoc html = "<meta name=\"foo\" content=\"foobar>\">";
184         createParser(html);
185         parseAndAssertNodeCount (1);
186         assertType ("should be MetaTag", MetaTag.class, node[0]);
187         Tag tag = (Tag)node[0];
188         assertStringEquals ("html",html, tag.toHtml ());
189         String JavaDoc attribute1 = tag.getAttribute ("NAME");
190         assertStringEquals ("attribute 1","foo", attribute1);
191         String JavaDoc attribute2 = tag.getAttribute ("CONTENT");
192         assertStringEquals ("attribute 2","foobar>", attribute2);
193     }
194
195     public void testMultiLine4 () throws ParserException
196     {
197         String JavaDoc html = "<meta name=\"foo\" content=\"foo\nbar>\">";
198         createParser(html);
199         parseAndAssertNodeCount (1);
200         assertType ("should be MetaTag", MetaTag.class, node[0]);
201         Tag tag = (Tag)node[0];
202         assertStringEquals ("html",html, tag.toHtml ());
203         String JavaDoc attribute1 = tag.getAttribute ("NAME");
204         assertStringEquals ("attribute 1","foo", attribute1);
205         String JavaDoc attribute2 = tag.getAttribute ("CONTENT");
206         assertStringEquals ("attribute 2","foo\nbar>", attribute2);
207     }
208
209     /**
210      * Test multiline tag like attribute.
211      * See feature request #725749 Handle < and > in multi-line attributes.
212      */

213     public void testMultiLine5 () throws ParserException
214     {
215         // <meta name="foo" content="<foo>
216
// bar">
217
String JavaDoc html = "<meta name=\"foo\" content=\"<foo>\nbar\">";
218         createParser(html);
219         parseAndAssertNodeCount (1);
220         assertType ("should be MetaTag", MetaTag.class, node[0]);
221         Tag tag = (Tag)node[0];
222         assertStringEquals ("html",html, tag.toHtml ());
223         String JavaDoc attribute1 = tag.getAttribute ("NAME");
224         assertStringEquals ("attribute 1","foo", attribute1);
225         String JavaDoc attribute2 = tag.getAttribute ("CONTENT");
226         assertStringEquals ("attribute 2","<foo>\nbar", attribute2);
227     }
228
229     /**
230      * Test multiline broken tag like attribute.
231      * See feature request #725749 Handle < and > in multi-line attributes.
232      */

233     public void testMultiLine6 () throws ParserException
234     {
235         // <meta name="foo" content="foo>
236
// bar">
237
String JavaDoc html = "<meta name=\"foo\" content=\"foo>\nbar\">";
238         createParser(html);
239         parseAndAssertNodeCount (1);
240         assertType ("should be MetaTag", MetaTag.class, node[0]);
241         Tag tag = (Tag)node[0];
242         assertStringEquals ("html",html, tag.toHtml ());
243         String JavaDoc attribute1 = tag.getAttribute ("NAME");
244         assertStringEquals ("attribute 1","foo", attribute1);
245         String JavaDoc attribute2 = tag.getAttribute ("CONTENT");
246         assertStringEquals ("attribute 2","foo>\nbar", attribute2);
247     }
248
249     /**
250      * Test multiline split tag like attribute.
251      * See feature request #725749 Handle < and > in multi-line attributes.
252      */

253     public void testMultiLine7 () throws ParserException
254     {
255         // <meta name="foo" content="<foo
256
// bar">
257
String JavaDoc html = "<meta name=\"foo\" content=\"<foo\nbar\"";
258         createParser(html);
259         parseAndAssertNodeCount (1);
260         assertType ("should be MetaTag", MetaTag.class, node[0]);
261         Tag tag = (Tag)node[0];
262         assertStringEquals ("html",html + ">", tag.toHtml ());
263         String JavaDoc attribute1 = tag.getAttribute ("NAME");
264         assertStringEquals ("attribute 1","foo", attribute1);
265         String JavaDoc attribute2 = tag.getAttribute ("CONTENT");
266         assertStringEquals ("attribute 2","<foo\nbar", attribute2);
267     }
268
269     /**
270      * End of multi line test cases.
271      */

272
273     /**
274      * Test multiple threads running against the parser.
275      * See feature request #736144 Handle multi-threaded operation.
276      */

277     public void testThreadSafety() throws Exception JavaDoc
278     {
279         createParser("<html></html>");
280         parser.setNodeFactory (new PrototypicalNodeFactory (true));
281         String JavaDoc testHtml1 = "<a HREF=\"/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html\">20020702 Report 1</A>" +
282                             TEST_HTML;
283
284         String JavaDoc testHtml2 = "<a HREF=\"http://normallink.com/sometext.html\">" +
285                             TEST_HTML;
286         ParsingThread parsingThread [] =
287             new ParsingThread[100];
288         testProgress = 0;
289         for (int i=0;i<parsingThread.length;i++) {
290             if (i<parsingThread.length/2)
291                 parsingThread[i] =
292                     new ParsingThread(i,testHtml1,parsingThread.length);
293                 else
294                     parsingThread[i] =
295                         new ParsingThread(i,testHtml2,parsingThread.length);
296
297             Thread JavaDoc thread = new Thread JavaDoc(parsingThread[i]);
298             thread.start();
299         }
300
301         int completionValue = computeCompletionValue(parsingThread.length);
302
303         do {
304             try {
305                 Thread.sleep(500);
306             }
307             catch (InterruptedException JavaDoc e) {
308             }
309         }
310         while (testProgress!=completionValue);
311         for (int i=0;i<parsingThread.length;i++)
312         {
313             if (!parsingThread[i].passed())
314             {
315                 assertNotNull("Thread "+i+" link 1",parsingThread[i].getLink1());
316                 assertNotNull("Thread "+i+" link 2",parsingThread[i].getLink2());
317                 if (i<parsingThread.length/2) {
318                     assertStringEquals(
319                         "Thread "+i+", link 1:",
320                         "/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html",
321                         parsingThread[i].getLink1().getLink()
322                     );
323                     assertStringEquals(
324                         "Thread "+i+", link 2:",
325                         "http://normallink.com/sometext.html",
326                         parsingThread[i].getLink2().getLink()
327                     );
328                 } else {
329                     assertStringEquals(
330                         "Thread "+i+", link 1:",
331                         "http://normallink.com/sometext.html",
332                         parsingThread[i].getLink1().getLink()
333                     );
334                     assertNotNull("Thread "+i+" link 2",parsingThread[i].getLink2());
335                     assertStringEquals(
336                         "Thread "+i+", link 2:",
337                         "/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html",
338                         parsingThread[i].getLink2().getLink()
339                     );
340                 }
341             }
342         }
343     }
344
345     private int computeCompletionValue(int numThreads) {
346         return numThreads * (numThreads - 1) / 2;
347     }
348
349     class ParsingThread implements Runnable JavaDoc {
350         Parser mParser;
351         int mId;
352         LinkTag mLink1;
353         LinkTag mLink2;
354         boolean mResult;
355         int mMax;
356
357         ParsingThread(int id, String JavaDoc testHtml, int max) {
358             mId = id;
359             mMax = max;
360             mParser = Parser.createParser(testHtml, null);
361         }
362
363         public void run() {
364             try {
365                 mResult = false;
366                 Node linkTag [] = mParser.extractAllNodesThatAre(LinkTag.class);
367                 mLink1 = (LinkTag)linkTag[0];
368                 mLink2 = (LinkTag)linkTag[1];
369                 if (mId < mMax / 2) {
370                     if (mLink1.getLink().equals("/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html") &&
371                         mLink2.getLink().equals("http://normallink.com/sometext.html"))
372                         mResult = true;
373                 } else {
374                     if (mLink1.getLink().equals("http://normallink.com/sometext.html") &&
375                         mLink2.getLink().equals("http://normallink.com/sometext.html"))
376                         mResult = true;
377                 }
378             }
379             catch (ParserException e) {
380                 System.err.println("Parser Exception");
381                 e.printStackTrace();
382             }
383             finally {
384                 testProgress += mId;
385             }
386         }
387
388         public LinkTag getLink1() {
389             return (mLink1);
390         }
391
392         public LinkTag getLink2() {
393             return (mLink2);
394         }
395
396         public boolean passed() {
397             return (mResult);
398         }
399     }
400
401     /**
402      * Test the toHTML method for a standalone attribute.
403      */

404     public void testStandAloneToHTML () throws ParserException
405     {
406         String JavaDoc html = "<input disabled>";
407         createParser(html);
408         parser.setNodeFactory (new PrototypicalNodeFactory (true));
409         parseAndAssertNodeCount (1);
410         assertType ("should be Tag", Tag.class, node[0]);
411         Tag tag = (Tag)node[0];
412         assertStringEquals ("html", html, tag.toHtml ());
413     }
414
415     /**
416      * Test the toHTML method for a missing value attribute.
417      */

418     public void testMissingValueToHTML () throws ParserException
419     {
420         String JavaDoc html = "<input disabled=>";
421         createParser(html);
422         parser.setNodeFactory (new PrototypicalNodeFactory (true));
423         parseAndAssertNodeCount (1);
424         assertType ("should be Tag", Tag.class, node[0]);
425         Tag tag = (Tag)node[0];
426         assertStringEquals ("html", html, tag.toHtml ());
427     }
428 }
Popular Tags