KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > tests > parserHelperTests > TagParserTest


1 // $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/tests/parserHelperTests/TagParserTest.java,v 1.2 2004/02/10 13:41:10 woolfel Exp $
2
/*
3  * ====================================================================
4  * Copyright 2002-2004 The Apache Software Foundation.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  */

19
20 // The developers of JMeter and Apache are grateful to the developers
21
// of HTMLParser for giving Apache Software Foundation a non-exclusive
22
// license. The performance benefits of HTMLParser are clear and the
23
// users of JMeter will benefit from the hard work of the HTMLParser
24
// team. For detailed information about HTMLParser, the project is
25
// hosted on sourceforge at http://htmlparser.sourceforge.net/.
26
//
27
// HTMLParser was originally created by Somik Raha in 2000. Since then
28
// a healthy community of users has formed and helped refine the
29
// design so that it is able to tackle the difficult task of parsing
30
// dirty HTML. Derrick Oswald is the current lead developer and was kind
31
// enough to assist JMeter.
32

33 package org.htmlparser.tests.parserHelperTests;
34 import java.util.HashMap JavaDoc;
35 import java.util.Map JavaDoc;
36
37 import org.htmlparser.Node;
38 import org.htmlparser.Parser;
39 import org.htmlparser.tags.LinkTag;
40 import org.htmlparser.tags.Tag;
41 import org.htmlparser.tests.ParserTestCase;
42 import org.htmlparser.util.ParserException;
43
44 public class TagParserTest extends ParserTestCase
45 {
46     private static final String JavaDoc TEST_HTML =
47         "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">"
48             + "<!-- Server: sf-web2 -->"
49             + "<html lang=\"en\">"
50             + " <head><link rel=\"stylesheet\" type=\"text/css\" HREF=\"http://sourceforge.net/cssdef.php\">"
51             + " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">"
52             + " <TITLE>SourceForge.net: Modify: 711073 - HTMLTagParser not threadsafe as a static variable in Tag</TITLE>"
53             + " <SCRIPT language=\"JavaScript\" type=\"text/javascript\">"
54             + " <!--"
55             + " function help_window(helpurl) {"
56             + " HelpWin = window.open( 'http://sourceforge.net' + helpurl,'HelpWindow','scrollbars=yes,resizable=yes,toolbar=no,height=400,width=400');"
57             + " }"
58             + " // -->"
59             + " </SCRIPT>"
60             + " <link rel=\"SHORTCUT ICON\" HREF=\"/images/favicon.ico\">"
61             + "<!-- This is temp javascript for the jump button. If we could actually have a jump script on the server side that would be ideal -->"
62             + "<script language=\"JavaScript\" type=\"text/javascript\">"
63             + "<!--"
64             + " function jump(targ,selObj,restore){ //v3.0"
65             + " if (selObj.options[selObj.selectedIndex].value) "
66             + " eval(targ+\".location='\"+selObj.options[selObj.selectedIndex].value+\"'\");"
67             + " if (restore) selObj.selectedIndex=0;"
68             + " }"
69             + " //-->"
70             + "</script>"
71             + "<a HREF=\"http://normallink.com/sometext.html\">"
72             + "<style type=\"text/css\">"
73             + "<!--"
74             + "A:link { text-decoration:none }"
75             + "A:visited { text-decoration:none }"
76             + "A:active { text-decoration:none }"
77             + "A:hover { text-decoration:underline; color:#0066FF; }"
78             + "-->"
79             + "</style>"
80             + "</head>"
81             + "<body bgcolor=\"#FFFFFF\" text=\"#000000\" leftmargin=\"0\" topmargin=\"0\" marginwidth=\"0\" marginheight=\"0\" link=\"#003399\" vlink=\"#003399\" alink=\"#003399\">";
82     private Map JavaDoc results;
83     private int testProgress;
84
/**
 * Creates the test case.
 *
 * @param name value handed unchanged to the superclass constructor
 */
public TagParserTest(String name)
{
    super(name);
}
89
90     public void testTagWithQuotes() throws Exception JavaDoc
91     {
92         String JavaDoc testHtml =
93             "<img SRC=\"http://g-images.amazon.com/images/G/01/merchants/logos/marshall-fields-logo-20.gif\" width=87 height=20 border=0 alt=\"Marshall Field's\">";
94
95         createParser(testHtml);
96         parseAndAssertNodeCount(1);
97         assertType("should be Tag", Tag.class, node[0]);
98         Tag tag = (Tag) node[0];
99         assertStringEquals("alt", "Marshall Field's", tag.getAttribute("ALT"));
100         assertStringEquals(
101             "html",
102             "<IMG BORDER=\"0\" ALT=\"Marshall Field's\" WIDTH=\"87\" SRC=\"http://g-images.amazon.com/images/G/01/merchants/logos/marshall-fields-logo-20.gif\" HEIGHT=\"20\">",
103             tag.toHtml());
104     }
105
106     public void testEmptyTag() throws Exception JavaDoc
107     {
108         createParser("<custom/>");
109         parseAndAssertNodeCount(1);
110         assertType("should be Tag", Tag.class, node[0]);
111         Tag tag = (Tag) node[0];
112         assertStringEquals("tag name", "CUSTOM", tag.getTagName());
113         assertTrue("empty tag", tag.isEmptyXmlTag());
114         assertStringEquals("html", "<CUSTOM/>", tag.toHtml());
115     }
116
117     public void testTagWithCloseTagSymbolInAttribute() throws ParserException
118     {
119         createParser("<tag att=\"a>b\">");
120         parseAndAssertNodeCount(1);
121         assertType("should be Tag", Tag.class, node[0]);
122         Tag tag = (Tag) node[0];
123         assertStringEquals("attribute", "a>b", tag.getAttribute("att"));
124     }
125
126     public void testTagWithOpenTagSymbolInAttribute() throws ParserException
127     {
128         createParser("<tag att=\"a<b\">");
129         parseAndAssertNodeCount(1);
130         assertType("should be Tag", Tag.class, node[0]);
131         Tag tag = (Tag) node[0];
132         assertStringEquals("attribute", "a<b", tag.getAttribute("att"));
133     }
134
135     public void testTagWithSingleQuote() throws ParserException
136     {
137         createParser("<tag att=\'a<b\'>");
138         parseAndAssertNodeCount(1);
139         assertType("should be Tag", Tag.class, node[0]);
140         Tag tag = (Tag) node[0];
141         assertStringEquals("html", "<TAG ATT=\"a<b\">", tag.toHtml());
142         assertStringEquals("attribute", "a<b", tag.getAttribute("att"));
143     }
144
145     /**
146      * The following multi line test cases are from
147      * bug #725749 Parser does not handle < and > in multi-line attributes
148      * submitted by Joe Robins (zorblak)
149      */

150
151     public void testMultiLine1() throws ParserException
152     {
153         createParser("<meta name=\"foo\" content=\"foo<bar>\">");
154         parseAndAssertNodeCount(1);
155         assertType("should be Tag", Tag.class, node[0]);
156         Tag tag = (Tag) node[0];
157         String JavaDoc html = tag.toHtml();
158         assertStringEquals(
159             "html",
160             "<META CONTENT=\"foo<bar>\" NAME=\"foo\">",
161             html);
162         String JavaDoc attribute1 = tag.getAttribute("NAME");
163         assertStringEquals("attribute 1", "foo", attribute1);
164         String JavaDoc attribute2 = tag.getAttribute("CONTENT");
165         assertStringEquals("attribute 2", "foo<bar>", attribute2);
166     }
167
168     public void testMultiLine2() throws ParserException
169     {
170         createParser("<meta name=\"foo\" content=\"foo<bar\">");
171         parseAndAssertNodeCount(1);
172         assertType("should be Tag", Tag.class, node[0]);
173         Tag tag = (Tag) node[0];
174         String JavaDoc html = tag.toHtml();
175         assertStringEquals(
176             "html",
177             "<META CONTENT=\"foo<bar\" NAME=\"foo\">",
178             html);
179         String JavaDoc attribute1 = tag.getAttribute("NAME");
180         assertStringEquals("attribute 1", "foo", attribute1);
181         String JavaDoc attribute2 = tag.getAttribute("CONTENT");
182         assertStringEquals("attribute 2", "foo<bar", attribute2);
183     }
184
185     public void testMultiLine3() throws ParserException
186     {
187         createParser("<meta name=\"foo\" content=\"foobar>\">");
188         parseAndAssertNodeCount(1);
189         assertType("should be Tag", Tag.class, node[0]);
190         Tag tag = (Tag) node[0];
191         String JavaDoc html = tag.toHtml();
192         assertStringEquals(
193             "html",
194             "<META CONTENT=\"foobar>\" NAME=\"foo\">",
195             html);
196         String JavaDoc attribute1 = tag.getAttribute("NAME");
197         assertStringEquals("attribute 1", "foo", attribute1);
198         String JavaDoc attribute2 = tag.getAttribute("CONTENT");
199         assertStringEquals("attribute 2", "foobar>", attribute2);
200     }
201
202     public void testMultiLine4() throws ParserException
203     {
204         createParser("<meta name=\"foo\" content=\"foo\nbar>\">");
205         parseAndAssertNodeCount(1);
206         assertType("should be Tag", Tag.class, node[0]);
207         Tag tag = (Tag) node[0];
208         String JavaDoc html = tag.toHtml();
209         assertStringEquals(
210             "html",
211             "<META CONTENT=\"foo\r\nbar>\" NAME=\"foo\">",
212             html);
213         String JavaDoc attribute1 = tag.getAttribute("NAME");
214         assertStringEquals("attribute 1", "foo", attribute1);
215         String JavaDoc attribute2 = tag.getAttribute("CONTENT");
216         assertStringEquals("attribute 2", "foo\r\nbar>", attribute2);
217     }
218
219     /**
220      * Test multiline tag like attribute.
221      * See feature request #725749 Handle < and > in multi-line attributes.
222      * Only perform this test if it's version 1.4 or higher.
223      */

224     public void testMultiLine5() throws ParserException
225     {
226         // <meta name="foo" content="<foo>
227
// bar">
228
createParser("<meta name=\"foo\" content=\"<foo>\nbar\">");
229         if (1.4 <= Parser.getVersionNumber())
230         {
231             parseAndAssertNodeCount(1);
232             assertType("should be Tag", Tag.class, node[0]);
233             Tag tag = (Tag) node[0];
234             String JavaDoc html = tag.toHtml();
235             assertStringEquals(
236                 "html",
237                 "<META CONTENT=\"<foo>\r\nbar\" NAME=\"foo\">",
238                 html);
239             String JavaDoc attribute1 = tag.getAttribute("NAME");
240             assertStringEquals("attribute 1", "foo", attribute1);
241             String JavaDoc attribute2 = tag.getAttribute("CONTENT");
242             assertStringEquals("attribute 2", "<foo>\r\nbar", attribute2);
243         }
244     }
245
246     /**
247      * Test multiline broken tag like attribute.
248      * See feature request #725749 Handle < and > in multi-line attributes.
249      * Only perform this test if it's version 1.4 or higher.
250      */

251     public void testMultiLine6() throws ParserException
252     {
253         // <meta name="foo" content="foo>
254
// bar">
255
createParser("<meta name=\"foo\" content=\"foo>\nbar\">");
256         if (1.4 <= Parser.getVersionNumber())
257         {
258             parseAndAssertNodeCount(1);
259             assertType("should be Tag", Tag.class, node[0]);
260             Tag tag = (Tag) node[0];
261             String JavaDoc html = tag.toHtml();
262             assertStringEquals(
263                 "html",
264                 "<META CONTENT=\"foo>\r\nbar\" NAME=\"foo\">",
265                 html);
266             String JavaDoc attribute1 = tag.getAttribute("NAME");
267             assertStringEquals("attribute 1", "foo", attribute1);
268             String JavaDoc attribute2 = tag.getAttribute("CONTENT");
269             assertStringEquals("attribute 2", "foo>\r\nbar", attribute2);
270         }
271     }
272
273     /**
274      * Test multiline split tag like attribute.
275      * See feature request #725749 Handle < and > in multi-line attributes.
276      * Only perform this test if it's version 1.4 or higher.
277      */

278     public void testMultiLine7() throws ParserException
279     {
280         // <meta name="foo" content="<foo
281
// bar">
282
createParser("<meta name=\"foo\" content=\"<foo\nbar\"");
283         if (1.4 <= Parser.getVersionNumber())
284         {
285             parseAndAssertNodeCount(1);
286             assertType("should be Tag", Tag.class, node[0]);
287             Tag tag = (Tag) node[0];
288             String JavaDoc html = tag.toHtml();
289             assertStringEquals(
290                 "html",
291                 "<META CONTENT=\"<foo\r\nbar\" NAME=\"foo\">",
292                 html);
293             String JavaDoc attribute1 = tag.getAttribute("NAME");
294             assertStringEquals("attribute 1", "foo", attribute1);
295             String JavaDoc attribute2 = tag.getAttribute("CONTENT");
296             assertStringEquals("attribute 2", "<foo\r\nbar", attribute2);
297         }
298     }
299
300     /**
301      * End of multi line test cases.
302      */

303
304     /**
305      * Test multiple threads running against the parser.
306      * See feature request #736144 Handle multi-threaded operation.
307      * Only perform this test if it's version 1.4 or higher.
308      */

309     public void testThreadSafety() throws Exception JavaDoc
310     {
311         createParser("<html></html>");
312         if (1.4 <= Parser.getVersionNumber())
313         {
314             String JavaDoc testHtml1 =
315                 "<a HREF=\"/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html\">20020702 Report 1</A>"
316                     + TEST_HTML;
317
318             String JavaDoc testHtml2 =
319                 "<a HREF=\"http://normallink.com/sometext.html\">" + TEST_HTML;
320             ParsingThread parsingThread[] = new ParsingThread[100];
321             results = new HashMap JavaDoc();
322             testProgress = 0;
323             for (int i = 0; i < parsingThread.length; i++)
324             {
325                 if (i < parsingThread.length / 2)
326                     parsingThread[i] =
327                         new ParsingThread(i, testHtml1, parsingThread.length);
328                 else
329                     parsingThread[i] =
330                         new ParsingThread(i, testHtml2, parsingThread.length);
331
332                 Thread JavaDoc thread = new Thread JavaDoc(parsingThread[i]);
333                 thread.start();
334             }
335
336             int completionValue = computeCompletionValue(parsingThread.length);
337
338             do
339             {
340                 try
341                 {
342                     Thread.sleep(50);
343                 }
344                 catch (InterruptedException JavaDoc e)
345                 {
346                 }
347             }
348             while (testProgress != completionValue);
349             for (int i = 0; i < parsingThread.length; i++)
350             {
351                 if (!parsingThread[i].passed())
352                 {
353                     assertNotNull(
354                         "Thread " + i + " link 1",
355                         parsingThread[i].getLink1());
356                     assertNotNull(
357                         "Thread " + i + " link 2",
358                         parsingThread[i].getLink2());
359                     if (i < parsingThread.length / 2)
360                     {
361                         assertStringEquals(
362                             "Thread " + i + ", link 1:",
363                             "/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html",
364                             parsingThread[i].getLink1().getLink());
365                         assertStringEquals(
366                             "Thread " + i + ", link 2:",
367                             "http://normallink.com/sometext.html",
368                             parsingThread[i].getLink2().getLink());
369                     }
370                     else
371                     {
372                         assertStringEquals(
373                             "Thread " + i + ", link 1:",
374                             "http://normallink.com/sometext.html",
375                             parsingThread[i].getLink1().getLink());
376                         assertNotNull(
377                             "Thread " + i + " link 2",
378                             parsingThread[i].getLink2());
379                         assertStringEquals(
380                             "Thread " + i + ", link 2:",
381                             "/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html",
382                             parsingThread[i].getLink2().getLink());
383                     }
384                 }
385             }
386         }
387
388     }
389
390     private int computeCompletionValue(int numThreads)
391     {
392         return numThreads * (numThreads - 1) / 2;
393     }
394
395     class ParsingThread implements Runnable JavaDoc
396     {
397         Parser parser;
398         int id;
399         LinkTag link1, link2;
400         boolean result;
401         int max;
402
403         ParsingThread(int id, String JavaDoc testHtml, int max)
404         {
405             this.id = id;
406             this.max = max;
407             this.parser = Parser.createParser(testHtml);
408             parser.registerScanners();
409         }
410
411         public void run()
412         {
413             try
414             {
415                 result = false;
416                 Node linkTag[] = parser.extractAllNodesThatAre(LinkTag.class);
417                 link1 = (LinkTag) linkTag[0];
418                 link2 = (LinkTag) linkTag[1];
419                 if (id < max / 2)
420                 {
421                     if (link1
422                         .getLink()
423                         .equals("/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html")
424                         && link2.getLink().equals(
425                             "http://normallink.com/sometext.html"))
426                         result = true;
427                 }
428                 else
429                 {
430                     if (link1
431                         .getLink()
432                         .equals("http://normallink.com/sometext.html")
433                         && link2.getLink().equals(
434                             "http://normallink.com/sometext.html"))
435                         result = true;
436                 }
437             }
438             catch (ParserException e)
439             {
440                 System.err.println("Parser Exception");
441                 e.printStackTrace();
442             }
443             finally
444             {
445                 testProgress += id;
446             }
447         }
448
449         public LinkTag getLink1()
450         {
451             return link1;
452         }
453
454         public LinkTag getLink2()
455         {
456             return link2;
457         }
458
459         public boolean passed()
460         {
461             return result;
462         }
463     }
464 }
Popular Tags