LinkScannerTest


1   // $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/tests/scannersTests/LinkScannerTest.java,v 1.2 2004/02/11 02:16:58 woolfel Exp $
2   /*
3    * ====================================================================
4    * Copyright 2002-2004 The Apache Software Foundation.
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   * 
18   */
19  
20  // The developers of JMeter and Apache are grateful to the developers
21  // of HTMLParser for giving Apache Software Foundation a non-exclusive
22  // license. The performance benefits of HTMLParser are clear and the
23  // users of JMeter will benefit from the hard work of the HTMLParser
24  // team. For detailed information about HTMLParser, the project is
25  // hosted on sourceforge at http://htmlparser.sourceforge.net/.
26  //
27  // HTMLParser was originally created by Somik Raha in 2000. Since then
28  // a healthy community of users has formed and helped refine the
29  // design so that it is able to tackle the difficult task of parsing
30  // dirty HTML. Derrick Oswald is the current lead developer and was kind
31  // enough to assist JMeter.
32  
33  package org.htmlparser.tests.scannersTests;
34  
35  
36  import org.htmlparser.Node;
37  import org.htmlparser.Parser;
38  import org.htmlparser.StringNode;
39  import org.htmlparser.scanners.LinkScanner;
40  import org.htmlparser.tags.EndTag;
41  import org.htmlparser.tags.ImageTag;
42  import org.htmlparser.tags.LinkTag;
43  import org.htmlparser.tags.Tag;
44  import org.htmlparser.tags.data.TagData;
45  import org.htmlparser.tests.ParserTestCase;
46  import org.htmlparser.util.ParserException;
47  import org.htmlparser.util.SimpleNodeIterator;
48  
49  public class LinkScannerTest extends ParserTestCase
50  {
51      public LinkScannerTest(String   name)
52      {
53          super(name);
54      }
55  
56      public void testAccessKey() throws ParserException
57      {
58          createParser("<a HREF=\"http://www.kizna.com/servlets/SomeServlet?name=Sam Joseph\" accessKey=1>Click Here</A>");
59          parser.addScanner(new LinkScanner("-l"));
60          parseAndAssertNodeCount(1);
61          assertTrue("The node should be a link tag", node[0] instanceof LinkTag);
62          LinkTag linkTag = (LinkTag) node[0];
63          assertEquals(
64              "Link URL of link tag",
65              "http://www.kizna.com/servlets/SomeServlet?name=Sam Joseph",
66              linkTag.getLink());
67          assertEquals(
68              "Link Text of link tag",
69              "Click Here",
70              linkTag.getLinkText());
71          assertEquals("Access key", "1", linkTag.getAccessKey());
72      }
73  
74      public void testErroneousLinkBug() throws ParserException
75      {
76          createParser(
77              "<p>Site Comments?<br>"
78                  + "<a HREF=\"mailto:sam@neurogrid.com?subject=Site Comments\">"
79                  + "Mail Us"
80                  + "<a>"
81                  + "</p>");
82          parser.registerScanners();
83          parseAndAssertNodeCount(6);
84          // The first node should be a Tag 
85          assertTrue("First node should be a Tag", node[0] instanceof Tag);
86          // The second node should be a HTMLStringNode
87          assertTrue(
88              "Second node should be a HTMLStringNode",
89              node[1] instanceof StringNode);
90          StringNode stringNode = (StringNode) node[1];
91          assertEquals(
92              "Text of the StringNode",
93              "Site Comments?",
94              stringNode.getText());
95          assertTrue("Third node should be a tag", node[2] instanceof Tag);
96  
97      }
98  
99      /**
100      * Test case based on a report by Raghavender Srimantula, of the parser giving out of memory exceptions. Found to occur
101      * on the following piece of html
102      * <pre>
103      * <a HREF=s/8741><img SRC="http://us.i1.yimg.com/us.yimg.com/i/i16/mov_popc.gif" height=16 width=16 border=0></img></td><td nowrap> &nbsp;
104      * <a HREF=s/7509>
105      * </pre>
106      */
107     public void testErroneousLinkBugFromYahoo2() throws ParserException
108     {
109         createParser(
110             "<td>"
111                 + "<a HREF=s/8741>"
112                 + "<img SRC=\"http://us.i1.yimg.com/us.yimg.com/i/i16/mov_popc.gif\" height=16 width=16 border=0>"
113                 + "</td>"
114                 + "<td nowrap> &nbsp;\n"
115                 + "<a HREF=s/7509><b>Yahoo! Movies</b></a>"
116                 + "</td>",
117             "http://www.yahoo.com");
118         parser.registerScanners();
119         Node linkNodes[] = parser.extractAllNodesThatAre(LinkTag.class);
120 
121         assertEquals("number of links", 2, linkNodes.length);
122         LinkTag linkTag = (LinkTag) linkNodes[0];
123         assertStringEquals(
124             "Link",
125             "http://www.yahoo.com/s/8741",
126             linkTag.getLink());
127         // Verify the link data
128         assertStringEquals("Link Text", "", linkTag.getLinkText());
129         // Verify the reconstruction html
130         assertStringEquals(
131             "toHTML",
132             "<A HREF=\"s/8741\"><IMG BORDER=\"0\" WIDTH=\"16\" SRC=\"http://us.i1.yimg.com/us.yimg.com/i/i16/mov_popc.gif\" HEIGHT=\"16\"></A>",
133             linkTag.toHtml());
134     }
135 
136     /**
137      * Test case based on a report by Raghavender Srimantula, of the parser giving out of memory exceptions. Found to occur
138      * on the following piece of html
139      * <pre>
140      * <a HREF=s/8741><img SRC="http://us.i1.yimg.com/us.yimg.com/i/i16/mov_popc.gif" height=16 width=16 border=0></img>This is test
141      * <a HREF=s/7509>
142      * </pre>
143      */
144     public void testErroneousLinkBugFromYahoo() throws ParserException
145     {
146         createParser(
147             "<a HREF=s/8741>"
148                 + "<img SRC=\"http://us.i1.yimg.com/us.yimg.com/i/i16/mov_popc.gif\" "
149                 + "height=16 "
150                 + "width=16 "
151                 + "border=0>"
152                 + "This is a test\n"
153                 + "<a HREF=s/7509>"
154                 + "<b>Yahoo! Movies</b>"
155                 + "</a>",
156             "http://www.yahoo.com");
157 
158         parser.registerScanners();
159         parseAndAssertNodeCount(2);
160         // The first node should be a Tag 
161         assertTrue(
162             "First node should be a HTMLLinkTag",
163             node[0] instanceof LinkTag);
164         // The second node should be a HTMLStringNode
165         assertTrue(
166             "Second node should be a HTMLLinkTag",
167             node[1] instanceof LinkTag);
168         LinkTag linkTag = (LinkTag) node[0];
169         assertEquals("Link", "http://www.yahoo.com/s/8741", linkTag.getLink());
170         // Verify the link data
171         assertEquals("Link Text", "This is a test\r\n", linkTag.getLinkText());
172         // Verify the reconstruction html
173         assertStringEquals(
174             "toHTML()",
175             "<A HREF=\"s/8741\"><IMG BORDER=\"0\" WIDTH=\"16\" SRC=\"http://us.i1.yimg.com/us.yimg.com/i/i16/mov_popc.gif\" HEIGHT=\"16\">This is a test\r\n</A>",
176             linkTag.toHtml());
177     }
178 
179     public void testEvaluate()
180     {
181         LinkScanner scanner = new LinkScanner("-l");
182         boolean retVal = scanner.evaluate("   a href ", null);
183         assertEquals(
184             "Evaluation of the Link tag",
185             new Boolean  (true),
186             new Boolean  (retVal));
187     }
188 
189     /**
190      * This is the reproduction of a bug which causes a null pointer exception
191      */
192     public void testExtractLinkInvertedCommasBug() throws ParserException
193     {
194         String   tagContents = "a HREF=r/anorth/top.html";
195         Tag tag = new Tag(new TagData(0, 0, tagContents, ""));
196         String   url = "c:\\cvs\\html\\binaries\\yahoo.htm";
197         LinkScanner scanner = new LinkScanner("-l");
198         assertEquals(
199             "Extracted Link",
200             "r/anorth/top.html",
201             scanner.extractLink(tag, url));
202     }
203 
204     /**
205      * This is the reproduction of a bug which produces multiple text copies.
206      */
207     public void testExtractLinkInvertedCommasBug2() throws ParserException
208     {
209         createParser("<a HREF=\"http://cbc.ca/artsCanada/stories/greatnorth271202\" class=\"lgblacku\">Vancouver schools plan 'Great Northern Way'</a>");
210         parser.addScanner(new LinkScanner("-l"));
211         parseAndAssertNodeCount(1);
212         assertTrue("The node should be a link tag", node[0] instanceof LinkTag);
213         LinkTag linkTag = (LinkTag) node[0];
214         assertStringEquals(
215             "Extracted Text",
216             "Vancouver schools plan 'Great Northern Way'",
217             linkTag.getLinkText());
218     }
219 
220     /**
221      * Bug pointed out by Sam Joseph (sam@neurogrid.net)
222      * Links with spaces in them will get their spaces absorbed
223      */
224     public void testLinkSpacesBug() throws ParserException
225     {
226         createParser("<a HREF=\"http://www.kizna.com/servlets/SomeServlet?name=Sam Joseph\">Click Here</A>");
227         parser.addScanner(new LinkScanner("-l"));
228         parseAndAssertNodeCount(1);
229         assertTrue("The node should be a link tag", node[0] instanceof LinkTag);
230         LinkTag linkTag = (LinkTag) node[0];
231         assertEquals(
232             "Link URL of link tag",
233             "http://www.kizna.com/servlets/SomeServlet?name=Sam Joseph",
234             linkTag.getLink());
235         assertEquals(
236             "Link Text of link tag",
237             "Click Here",
238             linkTag.getLinkText());
239     }
240 
241     /**
242      * Bug reported by Raj Sharma,5-Apr-2002, upon parsing
243      * http://www.samachar.com, the entire page could not be picked up.
244      * The problem was occurring after parsing a particular link
245      * after which the parsing would not proceed. This link was spread over three lines.
246      * The bug has been reproduced and fixed.
247      */
248     public void testMultipleLineBug() throws ParserException
249     {
250         createParser(
251             "<LI><font color=\"FF0000\" size=-1><b>Tech Samachar:</b></font><a \n"
252                 + "href=\"http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/tech\n"
253                 + "nical.html\"> Journalism 3.0</a> by Rajesh Jain");
254         Parser.setLineSeparator("\r\n");
255         parser.addScanner(new LinkScanner("-l"));
256         parseAndAssertNodeCount(8);
257         assertTrue(
258             "Seventh node should be a link tag",
259             node[6] instanceof LinkTag);
260         LinkTag linkTag = (LinkTag) node[6];
261         String   exp =
262             new String  ("http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/technical.html");
263         //assertEquals("Length of link tag",exp.length(), linkTag.getLink().length());
264         assertStringEquals("Link URL of link tag", exp, linkTag.getLink());
265         assertEquals(
266             "Link Text of link tag",
267             " Journalism 3.0",
268             linkTag.getLinkText());
269         assertTrue(
270             "Eight node should be a string node",
271             node[7] instanceof StringNode);
272         StringNode stringNode = (StringNode) node[7];
273         assertEquals(
274             "String node contents",
275             " by Rajesh Jain",
276             stringNode.getText());
277     }
278 
279     public void testRelativeLinkScan() throws ParserException
280     {
281         createParser(
282             "<A HREF=\"mytest.html\"> Hello World</A>",
283             "http://www.yahoo.com");
284         // Register the image scanner
285         parser.addScanner(new LinkScanner("-l"));
286         parseAndAssertNodeCount(1);
287         assertTrue(
288             "Node identified should be HTMLLinkTag",
289             node[0] instanceof LinkTag);
290         LinkTag linkTag = (LinkTag) node[0];
291         assertEquals(
292             "Expected Link",
293             "http://www.yahoo.com/mytest.html",
294             linkTag.getLink());
295     }
296 
297     public void testRelativeLinkScan2() throws ParserException
298     {
299         createParser(
300             "<A HREF=\"abc/def/mytest.html\"> Hello World</A>",
301             "http://www.yahoo.com");
302         // Register the image scanner
303         parser.addScanner(new LinkScanner("-l"));
304         parseAndAssertNodeCount(1);
305         assertTrue(
306             "Node identified should be HTMLLinkTag",
307             node[0] instanceof LinkTag);
308         LinkTag linkTag = (LinkTag) node[0];
309         assertStringEquals(
310             "Expected Link",
311             "http://www.yahoo.com/abc/def/mytest.html",
312             linkTag.getLink());
313     }
314 
315     public void testRelativeLinkScan3() throws ParserException
316     {
317         createParser(
318             "<A HREF=\"../abc/def/mytest.html\"> Hello World</A>",
319             "http://www.yahoo.com/ghi");
320         // Register the image scanner
321         parser.addScanner(new LinkScanner("-l"));
322         parseAndAssertNodeCount(1);
323         assertTrue(
324             "Node identified should be HTMLLinkTag",
325             node[0] instanceof LinkTag);
326         LinkTag linkTag = (LinkTag) node[0];
327         assertStringEquals(
328             "Expected Link",
329             "http://www.yahoo.com/abc/def/mytest.html",
330             linkTag.getLink());
331     }
332 
333     /**
334      * Test scan with data which is of diff nodes type
335      */
336     public void testScan() throws ParserException
337     {
338         createParser(
339             "<A HREF=\"mytest.html\"><IMG SRC=\"abcd.jpg\">Hello World</A>",
340             "http://www.yahoo.com");
341         // Register the image scanner
342         LinkScanner linkScanner = new LinkScanner("-l");
343         parser.addScanner(linkScanner);
344         parser.addScanner(linkScanner.createImageScanner("-i"));
345 
346         parseAndAssertNodeCount(1);
347         assertTrue("Node should be a link node", node[0] instanceof LinkTag);
348 
349         LinkTag linkTag = (LinkTag) node[0];
350         // Get the link data and cross-check
351         Node[] dataNode = new Node[10];
352         int i = 0;
353         for (SimpleNodeIterator e = linkTag.children(); e.hasMoreNodes();)
354         {
355             dataNode[i++] = (Node) e.nextNode();
356         }
357         assertEquals("Number of data nodes", new Integer  (2), new Integer  (i));
358         assertTrue(
359             "First data node should be an Image Node",
360             dataNode[0] instanceof ImageTag);
361         assertTrue(
362             "Second data node shouls be a String Node",
363             dataNode[1] instanceof StringNode);
364 
365         // Check the contents of each data node
366         ImageTag imageTag = (ImageTag) dataNode[0];
367         assertEquals(
368             "Image URL",
369             "http://www.yahoo.com/abcd.jpg",
370             imageTag.getImageURL());
371         StringNode stringNode = (StringNode) dataNode[1];
372         assertEquals("String Contents", "Hello World", stringNode.getText());
373     }
374 
375     public void testReplaceFaultyTagWithEndTag() throws ParserException
376     {
377         String   currentLine =
378             "<p>Site Comments?<br><a HREF=\"mailto:sam@neurogrid.com?subject=Site Comments\">Mail Us<a></p>";
379         Tag tag = new Tag(new TagData(85, 87, "a", currentLine));
380         LinkScanner linkScanner = new LinkScanner();
381         String   newLine =
382             linkScanner.replaceFaultyTagWithEndTag(tag, currentLine);
383         assertEquals(
384             "Expected replacement",
385             "<p>Site Comments?<br><a HREF=\"mailto:sam@neurogrid.com?subject=Site Comments\">Mail Us</A></p>",
386             newLine);
387     }
388 
389     public void testInsertEndTagBeforeTag() throws ParserException
390     {
391         String   currentLine = "<a HREF=s/7509><b>Yahoo! Movies</b></a>";
392         Tag tag = new Tag(new TagData(0, 14, "a HREF=s/7509", currentLine));
393         LinkScanner linkScanner = new LinkScanner();
394         String   newLine = linkScanner.insertEndTagBeforeNode(tag, currentLine);
395         assertEquals(
396             "Expected insertion",
397             "</A><a HREF=s/7509><b>Yahoo! Movies</b></a>",
398             newLine);
399     }
400 
401     /**
402      * A bug in the freshmeat page - really bad html 
403      * tag - &lt;A&gt;Revision&lt;\a&gt;
404      * Reported by Mazlan Mat
405      */
406     public void testFreshMeatBug() throws ParserException
407     {
408         createParser("<a>Revision</a>", "http://www.yahoo.com");
409         // Register the image scanner
410         parser.addScanner(new LinkScanner("-l"));
411 
412         parseAndAssertNodeCount(3);
413         assertTrue("Node 0 should be a tag", node[0] instanceof Tag);
414         Tag tag = (Tag) node[0];
415         assertEquals("Tag Contents", "a", tag.getText());
416         assertTrue(
417             "Node 1 should be a string node",
418             node[1] instanceof StringNode);
419         StringNode stringNode = (StringNode) node[1];
420         assertEquals("StringNode Contents", "Revision", stringNode.getText());
421         assertTrue("Node 2 should be a string node", node[2] instanceof EndTag);
422         EndTag endTag = (EndTag) node[2];
423         assertEquals("End Tag Contents", "a", endTag.getText());
424     }
425 
426     /** 
427      * Test suggested by Cedric Rosa
428      * A really bad link tag sends parser into infinite loop
429      */
430     public void testBrokenLink() throws ParserException
431     {
432         createParser(
433             "<a HREF=\"faq.html\">"
434                 + "<br>\n"
435                 + "<img SRC=\"images/46revues.gif\" "
436                 + "width=\"100\" "
437                 + "height=\"46\" "
438                 + "border=\"0\" "
439                 + "alt=\"Rejoignez revues.org!\" "
440                 + "align=\"middle\">",
441             "http://www.yahoo.com");
442         // Register the image scanner
443         parser.addScanner(new LinkScanner("-l"));
444 
445         parseAndAssertNodeCount(1);
446         assertTrue("Node 0 should be a link tag", node[0] instanceof LinkTag);
447         LinkTag linkTag = (LinkTag) node[0];
448         assertNotNull(linkTag.toString());
449     }
450 
451     public void testLinkDataContents() throws ParserException
452     {
453         createParser(
454             "<a HREF=\"http://transfer.go.com/cgi/atransfer.pl?goto=http://www.signs.movies.com&name=114332&srvc=nws&context=283&guid=4AD5723D-C802-4310-A388-0B24E1A79689\" target=\"_new\"><img SRC=\"http://ad.abcnews.com/ad/sponsors/buena_vista_pictures/bvpi-ban0003.gif\" width=468 height=60 border=\"0\" alt=\"See Signs in Theaters 8-2 - Starring Mel Gibson\" align=><font face=\"verdana,arial,helvetica\" SIZE=\"1\"><b></b></font></a>",
455             "http://transfer.go.com");
456         // Register the image scanner
457         LinkScanner linkScanner = new LinkScanner("-l");
458         parser.addScanner(linkScanner);
459         parser.addScanner(linkScanner.createImageScanner("-i"));
460 
461         parseAndAssertNodeCount(1);
462         assertTrue("Node 0 should be a link tag", node[0] instanceof LinkTag);
463         LinkTag linkTag = (LinkTag) node[0];
464         assertEquals(
465             "Link URL",
466             "http://transfer.go.com/cgi/atransfer.pl?goto=http://www.signs.movies.com&name=114332&srvc=nws&context=283&guid=4AD5723D-C802-4310-A388-0B24E1A79689",
467             linkTag.getLink());
468         assertEquals("Link Text", "", linkTag.getLinkText());
469         Node[] containedNodes = new Node[10];
470         int i = 0;
471         for (SimpleNodeIterator e = linkTag.children(); e.hasMoreNodes();)
472         {
473             containedNodes[i++] = e.nextNode();
474         }
475         assertEquals("There should be 5 contained nodes in the link tag", 5, i);
476         assertTrue(
477             "First contained node should be an image tag",
478             containedNodes[0] instanceof ImageTag);
479         ImageTag imageTag = (ImageTag) containedNodes[0];
480         assertEquals(
481             "Image Location",
482             "http://ad.abcnews.com/ad/sponsors/buena_vista_pictures/bvpi-ban0003.gif",
483             imageTag.getImageURL());
484         assertEquals("Image Height", "60", imageTag.getAttribute("HEIGHT"));
485         assertEquals("Image Width", "468", imageTag.getAttribute("WIDTH"));
486         assertEquals("Image Border", "0", imageTag.getAttribute("BORDER"));
487         assertEquals(
488             "Image Alt",
489             "See Signs in Theaters 8-2 - Starring Mel Gibson",
490             imageTag.getAttribute("ALT"));
491         assertTrue(
492             "Second contained node should be Tag",
493             containedNodes[1] instanceof Tag);
494         Tag tag1 = (Tag) containedNodes[1];
495         assertEquals(
496             "Tag Contents",
497             "font face=\"verdana,arial,helvetica\" SIZE=\"1\"",
498             tag1.getText());
499         assertTrue(
500             "Third contained node should be Tag",
501             containedNodes[2] instanceof Tag);
502         Tag tag2 = (Tag) containedNodes[2];
503         assertEquals("Tag Contents", "b", tag2.getText());
504         assertTrue(
505             "Fourth contained node should be HTMLEndTag",
506             containedNodes[3] instanceof EndTag);
507         EndTag endTag1 = (EndTag) containedNodes[3];
508         assertEquals("Fourth Tag contents", "b", endTag1.getText());
509         assertTrue(
510             "Fifth contained node should be HTMLEndTag",
511             containedNodes[4] instanceof EndTag);
512         EndTag endTag2 = (EndTag) containedNodes[4];
513         assertEquals("Fifth Tag contents", "font", endTag2.getText());
514 
515     }
516 
517     public void testBaseRefLink() throws ParserException
518     {
519         createParser(
520             "<html>\n"
521                 + "<head>\n"
522                 + "<TITLE>test page</TITLE>\n"
523                 + "<BASE HREF=\"http://www.abc.com/\">\n"
524                 + "<a HREF=\"home.cfm\">Home</a>\n"
525                 + "...\n"
526                 + "</html>",
527             "http://transfer.go.com");
528         // Register the image scanner
529         parser.registerScanners();
530         parseAndAssertNodeCount(7);
531         assertTrue("Node 4 should be a link tag", node[4] instanceof LinkTag);
532         LinkTag linkTag = (LinkTag) node[4];
533         assertEquals(
534             "Resolved Link",
535             "http://www.abc.com/home.cfm",
536             linkTag.getLink());
537         assertEquals("Resolved Link Text", "Home", linkTag.getLinkText());
538     }
539 
540     /**
541      * This is a reproduction of bug 617228, reported by
542      * Stephen J. Harrington. When faced with a link like :
543      * &lt;A 
544      * HREF="../../../../../cgi-bin/view_search?query_text=postdate&gt;20020701&txt_clr=White&bg_clr=Red&url=http://loc 
545      * al 
546      * host/Testing/Report 
547      * 1.html"&gt;20020702 Report 1&lt;/A&gt;
548      * 
549      * parser is unable to handle the link correctly due to the greater than 
550      * symbol being confused to be the end of the tag.
551      */
552     public void testQueryLink() throws ParserException
553     {
554         createParser(
555             "<A \n"
556                 + "HREF=\"/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html\">20020702 Report 1</A>",
557             "http://transfer.go.com");
558         // Register the image scanner
559         parser.registerScanners();
560         parseAndAssertNodeCount(1);
561         assertTrue("Node 1 should be a link tag", node[0] instanceof LinkTag);
562         LinkTag linkTag = (LinkTag) node[0];
563         assertStringEquals(
564             "Resolved Link",
565             "http://transfer.go.com/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html",
566             linkTag.getLink());
567         assertEquals(
568             "Resolved Link Text",
569             "20020702 Report 1",
570             linkTag.getLinkText());
571 
572     }
573 
574     public void testNotMailtoLink() throws ParserException
575     {
576         createParser(
577             "<A HREF=\"mailto.html\">not@for.real</A>",
578             "http://www.cj.com/");
579         parser.addScanner(new LinkScanner("-l"));
580         parseAndAssertNodeCount(1);
581         assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
582         LinkTag linkTag = (LinkTag) node[0];
583 
584         assertEquals(
585             "Link Plain Text",
586             "not@for.real",
587             linkTag.toPlainTextString());
588         assertTrue("Link is not a mail link", !linkTag.isMailLink());
589     }
590 
591     public void testMailtoLink() throws ParserException
592     {
593         createParser(
594             "<A HREF=\"mailto:this@is.real\">this@is.real</A>",
595             "http://www.cj.com/");
596         parser.addScanner(new LinkScanner("-l"));
597         parseAndAssertNodeCount(1);
598         assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
599         LinkTag linkTag = (LinkTag) node[0];
600         assertEquals(
601             "Link Plain Text",
602             "this@is.real",
603             linkTag.toPlainTextString());
604         assertTrue("Link is a mail link", linkTag.isMailLink());
605     }
606 
607     public void testJavascriptLink() throws ParserException
608     {
609         createParser(
610             "<A HREF=\"javascript:alert('hello');\">say hello</A>",
611             "http://www.cj.com/");
612         parser.addScanner(new LinkScanner("-l"));
613         parseAndAssertNodeCount(1);
614         assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
615         LinkTag linkTag = (LinkTag) node[0];
616 
617         assertEquals(
618             "Link Plain Text",
619             "say hello",
620             linkTag.toPlainTextString());
621         assertTrue("Link is a Javascript command", linkTag.isJavascriptLink());
622     }
623 
624     public void testNotJavascriptLink() throws ParserException
625     {
626         createParser(
627             "<A HREF=\"javascript_not.html\">say hello</A>",
628             "http://www.cj.com/");
629         parser.addScanner(new LinkScanner("-l"));
630         parseAndAssertNodeCount(1);
631         assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
632         LinkTag linkTag = (LinkTag) node[0];
633 
634         assertEquals(
635             "Link Plain Text",
636             "say hello",
637             linkTag.toPlainTextString());
638         assertTrue(
639             "Link is not a Javascript command",
640             !linkTag.isJavascriptLink());
641     }
642 
643     public void testFTPLink() throws ParserException
644     {
645         createParser(
646             "<A HREF=\"ftp://some.where.it\">my ftp</A>",
647             "http://www.cj.com/");
648         parser.addScanner(new LinkScanner("-l"));
649         parseAndAssertNodeCount(1);
650         assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
651         LinkTag linkTag = (LinkTag) node[0];
652 
653         assertEquals("Link Plain Text", "my ftp", linkTag.toPlainTextString());
654         assertTrue("Link is a FTP site", linkTag.isFTPLink());
655     }
656 
657     public void testNotFTPLink() throws ParserException
658     {
659         createParser("<A HREF=\"ftp.html\">my ftp</A>", "http://www.cj.com/");
660         parser.addScanner(new LinkScanner("-l"));
661         parseAndAssertNodeCount(1);
662         assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
663         LinkTag linkTag = (LinkTag) node[0];
664 
665         assertEquals("Link Plain Text", "my ftp", linkTag.toPlainTextString());
666         assertTrue("Link is not a FTP site", !linkTag.isFTPLink());
667     }
668 
669     public void testRelativeLinkNotHTMLBug() throws ParserException
670     {
671         createParser(
672             "<A HREF=\"newpage.html\">New Page</A>",
673             "http://www.mysite.com/books/some.asp");
674         parser.addScanner(new LinkScanner("-l"));
675         parseAndAssertNodeCount(1);
676         assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
677         LinkTag linkTag = (LinkTag) node[0];
678         assertEquals(
679             "Link",
680             "http://www.mysite.com/books/newpage.html",
681             linkTag.getLink());
682     }
683 
684     public void testBadImageInLinkBug() throws ParserException
685     {
686         createParser(
687             "<a HREF=\"registration.asp?EventID=1272\"><img border=\"0\" SRC=\"\\images\\register.gif\"</a>",
688             "http://www.fedpage.com/Event.asp?EventID=1272");
689         parser.registerScanners();
690         parseAndAssertNodeCount(1);
691         assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
692         LinkTag linkTag = (LinkTag) node[0];
693         // Get the image tag from the link
694 
695         Node insideNodes[] = new Node[10];
696         int j = 0;
697         for (SimpleNodeIterator e = linkTag.children(); e.hasMoreNodes();)
698         {
699             insideNodes[j++] = (Node) e.nextNode();
700         }
701         assertEquals("Number of contained internal nodes", 1, j);
702         assertTrue(insideNodes[0] instanceof ImageTag);
703         ImageTag imageTag = (ImageTag) insideNodes[0];
704         assertEquals(
705             "Image Tag Location",
706             "http://www.fedpage.com/images\\register.gif",
707             imageTag.getImageURL());
708     }
709 
710     /**
711      * This is an attempt to reproduce bug 677874 
712      * reported by James Moliere. A link tag of the form
713      * <code>
714      * <a class=rlbA HREF=/news/866201.asp?0sl=-
715      * 32>Shoe bomber handed life sentence</a>
716      * </code>
717      * is not parsed correctly. The second '=' sign in the link causes
718      * the parser to treat it as a seperate attribute
719      */
720     public void testLinkContainsEqualTo() throws Exception  
721     {
722         createParser(
723             "<a class=rlbA HREF=/news/866201.asp?0sl=-"
724                 + "32>Shoe bomber handed life sentence</a>");
725         parser.registerScanners();
726         parseAndAssertNodeCount(1);
727         assertType("node type", LinkTag.class, node[0]);
728         LinkTag linkTag = (LinkTag) node[0];
729         assertStringEquals(
730             "link text",
731             "Shoe bomber handed life sentence",
732             linkTag.getLinkText());
733         assertStringEquals(
734             "link url",
735             "/news/866201.asp?0sl=-32",
736             linkTag.getLink());
737     }
738 
739     /**
740      * Bug report by Cory Seefurth
741      * @throws Exception
742      */
743     public void _testLinkWithJSP() throws Exception  
744     {
745         createParser(
746             "<a HREF=\"<%=Application(\"sURL\")% "
747                 + ">/literature/index.htm\">Literature</a>");
748         parser.registerScanners();
749         parseAndAssertNodeCount(1);
750         assertType("should be link tag", LinkTag.class, node[0]);
751         LinkTag linkTag = (LinkTag) node[0];
752         assertStringEquals(
753             "expected link",
754             "<%=Application(\"sURL\")%>/literature/index.htm",
755             linkTag.getLink());
756     }
757 
758     public void testLinkScannerFilter() throws Exception  
759     {
760         LinkScanner linkScanner = new LinkScanner(LinkTag.LINK_TAG_FILTER);
761         assertEquals(
762             "linkscanner filter",
763             LinkTag.LINK_TAG_FILTER,
764             linkScanner.getFilter());
765     }
766 
767     public void testTagSymbolsInLinkText() throws Exception  
768     {
769         createParser(
770             "<a HREF=\"/cataclysm/Langy-AnEmpireReborn-Ch2.shtml#story\""
771                 + "><< An Empire Reborn: Chapter 2 <<</a>");
772         parser.registerScanners();
773         parseAndAssertNodeCount(1);
774         assertType("node", LinkTag.class, node[0]);
775         LinkTag linkTag = (LinkTag) node[0];
776         assertEquals(
777             "link text",
778             "<< An Empire Reborn: Chapter 2 <<",
779             linkTag.getLinkText());
780     }
781 }
782
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Free Books Free Magazines
Popular Tags