KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > tests > tagTests > LinkTagTest


// HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Somik Raha
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/LinkTagTest.java,v $
// $Author: derrickoswald $
// $Date: 2004/09/02 02:28:14 $
// $Revision: 1.52 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

27 package org.htmlparser.tests.tagTests;
28
29 import org.htmlparser.Node;
30 import org.htmlparser.PrototypicalNodeFactory;
31 import org.htmlparser.Tag;
32 import org.htmlparser.Text;
33 import org.htmlparser.tags.HeadTag;
34 import org.htmlparser.tags.Html;
35 import org.htmlparser.tags.ImageTag;
36 import org.htmlparser.tags.LinkTag;
37 import org.htmlparser.tests.ParserTestCase;
38 import org.htmlparser.util.ParserException;
39 import org.htmlparser.util.SimpleNodeIterator;
40
41 public class LinkTagTest extends ParserTestCase {
42
// Publishes this test class's name as a system property when the class loads.
// NOTE(review): the reader of this property is not visible in this file - confirm it is still needed.
static
{
    System.setProperty ("org.htmlparser.tests.tagTests.LinkTagTest", "LinkTagTest");
}
47
/**
 * Creates the test case.
 * @param name The test name, handed unchanged to the superclass constructor.
 */
public LinkTagTest(String name) {
    super(name);
}
51
52     /**
53      * The bug being reproduced is this : <BR>
54      * &lt;BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000 <BR>
55      * vLink=#551a8b&gt;
56      * The above line is incorrectly parsed in that, the BODY tag is not identified.
57      * Creation date: (6/17/2001 4:01:06 PM)
58      */

59     public void testLinkNodeBug() throws ParserException
60     {
61         createParser("<A HREF=\"../test.html\">abcd</A>","http://www.google.com/test/index.html");
62         parseAndAssertNodeCount(1);
63         // The node should be an LinkTag
64
assertTrue("Node should be a LinkTag",node[0] instanceof LinkTag);
65         LinkTag linkNode = (LinkTag)node[0];
66         assertEquals("The image locn","http://www.google.com/test.html",linkNode.getLink());
67     }
68
69     /**
70      * The bug being reproduced is this : <BR>
71      * &lt;BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000 <BR>
72      * vLink=#551a8b&gt;
73      * The above line is incorrectly parsed in that, the BODY tag is not identified.
74      * Creation date: (6/17/2001 4:01:06 PM)
75      */

76     public void testLinkNodeBug2() throws ParserException
77     {
78         createParser("<A HREF=\"../../test.html\">abcd</A>","http://www.google.com/test/test/index.html");
79         parseAndAssertNodeCount(1);
80         // The node should be an LinkTag
81
assertTrue("Node should be a LinkTag",node[0] instanceof LinkTag);
82         LinkTag linkNode = (LinkTag)node[0];
83         assertEquals("The image locn","http://www.google.com/test.html",linkNode.getLink());
84     }
85
86     /**
87      * The bug being reproduced is this : <BR>
88      * When a url ends with a slash, and the link begins with a slash,the parser puts two slashes
89      * This bug was submitted by Roget Kjensrud
90      * Creation date: (6/17/2001 4:01:06 PM)
91      */

92     public void testLinkNodeBug3() throws ParserException
93     {
94         createParser("<A HREF=\"/mylink.html\">abcd</A>","http://www.cj.com/");
95         parseAndAssertNodeCount(1);
96         // The node should be an LinkTag
97
assertTrue("Node should be a LinkTag",node[0] instanceof LinkTag);
98         LinkTag linkNode = (LinkTag)node[0];
99         assertEquals("Link incorrect","http://www.cj.com/mylink.html",linkNode.getLink());
100     }
101
102     /**
103      * The bug being reproduced is this : <BR>
104      * Simple url without index.html, doesent get appended to link
105      * This bug was submitted by Roget Kjensrud
106      * Creation date: (6/17/2001 4:01:06 PM)
107      */

108     public void testLinkNodeBug4() throws ParserException
109     {
110         createParser("<A HREF=\"/mylink.html\">abcd</A>","http://www.cj.com");
111         parseAndAssertNodeCount(1);
112         // The node should be an LinkTag
113
assertTrue("Node should be a LinkTag",node[0] instanceof LinkTag);
114         LinkTag linkNode = (LinkTag)node[0];
115         assertEquals("Link incorrect!!","http://www.cj.com/mylink.html",linkNode.getLink());
116     }
117
118     public void testLinkNodeBug5() throws ParserException
119     {
120         String JavaDoc link1 = "http://note.kimo.com.tw/";
121         String JavaDoc link2 = "http://photo.kimo.com.tw/";
122         String JavaDoc link3 = "http://address.kimo.com.tw/";
123         createParser("<a HREF=" + link1 + ">���O</a>&nbsp; <a \n"+
124         "href=" + link2 + ">��ï</a>&nbsp; <a\n"+
125         "href=" + link3 + ">�q�T��</a>&nbsp;&nbsp;","http://www.cj.com");
126         parseAndAssertNodeCount(6);
127         assertTrue("Node should be a LinkTag",node[2] instanceof LinkTag);
128         LinkTag linkNode = (LinkTag)node[2];
129         assertStringEquals("Link incorrect!!",link2,linkNode.getLink());
130         assertTrue("Node should be a LinkTag",node[4] instanceof LinkTag);
131         LinkTag linkNode2 = (LinkTag)node[4];
132         assertStringEquals("Link incorrect!!",link3,linkNode2.getLink());
133     }
134
135     /**
136      * This bug occurs when there is a null pointer exception thrown while scanning a tag using LinkScanner.
137      * Creation date: (7/1/2001 2:42:13 PM)
138      */

139     public void testLinkNodeBugNullPointerException() throws ParserException
140     {
141         createParser("<FORM action=http://search.yahoo.com/bin/search name=f><MAP name=m><AREA\n"+
142             "coords=0,0,52,52 HREF=\"http://www.yahoo.com/r/c1\" shape=RECT><AREA"+
143             "coords=53,0,121,52 HREF=\"http://www.yahoo.com/r/p1\" shape=RECT><AREA"+
144             "coords=122,0,191,52 HREF=\"http://www.yahoo.com/r/m1\" shape=RECT><AREA"+
145             "coords=441,0,510,52 HREF=\"http://www.yahoo.com/r/wn\" shape=RECT>","http://www.cj.com/");
146         parser.setNodeFactory (new PrototypicalNodeFactory (new LinkTag ()));
147         parseAndAssertNodeCount(6);
148     }
149
150     /**
151      * This bug occurs when there is a null pointer exception thrown while scanning a tag using LinkScanner.
152      * Creation date: (7/1/2001 2:42:13 PM)
153      */

154     public void testLinkNodeMailtoBug() throws ParserException
155     {
156         createParser("<A HREF='mailto:somik@yahoo.com'>hello</A>","http://www.cj.com/");
157         parseAndAssertNodeCount(1);
158         assertTrue("Node should be a LinkTag",node[0] instanceof LinkTag);
159         LinkTag linkNode = (LinkTag)node[0];
160         assertStringEquals("Link incorrect","somik@yahoo.com",linkNode.getLink());
161         assertEquals("Link Type",new Boolean JavaDoc(true),new Boolean JavaDoc(linkNode.isMailLink()));
162     }
163
164     /**
165      * This bug occurs when there is a null pointer exception thrown while scanning a tag using LinkScanner.
166      * Creation date: (7/1/2001 2:42:13 PM)
167      */

168     public void testLinkNodeSingleQuoteBug() throws ParserException
169     {
170         createParser("<A HREF='abcd.html'>hello</A>","http://www.cj.com/");
171         parseAndAssertNodeCount(1);
172         assertTrue("Node should be a LinkTag",node[0] instanceof LinkTag);
173         LinkTag linkNode = (LinkTag)node[0];
174         assertEquals("Link incorrect","http://www.cj.com/abcd.html",linkNode.getLink());
175     }
176
177     /**
178      * The bug being reproduced is this : <BR>
179      * &lt;BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000 <BR>
180      * vLink=#551a8b&gt;
181      * The above line is incorrectly parsed in that, the BODY tag is not identified.
182      * Creation date: (6/17/2001 4:01:06 PM)
183      */

184     public void testLinkTag() throws ParserException
185     {
186         createParser("<A HREF=\"test.html\">abcd</A>","http://www.google.com/test/index.html");
187         parseAndAssertNodeCount(1);
188         // The node should be an LinkTag
189
assertTrue("Node should be a LinkTag",node[0] instanceof LinkTag);
190         LinkTag LinkTag = (LinkTag)node[0];
191         assertEquals("The image locn","http://www.google.com/test/test.html",LinkTag.getLink());
192     }
193
194     /**
195      * The bug being reproduced is this : <BR>
196      * &lt;BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000 <BR>
197      * vLink=#551a8b&gt;
198      * The above line is incorrectly parsed in that, the BODY tag is not identified.
199      * Creation date: (6/17/2001 4:01:06 PM)
200      */

201     public void testLinkTagBug() throws ParserException
202     {
203         createParser("<A HREF=\"../test.html\">abcd</A>","http://www.google.com/test/index.html");
204         parseAndAssertNodeCount(1);
205         // The node should be an LinkTag
206
assertTrue("Node should be a LinkTag",node[0] instanceof LinkTag);
207         LinkTag LinkTag = (LinkTag)node[0];
208         assertEquals("The image locn","http://www.google.com/test.html",LinkTag.getLink());
209     }
210
211     /**
212      * The bug being reproduced is this : <BR>
213      * &lt;A HREF=&gt;Something&lt;A&gt;<BR>
214      * vLink=#551a8b&gt;
215      * The above line is incorrectly parsed in that, the BODY tag is not identified.
216      * Creation date: (6/17/2001 4:01:06 PM)
217      */

218     public void testNullTagBug() throws ParserException
219     {
220         createParser("<A HREF=>Something</A>","http://www.google.com/test/index.html");
221         parseAndAssertNodeCount(1);
222         // The node should be an LinkTag
223
assertTrue("Node should be a LinkTag",node[0] instanceof LinkTag);
224         LinkTag linkTag = (LinkTag)node[0];
225         assertEquals("The link location","",linkTag.getLink());
226         assertEquals("The link text","Something",linkTag.getLinkText());
227     }
228
229     public void testToPlainTextString() throws ParserException {
230         createParser("<A HREF='mailto:somik@yahoo.com'>hello</A>","http://www.cj.com/");
231         parseAndAssertNodeCount(1);
232         assertTrue("Node should be a LinkTag",node[0] instanceof LinkTag);
233         LinkTag linkTag = (LinkTag)node[0];
234         assertEquals("Link Plain Text","hello",linkTag.toPlainTextString());
235     }
236
237     public void testToHTML() throws ParserException {
238         String JavaDoc link1 = "<A HREF='mailto:somik@yahoo.com'>hello</A>";
239         String JavaDoc link2 = "<a \n"+
240             "href=\"http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/tech\n"+
241             "nical.html\"> Journalism 3.0</a>";
242         createParser(link1 + "\n"+
243             "<LI><font color=\"FF0000\" size=-1><b>Tech Samachar:</b></font>" +
244             link2 + " by Rajesh Jain","http://www.cj.com/");
245         parser.setNodeFactory (new PrototypicalNodeFactory (new LinkTag ()));
246         parseAndAssertNodeCount(10);
247         assertTrue("First Node should be a LinkTag",node[0] instanceof LinkTag);
248         LinkTag linkTag = (LinkTag)node[0];
249         assertStringEquals("Link Raw Text",link1,linkTag.toHtml());
250         assertTrue("Ninth Node should be a LinkTag",node[8] instanceof LinkTag);
251         linkTag = (LinkTag)node[8];
252         assertStringEquals("Link Raw Text",link2,linkTag.toHtml());
253     }
254
255     public void testTypeHttps() throws ParserException
256     {
257         LinkTag link;
258
259         createParser ("<A HREF='https://www.someurl.com'>Try https.</A>","http://sourceforge.net");
260         parseAndAssertNodeCount (1);
261         assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag);
262         link = (LinkTag)node[0];
263         assertTrue("This is a https link",link.isHTTPSLink());
264     }
265
266     public void testTypeFtp() throws ParserException
267     {
268         LinkTag link;
269
270         createParser ("<A HREF='ftp://www.someurl.com'>Try ftp.</A>","http://sourceforge.net");
271         parseAndAssertNodeCount (1);
272         assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag);
273         link = (LinkTag)node[0];
274         assertTrue("This is an ftp link",link.isFTPLink());
275     }
276
277     public void testTypeJavaScript() throws ParserException
278     {
279         LinkTag link;
280
281         createParser ("<A HREF='javascript://www.someurl.com'>Try javascript.</A>","http://sourceforge.net");
282         parseAndAssertNodeCount (1);
283         assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag);
284         link = (LinkTag)node[0];
285         assertTrue("This is a javascript link",link.isJavascriptLink());
286     }
287
288     public void testTypeHttpLink() throws ParserException
289     {
290         LinkTag link;
291
292         createParser ("<A HREF='http://www.someurl.com'>Try http.</A>","http://sourceforge.net");
293         parseAndAssertNodeCount (1);
294         assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag);
295         link = (LinkTag)node[0];
296         assertTrue("This is a http link : "+link.getLink(),link.isHTTPLink());
297     }
298
299     public void testRelativeTypeHttpLink() throws ParserException
300     {
301         LinkTag link;
302
303         createParser ("<A HREF='somePage.html'>Try relative http.</A>","http://sourceforge.net");
304         parseAndAssertNodeCount (1);
305         assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag);
306         link = (LinkTag)node[0];
307         assertTrue("This relative link is also a http link : "+link.getLink(),link.isHTTPLink());
308     }
309     
310     public void testTypeNonHttp() throws ParserException
311     {
312         LinkTag link;
313
314         createParser ("<A HREF='ftp://www.someurl.com'>Try non-http.</A>","http://sourceforge.net");
315         parseAndAssertNodeCount (1);
316         assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag);
317         link = (LinkTag)node[0];
318         assertTrue("This is not a http link : "+link.getLink(),!link.isHTTPLink());
319     }
320
321     public void testTypeHttpLikeLink() throws ParserException
322     {
323         LinkTag link;
324
325         createParser ("<A HREF='http://'>Try basic http.</A>","http://sourceforge.net");
326         parseAndAssertNodeCount (1);
327         assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag);
328         link = (LinkTag)node[0];
329         assertTrue("This is a http link",link.isHTTPLikeLink());
330         
331         createParser ("<A HREF='https://www.someurl.com'>Try https.</A>","http://sourceforge.net");
332         parseAndAssertNodeCount (1);
333         assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag);
334         link = (LinkTag)node[0];
335         assertTrue("This is a https link",link.isHTTPLikeLink());
336     }
337
338     /**
339      * Test mail link.
340      * Bug #738504 MailLink != HTTPLink
341      */

342     public void testMailToIsNotAHTTPLink () throws ParserException
343     {
344         LinkTag link;
345
346         createParser ("<A HREF='mailto:derrickoswald@users.sourceforge.net'>Derrick</A>","http://sourceforge.net");
347         parseAndAssertNodeCount (1);
348         assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag);
349         link = (LinkTag)node[0];
350         assertTrue ("bug #738504 MailLink != HTTPLink", !link.isHTTPLink ());
351         assertTrue ("bug #738504 MailLink != HTTPSLink", !link.isHTTPSLink ());
352     }
353
354     /**
355      * Bug #784767 irc://server/channel urls are HTTPLike?
356      */

357     public void testIrcIsNotAHTTPLink () throws ParserException
358     {
359         LinkTag link;
360
361         createParser ("<A HREF='irc://server/channel'>Try irc.</A>","http://sourceforge.net");
362         parseAndAssertNodeCount (1);
363         assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag);
364         link = (LinkTag)node[0];
365         assertTrue("This is not a http link", !link.isHTTPLikeLink ());
366     }
367
368     public void testAccessKey() throws ParserException {
369         createParser("<a HREF=\"http://www.kizna.com/servlets/SomeServlet?name=Sam Joseph\" accessKey=1>Click Here</A>");
370         parseAndAssertNodeCount(1);
371         assertTrue("The node should be a link tag",node[0] instanceof LinkTag);
372         LinkTag linkTag = (LinkTag)node[0];
373         assertEquals("Link URL of link tag","http://www.kizna.com/servlets/SomeServlet?name=Sam Joseph",linkTag.getLink());
374         assertEquals("Link Text of link tag","Click Here",linkTag.getLinkText());
375         assertEquals("Access key","1",linkTag.getAccessKey());
376     }
377
378     public void testErroneousLinkBug() throws ParserException {
379         createParser(
380             "<p>Site Comments?<br>" +
381                 "<a HREF=\"mailto:sam@neurogrid.com?subject=Site Comments\">" +
382                     "Mail Us" +
383                 "<a>" +
384             "</p>"
385         );
386         parseAndAssertNodeCount(6);
387         // The first node should be a Tag
388
assertTrue("First node should be a Tag",node[0] instanceof Tag);
389         // The second node should be a Text
390
assertTrue("Second node should be a Text",node[1] instanceof Text);
391         Text stringNode = (Text)node[1];
392         assertEquals("Text of the Text","Site Comments?",stringNode.getText());
393         assertTrue("Third node should be a tag",node[2] instanceof Tag);
394         assertTrue("Fourth node should be a link",node[3] instanceof LinkTag);
395         // LinkScanner.evaluate() says no HREF means it isn't a link:
396
assertTrue("Fifth node should be a tag",node[4] instanceof Tag);
397         assertTrue("Sixth node should be a tag",node[5] instanceof Tag);
398     }
399
400     /**
401      * Test case based on a report by Raghavender Srimantula, of the parser giving out of memory exceptions. Found to occur
402      * on the following piece of html
403      * <pre>
404      * <a HREF=s/8741><img SRC="http://us.i1.yimg.com/us.yimg.com/i/i16/mov_popc.gif" height=16 width=16 border=0></img></td><td nowrap> &nbsp;
405      * <a HREF=s/7509>
406      * </pre>
407      */

408     public void testErroneousLinkBugFromYahoo2() throws ParserException {
409         String JavaDoc link = "<a HREF=s/8741>" +
410                 "<img SRC=\"http://us.i1.yimg.com/us.yimg.com/i/i16/mov_popc.gif\" height=16 width=16 border=0>";
411         createParser(
412             "<td>" +
413                 link +
414             "</td>" +
415             "<td nowrap> &nbsp;\n"+
416                 "<a HREF=s/7509><b>Yahoo! Movies</b></a>" +
417             "</td>","http://www.yahoo.com");
418         Node linkNodes [] = parser.extractAllNodesThatAre(LinkTag.class);
419
420         assertEquals("number of links",2,linkNodes.length);
421         LinkTag linkTag = (LinkTag)linkNodes[0];
422         assertStringEquals("Link","http://www.yahoo.com/s/8741",linkTag.getLink());
423         // Verify the link data
424
assertStringEquals("Link Text","",linkTag.getLinkText());
425         // Verify the reconstruction html
426
assertStringEquals("toHTML",link + "</a>",linkTag.toHtml());
427     }
428
429     /**
430      * Test case based on a report by Raghavender Srimantula, of the parser giving out of memory exceptions. Found to occur
431      * on the following piece of html
432      * <pre>
433      * <a HREF=s/8741><img SRC="http://us.i1.yimg.com/us.yimg.com/i/i16/mov_popc.gif" height=16 width=16 border=0></img>This is test
434      * <a HREF=s/7509>
435      * </pre>
436      */

437     public void testErroneousLinkBugFromYahoo() throws ParserException {
438         String JavaDoc link =
439             "<a HREF=s/8741>" +
440                 "<img SRC=\"http://us.i1.yimg.com/us.yimg.com/i/i16/mov_popc.gif\" " +
441                      "height=16 " +
442                      "width=16 " +
443                      "border=0>" +
444                 "This is a test\n";
445         createParser(
446                 link +
447                 "<a HREF=s/7509>" +
448                     "<b>Yahoo! Movies</b>" +
449                 "</a>",
450             "http://www.yahoo.com"
451         );
452         parseAndAssertNodeCount(2);
453         assertTrue("First node should be a LinkTag",node[0] instanceof LinkTag);
454         assertTrue("Second node should be a LinkTag",node[1] instanceof LinkTag);
455         LinkTag linkTag = (LinkTag)node[0];
456         assertEquals("Link","http://www.yahoo.com/s/8741",linkTag.getLink());
457         // Verify the link data
458
assertEquals("Link Text","This is a test\n",linkTag.getLinkText());
459         // Verify the reconstruction html
460
assertStringEquals("toHTML()",link + "</a>",linkTag.toHtml());
461     }
462
463     /**
464      * This is the reproduction of a bug which produces multiple text copies.
465      */

466     public void testExtractLinkInvertedCommasBug2() throws ParserException
467     {
468         createParser("<a HREF=\"http://cbc.ca/artsCanada/stories/greatnorth271202\" class=\"lgblacku\">Vancouver schools plan 'Great Northern Way'</a>");
469         parseAndAssertNodeCount(1);
470         assertTrue("The node should be a link tag",node[0] instanceof LinkTag);
471         LinkTag linkTag = (LinkTag)node[0];
472         assertStringEquals("Extracted Text","Vancouver schools plan 'Great Northern Way'", linkTag.getLinkText ());
473     }
474
475     /**
476      * Bug pointed out by Sam Joseph (sam@neurogrid.net)
477      * Links with spaces in them will get their spaces absorbed
478      */

479     public void testLinkSpacesBug() throws ParserException{
480         createParser("<a HREF=\"http://www.kizna.com/servlets/SomeServlet?name=Sam Joseph\">Click Here</A>");
481         parseAndAssertNodeCount(1);
482         assertTrue("The node should be a link tag",node[0] instanceof LinkTag);
483         LinkTag linkTag = (LinkTag)node[0];
484         assertEquals("Link URL of link tag","http://www.kizna.com/servlets/SomeServlet?name=Sam Joseph",linkTag.getLink());
485         assertEquals("Link Text of link tag","Click Here",linkTag.getLinkText());
486     }
487
488     /**
489      * Bug reported by Raj Sharma,5-Apr-2002, upon parsing
490      * http://www.samachar.com, the entire page could not be picked up.
491      * The problem was occurring after parsing a particular link
492      * after which the parsing would not proceed. This link was spread over three lines.
493      * The bug has been reproduced and fixed.
494      */

495     public void testMultipleLineBug() throws ParserException {
496         createParser("<LI><font color=\"FF0000\" size=-1><b>Tech Samachar:</b></font><a \n"+
497         "href=\"http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/tech\n"+
498         "nical.html\"> Journalism 3.0</a> by Rajesh Jain");
499         parser.setNodeFactory (new PrototypicalNodeFactory (new LinkTag ()));
500         parseAndAssertNodeCount(8);
501         assertTrue("Seventh node should be a link tag",node[6] instanceof LinkTag);
502         LinkTag linkTag = (LinkTag)node[6];
503         String JavaDoc exp = new String JavaDoc("http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/technical.html");
504         //assertEquals("Length of link tag",exp.length(), linkTag.getLink().length());
505
assertStringEquals("Link URL of link tag",exp,linkTag.getLink());
506         assertEquals("Link Text of link tag"," Journalism 3.0",linkTag.getLinkText());
507         assertTrue("Eight node should be a string node",node[7] instanceof Text);
508         Text stringNode = (Text)node[7];
509         assertEquals("String node contents"," by Rajesh Jain",stringNode.getText());
510     }
511
512     public void testRelativeLinkScan() throws ParserException {
513         createParser("<A HREF=\"mytest.html\"> Hello World</A>","http://www.yahoo.com");
514         parseAndAssertNodeCount(1);
515         assertTrue("Node identified should be HTMLLinkTag",node[0] instanceof LinkTag);
516         LinkTag linkTag = (LinkTag)node[0];
517         assertEquals("Expected Link","http://www.yahoo.com/mytest.html",linkTag.getLink());
518     }
519
520     public void testRelativeLinkScan2() throws ParserException {
521         createParser("<A HREF=\"abc/def/mytest.html\"> Hello World</A>","http://www.yahoo.com");
522         parseAndAssertNodeCount(1);
523         assertTrue("Node identified should be HTMLLinkTag",node[0] instanceof LinkTag);
524         LinkTag linkTag = (LinkTag)node[0];
525         assertStringEquals("Expected Link","http://www.yahoo.com/abc/def/mytest.html",linkTag.getLink());
526     }
527
528     public void testRelativeLinkScan3() throws ParserException {
529         createParser("<A HREF=\"../abc/def/mytest.html\"> Hello World</A>","http://www.yahoo.com/ghi");
530         parseAndAssertNodeCount(1);
531         assertTrue("Node identified should be HTMLLinkTag",node[0] instanceof LinkTag);
532         LinkTag linkTag = (LinkTag)node[0];
533         assertStringEquals("Expected Link","http://www.yahoo.com/abc/def/mytest.html",linkTag.getLink());
534     }
535
536     /**
537      * Test scan with data which is of diff nodes type
538      */

539     public void testScan() throws ParserException
540     {
541         createParser("<A HREF=\"mytest.html\"><IMG SRC=\"abcd.jpg\">Hello World</A>","http://www.yahoo.com");
542         parser.setNodeFactory (
543             new PrototypicalNodeFactory (
544                 new Tag[] {
545                     new LinkTag (),
546                     new ImageTag (),
547                 }));
548         parseAndAssertNodeCount(1);
549         assertTrue("Node should be a link node",node[0] instanceof LinkTag);
550
551         LinkTag linkTag = (LinkTag)node[0];
552         // Get the link data and cross-check
553
Node [] dataNode= new Node[10];
554         int i = 0;
555         for (SimpleNodeIterator e = linkTag.children();e.hasMoreNodes();)
556         {
557             dataNode[i++] = e.nextNode();
558         }
559         assertEquals("Number of data nodes",new Integer JavaDoc(2),new Integer JavaDoc(i));
560         assertTrue("First data node should be an Image Node",dataNode[0] instanceof ImageTag);
561         assertTrue("Second data node shouls be a String Node",dataNode[1] instanceof Text);
562
563         // Check the contents of each data node
564
ImageTag imageTag = (ImageTag)dataNode[0];
565         assertEquals("Image URL","http://www.yahoo.com/abcd.jpg",imageTag.getImageURL());
566         Text stringNode = (Text)dataNode[1];
567         assertEquals("String Contents","Hello World",stringNode.getText());
568     }
569
570     /**
571      * A bug in the freshmeat page - really bad html
572      * tag - &lt;A&gt;Revision&lt;\a&gt;
573      * Reported by Mazlan Mat
574      * Note: Actually, this is completely legal HTML - Derrick
575      */

576     public void testFreshMeatBug() throws ParserException
577     {
578         String JavaDoc html = "<a>Revision</a>";
579         createParser(html,"http://www.yahoo.com");
580         parseAndAssertNodeCount(1);
581         assertTrue("Node 0 should be a tag",node[0] instanceof Tag);
582         Tag tag = (Tag)node[0];
583         assertEquals("Tag Contents",html,tag.toHtml());
584         assertEquals("Node 0 should have one child", 1, tag.getChildren ().size ());
585         assertTrue("The child should be a string node", tag.getChildren ().elementAt (0) instanceof Text);
586         Text stringNode = (Text)tag.getChildren ().elementAt (0);
587         assertEquals("Text Contents","Revision",stringNode.getText());
588     }
589
590     /**
591      * Test suggested by Cedric Rosa
592      * A really bad link tag sends parser into infinite loop
593      */

594     public void testBrokenLink() throws ParserException {
595         createParser(
596             "<a HREF=\"faq.html\">" +
597                 "<br>\n"+
598                 "<img SRC=\"images/46revues.gif\" " +
599                      "width=\"100\" " +
600                      "height=\"46\" " +
601                      "border=\"0\" " +
602                      "alt=\"Rejoignez revues.org!\" " +
603                      "align=\"middle\">",
604             "http://www.yahoo.com"
605         );
606         parseAndAssertNodeCount(1);
607         assertTrue("Node 0 should be a link tag",node[0] instanceof LinkTag);
608         LinkTag linkTag = (LinkTag)node[0];
609         assertNotNull(linkTag.toString());
610     }
611
612     public void testLinkDataContents() throws ParserException {
613         createParser("<a HREF=\"http://transfer.go.com/cgi/atransfer.pl?goto=http://www.signs.movies.com&name=114332&srvc=nws&context=283&guid=4AD5723D-C802-4310-A388-0B24E1A79689\" target=\"_new\"><img SRC=\"http://ad.abcnews.com/ad/sponsors/buena_vista_pictures/bvpi-ban0003.gif\" width=468 height=60 border=\"0\" alt=\"See Signs in Theaters 8-2 - Starring Mel Gibson\" align=><font face=\"verdana,arial,helvetica\" SIZE=\"1\"><b></b></font></a>","http://transfer.go.com");
614         parser.setNodeFactory (
615             new PrototypicalNodeFactory (
616                 new Tag[] {
617                     new LinkTag (),
618                     new ImageTag (),
619                 }));
620         parseAndAssertNodeCount(1);
621         assertTrue("Node 0 should be a link tag",node[0] instanceof LinkTag);
622         LinkTag linkTag = (LinkTag)node[0];
623         assertEquals("Link URL","http://transfer.go.com/cgi/atransfer.pl?goto=http://www.signs.movies.com&name=114332&srvc=nws&context=283&guid=4AD5723D-C802-4310-A388-0B24E1A79689",linkTag.getLink());
624         assertEquals("Link Text","",linkTag.getLinkText());
625         Node [] containedNodes = new Node[10];
626         int i=0;
627         for (SimpleNodeIterator e = linkTag.children();e.hasMoreNodes();) {
628             containedNodes[i++] = e.nextNode();
629         }
630         assertEquals("There should be 5 contained nodes in the link tag",5,i);
631         assertTrue("First contained node should be an image tag",containedNodes[0] instanceof ImageTag);
632         ImageTag imageTag = (ImageTag)containedNodes[0];
633         assertEquals("Image Location","http://ad.abcnews.com/ad/sponsors/buena_vista_pictures/bvpi-ban0003.gif",imageTag.getImageURL());
634         assertEquals("Image Height","60",imageTag.getAttribute("HEIGHT"));
635         assertEquals("Image Width","468",imageTag.getAttribute("WIDTH"));
636         assertEquals("Image Border","0",imageTag.getAttribute("BORDER"));
637         assertEquals("Image Alt","See Signs in Theaters 8-2 - Starring Mel Gibson",imageTag.getAttribute("ALT"));
638         assertTrue("Second contained node should be Tag",containedNodes[1] instanceof Tag);
639         Tag tag1 = (Tag)containedNodes[1];
640         assertEquals("Tag Contents","font face=\"verdana,arial,helvetica\" SIZE=\"1\"",tag1.getText());
641         assertTrue("Third contained node should be Tag",containedNodes[2] instanceof Tag);
642         Tag tag2 = (Tag)containedNodes[2];
643         assertEquals("Tag Contents","b",tag2.getText());
644         assertTrue("Fourth contained node should be a Tag",containedNodes[3] instanceof Tag);
645         Tag tag = (Tag)containedNodes[3];
646         assertTrue("Fourth contained node should be an EndTag",tag.isEndTag ());
647         assertEquals("Fourth Tag contents","/b",tag.getText());
648         assertTrue("Fifth contained node should be a Tag",containedNodes[4] instanceof Tag);
649         tag = (Tag)containedNodes[4];
650         assertTrue("Fifth contained node should be an EndTag",tag.isEndTag ());
651         assertEquals("Fifth Tag contents","/font",tag.getText());
652
653     }
654
655     public void testBaseRefLink() throws ParserException {
656         createParser("<html>\n"+
657             "<head>\n"+
658             "<TITLE>test page</TITLE>\n"+
659             "<BASE HREF=\"http://www.abc.com/\">\n"+
660             "<a HREF=\"home.cfm\">Home</a>\n"+
661             "...\n"+
662             "</html>","http://transfer.go.com");
663         parseAndAssertNodeCount(1);
664         assertTrue("Node 1 should be a HTML tag", node[0] instanceof Html);
665         Html html = (Html)node[0];
666         assertTrue("Html tag should have 2 children", 2 == html.getChildCount ());
667         assertTrue("Html 2nd child should be HEAD tag", html.getChild (1) instanceof HeadTag);
668         HeadTag head = (HeadTag)html.getChild (1);
669         assertTrue("Head tag should have 7 children", 7 == head.getChildCount ());
670         assertTrue("Head 6th child should be a link tag", head.getChild (5) instanceof LinkTag);
671         LinkTag linkTag = (LinkTag)head.getChild (5);
672         assertEquals("Resolved Link","http://www.abc.com/home.cfm",linkTag.getLink());
673         assertEquals("Resolved Link Text","Home",linkTag.getLinkText());
674     }
675
676     /**
677      * This is a reproduction of bug 617228, reported by
678      * Stephen J. Harrington. When faced with a link like :
679      * &lt;A
680      * HREF="../../../../../cgi-bin/view_search?query_text=postdate&gt;20020701&txt_clr=White&bg_clr=Red&url=http://loc
681      * al
682      * host/Testing/Report
683      * 1.html"&gt;20020702 Report 1&lt;/A&gt;
684      *
685      * the parser is unable to handle the link correctly because the greater-than
686      * symbol is mistaken for the end of the tag.
687      */

688     public void testQueryLink() throws ParserException {
689         createParser("<A \n"+
690         "HREF=\"/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html\">20020702 Report 1</A>","http://transfer.go.com");
691         parseAndAssertNodeCount(1);
692         assertTrue("Node 1 should be a link tag",node[0] instanceof LinkTag);
693         LinkTag linkTag = (LinkTag)node[0];
694         assertStringEquals("Resolved Link","http://transfer.go.com/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html",linkTag.getLink());
695         assertEquals("Resolved Link Text","20020702 Report 1",linkTag.getLinkText());
696
697     }
698
699     public void testNotMailtoLink() throws ParserException {
700         createParser("<A HREF=\"mailto.html\">not@for.real</A>","http://www.cj.com/");
701         parseAndAssertNodeCount(1);
702         assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
703         LinkTag linkTag = (LinkTag) node[0];
704
705         assertEquals("Link Plain Text", "not@for.real", linkTag.toPlainTextString());
706         assertTrue("Link is not a mail link", !linkTag.isMailLink());
707     }
708
709     public void testMailtoLink() throws ParserException {
710         createParser("<A HREF=\"mailto:this@is.real\">this@is.real</A>","http://www.cj.com/");
711         parseAndAssertNodeCount(1);
712         assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
713         LinkTag linkTag = (LinkTag) node[0];
714         assertEquals("Link Plain Text", "this@is.real", linkTag.toPlainTextString());
715         assertTrue("Link is a mail link", linkTag.isMailLink());
716     }
717
718     public void testJavascriptLink() throws ParserException {
719         createParser("<A HREF=\"javascript:alert('hello');\">say hello</A>","http://www.cj.com/");
720         parseAndAssertNodeCount(1);
721         assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
722         LinkTag linkTag = (LinkTag) node[0];
723
724         assertEquals("Link Plain Text", "say hello", linkTag.toPlainTextString());
725         assertTrue("Link is a Javascript command", linkTag.isJavascriptLink());
726     }
727
728     public void testNotJavascriptLink() throws ParserException {
729         createParser("<A HREF=\"javascript_not.html\">say hello</A>","http://www.cj.com/");
730         parseAndAssertNodeCount(1);
731         assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
732         LinkTag linkTag = (LinkTag) node[0];
733
734         assertEquals("Link Plain Text", "say hello", linkTag.toPlainTextString());
735         assertTrue("Link is not a Javascript command", !linkTag.isJavascriptLink());
736     }
737
738     public void testFTPLink() throws ParserException {
739         createParser("<A HREF=\"ftp://some.where.it\">my ftp</A>","http://www.cj.com/");
740         parseAndAssertNodeCount(1);
741         assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
742         LinkTag linkTag = (LinkTag) node[0];
743
744         assertEquals("Link Plain Text", "my ftp", linkTag.toPlainTextString());
745         assertTrue("Link is a FTP site", linkTag.isFTPLink());
746     }
747
748     public void testNotFTPLink() throws ParserException {
749         createParser("<A HREF=\"ftp.html\">my ftp</A>","http://www.cj.com/");
750         parseAndAssertNodeCount(1);
751         assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
752         LinkTag linkTag = (LinkTag) node[0];
753
754         assertEquals("Link Plain Text", "my ftp", linkTag.toPlainTextString());
755         assertTrue("Link is not a FTP site", !linkTag.isFTPLink());
756     }
757
758     public void testRelativeLinkNotHTMLBug() throws ParserException {
759         createParser("<A HREF=\"newpage.html\">New Page</A>","http://www.mysite.com/books/some.asp");
760         parseAndAssertNodeCount(1);
761         assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
762         LinkTag linkTag = (LinkTag) node[0];
763         assertEquals("Link","http://www.mysite.com/books/newpage.html",linkTag.getLink());
764     }
765
766     public void testBadImageInLinkBug() throws ParserException {
767         createParser("<a HREF=\"registration.asp?EventID=1272\"><img border=\"0\" SRC=\"\\images\\register.gif\"</a>","http://www.fedpage.com/Event.asp?EventID=1272");
768         parseAndAssertNodeCount(1);
769         assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
770         LinkTag linkTag = (LinkTag) node[0];
771         // Get the image tag from the link
772

773         Node insideNodes [] = new Node[10];
774         int j =0 ;
775         for (SimpleNodeIterator e = linkTag.children();e.hasMoreNodes();) {
776             insideNodes[j++]= e.nextNode();
777         }
778         assertEquals("Number of contained internal nodes",1,j);
779         assertTrue(insideNodes[0] instanceof ImageTag);
780         ImageTag imageTag = (ImageTag)insideNodes[0];
781         assertEquals("Image Tag Location","http://www.fedpage.com/images\\register.gif",imageTag.getImageURL());
782     }
783
784     /**
785      * This is an attempt to reproduce bug 677874
786      * reported by James Moliere. A link tag of the form
787      * <code>
788      * <a class=rlbA HREF=/news/866201.asp?0sl=-
789      * 32>Shoe bomber handed life sentence</a>
790      * </code>
791      * is not parsed correctly. The second '=' sign in the link causes
792      * the parser to treat it as a separate attribute.
793      */

794     public void testLinkContainsEqualTo() throws Exception JavaDoc {
795         createParser(
796             "<a class=rlbA HREF=/news/866201.asp?0sl=-" +
797             "32>Shoe bomber handed life sentence</a>"
798         );
799         parseAndAssertNodeCount(1);
800         assertType("node type",LinkTag.class,node[0]);
801         LinkTag linkTag = (LinkTag)node[0];
802         assertStringEquals(
803             "link text",
804             "Shoe bomber handed life sentence",
805             linkTag.getLinkText()
806         );
807         assertStringEquals(
808             "link url",
809             "/news/866201.asp?0sl=-32",
810             linkTag.getLink()
811         );
812     }
813
814     /**
815      * Bug report by Cory Seefurth
816      * @throws Exception
817      */

818     public void _testLinkWithJSP() throws Exception JavaDoc {
819         createParser(
820             "<a HREF=\"<%=Application(\"sURL\")% " +
821             ">/literature/index.htm\">Literature</a>"
822         );
823         parseAndAssertNodeCount(1);
824         assertType("should be link tag",LinkTag.class,node[0]);
825         LinkTag linkTag = (LinkTag)node[0];
826         assertStringEquals("expected link","<%=Application(\"sURL\")%>/literature/index.htm",linkTag.getLink());
827     }
828
829     public void testTagSymbolsInLinkText() throws Exception JavaDoc {
830         createParser(
831             "<a HREF=\"/cataclysm/Langy-AnEmpireReborn-Ch2.shtml#story\"" +
832             "><< An Empire Reborn: Chapter 2 <<</a>"
833         );
834         parseAndAssertNodeCount(1);
835         assertType("node",LinkTag.class, node[0]);
836         LinkTag linkTag = (LinkTag)node[0];
837         assertEquals("link text","<< An Empire Reborn: Chapter 2 <<",linkTag.getLinkText());
838     }
839
840     /**
841      * See bug #813838 links not parsed correctly
842      */

843     public void testPlainText() throws Exception JavaDoc
844     {
845         String JavaDoc html = "<a HREF=Cities/><b>Cities</b></a>";
846         createParser (html);
847         parseAndAssertNodeCount (1);
848         assertType("node", LinkTag.class, node[0]);
849         LinkTag linkTag = (LinkTag)node[0];
850         assertEquals ("plain text", "Cities", linkTag.toPlainTextString ());
851     }
852
853     /**
854      * See bug #982175 False Positives on &reg; entity
855      */

856     public void testCharacterReferenceInLink() throws Exception JavaDoc
857     {
858         String JavaDoc html = "<a HREF=\"http://www.someplace.com/somepage.html?&region=us\">Search By Region</a>" +
859             "<a HREF=\"http://www.someplace.com/somepage.html?&region=&destination=184\">Search by Destination</a>";
860         createParser (html);
861         parseAndAssertNodeCount (2);
862         assertType("node", LinkTag.class, node[0]);
863         LinkTag linkTag = (LinkTag)node[0];
864         assertEquals ("link", "http://www.someplace.com/somepage.html?&region=us", linkTag.getLink());
865         assertType("node", LinkTag.class, node[1]);
866         linkTag = (LinkTag)node[1];
867         assertEquals ("link", "http://www.someplace.com/somepage.html?&region=&destination=184", linkTag.getLink());
868     }
869     
870 }
871
Popular Tags