KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > tests > utilTests > HTMLLinkProcessorTest


1 // $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/tests/utilTests/HTMLLinkProcessorTest.java,v 1.2 2004/02/11 02:16:57 woolfel Exp $
2
/*
3  * ====================================================================
4  * Copyright 2002-2004 The Apache Software Foundation.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  */

19
20 // The developers of JMeter and Apache are grateful to the developers
21
// of HTMLParser for giving Apache Software Foundation a non-exclusive
22
// license. The performance benefits of HTMLParser are clear and the
23
// users of JMeter will benefit from the hard work of the HTMLParser
24
// team. For detailed information about HTMLParser, the project is
25
// hosted on sourceforge at http://htmlparser.sourceforge.net/.
26
//
27
// HTMLParser was originally created by Somik Raha in 2000. Since then
28
// a healthy community of users has formed and helped refine the
29
// design so that it is able to tackle the difficult task of parsing
30
// dirty HTML. Derrick Oswald is the current lead developer and was kind
31
// enough to assist JMeter.
32

33 package org.htmlparser.tests.utilTests;
34 import org.htmlparser.tags.LinkTag;
35 import org.htmlparser.tests.ParserTestCase;
36 import org.htmlparser.util.LinkProcessor;
37 import org.htmlparser.util.ParserException;
38
39 public class HTMLLinkProcessorTest extends ParserTestCase
40 {
41     private LinkProcessor lp;
42
43     public HTMLLinkProcessorTest(String JavaDoc name)
44     {
45         super(name);
46     }
47
48     protected void setUp()
49     {
50         lp = new LinkProcessor();
51     }
52
53     public void testIsURL()
54     {
55         String JavaDoc resourceLoc1 = "http://someurl.com";
56         String JavaDoc resourceLoc2 = "myfilehttp.dat";
57         assertTrue(
58             resourceLoc1 + " should be a url",
59             LinkProcessor.isURL(resourceLoc1));
60         assertTrue(
61             resourceLoc2 + " should not be a url",
62             !LinkProcessor.isURL(resourceLoc2));
63         String JavaDoc resourceLoc3 =
64             "file://localhost/D:/java/jdk1.3/docs/api/overview-summary.html";
65         assertTrue(
66             resourceLoc3 + " should be a url",
67             LinkProcessor.isURL(resourceLoc3));
68
69     }
70
71     public void testFixSpaces()
72     {
73         String JavaDoc url =
74             "http://htmlparser.sourceforge.net/test/This is a Test Page.html";
75         String JavaDoc fixedURL = LinkProcessor.fixSpaces(url);
76         int index = fixedURL.indexOf(" ");
77         assertEquals(
78             "Expected",
79             "http://htmlparser.sourceforge.net/test/This%20is%20a%20Test%20Page.html",
80             fixedURL);
81     }
82
83     /**
84      * Reproduction of bug 673379 reported by Joe Robbins. Parser goes into
85      * infinte loop if the link has no slashes.
86      */

87     public void testLinkWithNoSlashes() throws Exception JavaDoc
88     {
89         createParser("<A HREF=\".foo.txt\">Foo</A>", "http://www.oygevalt.com");
90         parser.registerScanners();
91         parseAndAssertNodeCount(1);
92         assertTrue(node[0] instanceof LinkTag);
93         LinkTag linkTag = (LinkTag) node[0];
94         assertStringEquals(
95             "link",
96             "http://www.oygevalt.com/foo.txt",
97             linkTag.getLink());
98         assertEquals("link", "Foo", linkTag.getLinkText());
99     }
100     //
101
// Tests from Appendix C Examples of Resolving Relative URI References
102
// RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax
103
// T. Berners-Lee et al.
104
// http://www.ietf.org/rfc/rfc2396.txt
105

106     // Within an object with a well-defined base URI of
107
static final String JavaDoc baseURI = "http://a/b/c/d;p?q";
108     // the relative URI would be resolved as follows:
109

110     // C.1. Normal Examples
111
// g:h = g:h
112
// g = http://a/b/c/g
113
// ./g = http://a/b/c/g
114
// g/ = http://a/b/c/g/
115
// /g = http://a/g
116
// //g = http://g
117
// ?y = http://a/b/c/?y
118
// g?y = http://a/b/c/g?y
119
// #s = (current document)#s
120
// g#s = http://a/b/c/g#s
121
// g?y#s = http://a/b/c/g?y#s
122
// ;x = http://a/b/c/;x
123
// g;x = http://a/b/c/g;x
124
// g;x?y#s = http://a/b/c/g;x?y#s
125
// . = http://a/b/c/
126
// ./ = http://a/b/c/
127
// .. = http://a/b/
128
// ../ = http://a/b/
129
// ../g = http://a/b/g
130
// ../.. = http://a/
131
// ../../ = http://a/
132
// ../../g = http://a/g
133

134     public void test1() throws ParserException
135     {
136         assertEquals(
137             "test1 failed",
138             "https:h",
139             (new LinkProcessor()).extract("https:h", baseURI));
140     }
141     public void test2() throws ParserException
142     {
143         assertEquals(
144             "test2 failed",
145             "http://a/b/c/g",
146             (new LinkProcessor()).extract("g", baseURI));
147     }
148     public void test3() throws ParserException
149     {
150         assertEquals(
151             "test3 failed",
152             "http://a/b/c/g",
153             (new LinkProcessor()).extract("./g", baseURI));
154     }
155     public void test4() throws ParserException
156     {
157         assertEquals(
158             "test4 failed",
159             "http://a/b/c/g/",
160             (new LinkProcessor()).extract("g/", baseURI));
161     }
162     public void test5() throws ParserException
163     {
164         assertEquals(
165             "test5 failed",
166             "http://a/g",
167             (new LinkProcessor()).extract("/g", baseURI));
168     }
169     public void test6() throws ParserException
170     {
171         assertEquals(
172             "test6 failed",
173             "http://g",
174             (new LinkProcessor()).extract("//g", baseURI));
175     }
176     public void test7() throws ParserException
177     {
178         assertEquals(
179             "test7 failed",
180             "http://a/b/c/?y",
181             (new LinkProcessor()).extract("?y", baseURI));
182     }
183     public void test8() throws ParserException
184     {
185         assertEquals(
186             "test8 failed",
187             "http://a/b/c/g?y",
188             (new LinkProcessor()).extract("g?y", baseURI));
189     }
190     public void test9() throws ParserException
191     {
192         assertEquals(
193             "test9 failed",
194             "https:h",
195             (new LinkProcessor()).extract("https:h", baseURI));
196     }
197     public void test10() throws ParserException
198     {
199         assertEquals(
200             "test10 failed",
201             "https:h",
202             (new LinkProcessor()).extract("https:h", baseURI));
203     }
204     // #s = (current document)#s
205
public void test11() throws ParserException
206     {
207         assertEquals(
208             "test11 failed",
209             "http://a/b/c/g#s",
210             (new LinkProcessor()).extract("g#s", baseURI));
211     }
212     public void test12() throws ParserException
213     {
214         assertEquals(
215             "test12 failed",
216             "http://a/b/c/g?y#s",
217             (new LinkProcessor()).extract("g?y#s", baseURI));
218     }
219     public void test13() throws ParserException
220     {
221         assertEquals(
222             "test13 failed",
223             "http://a/b/c/;x",
224             (new LinkProcessor()).extract(";x", baseURI));
225     }
226     public void test14() throws ParserException
227     {
228         assertEquals(
229             "test14 failed",
230             "http://a/b/c/g;x",
231             (new LinkProcessor()).extract("g;x", baseURI));
232     }
233     public void test15() throws ParserException
234     {
235         assertEquals(
236             "test15 failed",
237             "http://a/b/c/g;x?y#s",
238             (new LinkProcessor()).extract("g;x?y#s", baseURI));
239     }
240     public void test16() throws ParserException
241     {
242         assertEquals(
243             "test16 failed",
244             "http://a/b/c/",
245             (new LinkProcessor()).extract(".", baseURI));
246     }
247     public void test17() throws ParserException
248     {
249         assertEquals(
250             "test17 failed",
251             "http://a/b/c/",
252             (new LinkProcessor()).extract("./", baseURI));
253     }
254     public void test18() throws ParserException
255     {
256         assertEquals(
257             "test18 failed",
258             "http://a/b/",
259             (new LinkProcessor()).extract("..", baseURI));
260     }
261     public void test19() throws ParserException
262     {
263         assertEquals(
264             "test19 failed",
265             "http://a/b/",
266             (new LinkProcessor()).extract("../", baseURI));
267     }
268     public void test20() throws ParserException
269     {
270         assertEquals(
271             "test20 failed",
272             "http://a/b/g",
273             (new LinkProcessor()).extract("../g", baseURI));
274     }
275     public void test21() throws ParserException
276     {
277         assertEquals(
278             "test21 failed",
279             "http://a/",
280             (new LinkProcessor()).extract("../..", baseURI));
281     }
282     public void test22() throws ParserException
283     {
284         assertEquals(
285             "test22 failed",
286             "http://a/g",
287             (new LinkProcessor()).extract("../../g", baseURI));
288     }
289
290     // C.2. Abnormal Examples
291
// Although the following abnormal examples are unlikely to occur in
292
// normal practice, all URI parsers should be capable of resolving them
293
// consistently. Each example uses the same base as above.
294
//
295
// An empty reference refers to the start of the current document.
296
//
297
// <> = (current document)
298
//
299
// Parsers must be careful in handling the case where there are more
300
// relative path ".." segments than there are hierarchical levels in the
301
// base URI's path. Note that the ".." syntax cannot be used to change
302
// the authority component of a URI.
303
//
304
// ../../../g = http://a/../g
305
// ../../../../g = http://a/../../g
306
//
307
// In practice, some implementations strip leading relative symbolic
308
// elements (".", "..") after applying a relative URI calculation, based
309
// on the theory that compensating for obvious author errors is better
310
// than allowing the request to fail. Thus, the above two references
311
// will be interpreted as "http://a/g" by some implementations.
312
//
313
// Similarly, parsers must avoid treating "." and ".." as special when
314
// they are not complete components of a relative path.
315
//
316
// /./g = http://a/./g
317
// /../g = http://a/../g
318
// g. = http://a/b/c/g.
319
// .g = http://a/b/c/.g
320
// g.. = http://a/b/c/g..
321
// ..g = http://a/b/c/..g
322
//
323
// Less likely are cases where the relative URI uses unnecessary or
324
// nonsensical forms of the "." and ".." complete path segments.
325
//
326
// ./../g = http://a/b/g
327
// ./g/. = http://a/b/c/g/
328
// g/./h = http://a/b/c/g/h
329
// g/../h = http://a/b/c/h
330
// g;x=1/./y = http://a/b/c/g;x=1/y
331
// g;x=1/../y = http://a/b/c/y
332
//
333
// All client applications remove the query component from the base URI
334
// before resolving relative URI. However, some applications fail to
335
// separate the reference's query and/or fragment components from a
336
// relative path before merging it with the base path. This error is
337
// rarely noticed, since typical usage of a fragment never includes the
338
// hierarchy ("/") character, and the query component is not normally
339
// used within relative references.
340
//
341
// g?y/./x = http://a/b/c/g?y/./x
342
// g?y/../x = http://a/b/c/g?y/../x
343
// g#s/./x = http://a/b/c/g#s/./x
344
// g#s/../x = http://a/b/c/g#s/../x
345
//
346
// Some parsers allow the scheme name to be present in a relative URI if
347
// it is the same as the base URI scheme. This is considered to be a
348
// loophole in prior specifications of partial URI [RFC1630]. Its use
349
// should be avoided.
350
//
351
// http:g = http:g ; for validating parsers
352
// | http://a/b/c/g ; for backwards compatibility
353

354     // public void test23 () throws HTMLParserException
355
// {
356
// assertEquals ("test23 failed", "http://a/../g", (new HTMLLinkProcessor ()).extract ("../../../g", baseURI));
357
// }
358
// public void test24 () throws HTMLParserException
359
// {
360
// assertEquals ("test24 failed", "http://a/../../g", (new HTMLLinkProcessor ()).extract ("../../../../g", baseURI));
361
// }
362
public void test23() throws ParserException
363     {
364         assertEquals(
365             "test23 failed",
366             "http://a/g",
367             (new LinkProcessor()).extract("../../../g", baseURI));
368     }
369     public void test24() throws ParserException
370     {
371         assertEquals(
372             "test24 failed",
373             "http://a/g",
374             (new LinkProcessor()).extract("../../../../g", baseURI));
375     }
376     public void test25() throws ParserException
377     {
378         assertEquals(
379             "test25 failed",
380             "http://a/./g",
381             (new LinkProcessor()).extract("/./g", baseURI));
382     }
383     public void test26() throws ParserException
384     {
385         assertEquals(
386             "test26 failed",
387             "http://a/../g",
388             (new LinkProcessor()).extract("/../g", baseURI));
389     }
390     public void test27() throws ParserException
391     {
392         assertEquals(
393             "test27 failed",
394             "http://a/b/c/g.",
395             (new LinkProcessor()).extract("g.", baseURI));
396     }
397     public void test28() throws ParserException
398     {
399         assertEquals(
400             "test28 failed",
401             "http://a/b/c/.g",
402             (new LinkProcessor()).extract(".g", baseURI));
403     }
404     public void test29() throws ParserException
405     {
406         assertEquals(
407             "test29 failed",
408             "http://a/b/c/g..",
409             (new LinkProcessor()).extract("g..", baseURI));
410     }
411     public void test30() throws ParserException
412     {
413         assertEquals(
414             "test30 failed",
415             "http://a/b/c/..g",
416             (new LinkProcessor()).extract("..g", baseURI));
417     }
418     public void test31() throws ParserException
419     {
420         assertEquals(
421             "test31 failed",
422             "http://a/b/g",
423             (new LinkProcessor()).extract("./../g", baseURI));
424     }
425     public void test32() throws ParserException
426     {
427         assertEquals(
428             "test32 failed",
429             "http://a/b/c/g/",
430             (new LinkProcessor()).extract("./g/.", baseURI));
431     }
432     public void test33() throws ParserException
433     {
434         assertEquals(
435             "test33 failed",
436             "http://a/b/c/g/h",
437             (new LinkProcessor()).extract("g/./h", baseURI));
438     }
439     public void test34() throws ParserException
440     {
441         assertEquals(
442             "test34 failed",
443             "http://a/b/c/h",
444             (new LinkProcessor()).extract("g/../h", baseURI));
445     }
446     public void test35() throws ParserException
447     {
448         assertEquals(
449             "test35 failed",
450             "http://a/b/c/g;x=1/y",
451             (new LinkProcessor()).extract("g;x=1/./y", baseURI));
452     }
453     public void test36() throws ParserException
454     {
455         assertEquals(
456             "test36 failed",
457             "http://a/b/c/y",
458             (new LinkProcessor()).extract("g;x=1/../y", baseURI));
459     }
460     public void test37() throws ParserException
461     {
462         assertEquals(
463             "test37 failed",
464             "http://a/b/c/g?y/./x",
465             (new LinkProcessor()).extract("g?y/./x", baseURI));
466     }
467     public void test38() throws ParserException
468     {
469         assertEquals(
470             "test38 failed",
471             "http://a/b/c/g?y/../x",
472             (new LinkProcessor()).extract("g?y/../x", baseURI));
473     }
474     public void test39() throws ParserException
475     {
476         assertEquals(
477             "test39 failed",
478             "http://a/b/c/g#s/./x",
479             (new LinkProcessor()).extract("g#s/./x", baseURI));
480     }
481     public void test40() throws ParserException
482     {
483         assertEquals(
484             "test40 failed",
485             "http://a/b/c/g#s/../x",
486             (new LinkProcessor()).extract("g#s/../x", baseURI));
487     }
488     // public void test41 () throws HTMLParserException
489
// {
490
// assertEquals ("test41 failed", "http:g", (new HTMLLinkProcessor ()).extract ("http:g", baseURI));
491
// }
492
public void test41() throws ParserException
493     {
494         assertEquals(
495             "test41 failed",
496             "http://a/b/c/g",
497             (new LinkProcessor()).extract("http:g", baseURI));
498     }
499 }
500
Popular Tags