KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > tests > scannersTests > ImageScannerTest


1 // $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/tests/scannersTests/ImageScannerTest.java,v 1.2 2004/02/11 02:16:58 woolfel Exp $
2
/*
3  * ====================================================================
4  * Copyright 2002-2004 The Apache Software Foundation.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  */

19
// The developers of JMeter and Apache are grateful to the developers
// of HTMLParser for giving Apache Software Foundation a non-exclusive
// license. The performance benefits of HTMLParser are clear and the
// users of JMeter will benefit from the hard work of the HTMLParser
// team. For detailed information about HTMLParser, the project is
// hosted on sourceforge at http://htmlparser.sourceforge.net/.
//
// HTMLParser was originally created by Somik Raha in 2000. Since then
// a healthy community of users has formed and helped refine the
// design so that it is able to tackle the difficult task of parsing
// dirty HTML. Derrick Oswald is the current lead developer and was kind
// enough to assist JMeter.
32

33 package org.htmlparser.tests.scannersTests;
34 import org.htmlparser.Node;
35 import org.htmlparser.Parser;
36 import org.htmlparser.scanners.ImageScanner;
37 import org.htmlparser.scanners.TableScanner;
38 import org.htmlparser.tags.ImageTag;
39 import org.htmlparser.tags.LinkTag;
40 import org.htmlparser.tags.TableColumn;
41 import org.htmlparser.tags.TableRow;
42 import org.htmlparser.tags.Tag;
43 import org.htmlparser.tags.data.TagData;
44 import org.htmlparser.tests.ParserTestCase;
45 import org.htmlparser.util.LinkProcessor;
46 import org.htmlparser.util.NodeIterator;
47 import org.htmlparser.util.ParserException;
48
49 public class ImageScannerTest extends ParserTestCase
50 {
51
52     public ImageScannerTest(String JavaDoc name)
53     {
54         super(name);
55     }
56
57     public void testDynamicRelativeImageScan() throws ParserException
58     {
59         createParser(
60             "<IMG SRC=\"../abc/def/mypic.jpg\">",
61             "http://www.yahoo.com/ghi?abcdefg");
62         // Register the image scanner
63
parser.addScanner(new ImageScanner("-i", new LinkProcessor()));
64         parseAndAssertNodeCount(1);
65         assertTrue(
66             "Node identified should be HTMLImageTag",
67             node[0] instanceof ImageTag);
68         ImageTag imageTag = (ImageTag) node[0];
69         assertEquals(
70             "Expected Link",
71             "http://www.yahoo.com/abc/def/mypic.jpg",
72             imageTag.getImageURL());
73     }
74
75     public void testEvaluate()
76     {
77         ImageScanner scanner = new ImageScanner("-i", new LinkProcessor());
78         boolean retVal = scanner.evaluate(" img ", null);
79         assertEquals(
80             "Evaluation of IMG tag",
81             new Boolean JavaDoc(true),
82             new Boolean JavaDoc(retVal));
83     }
84
85     /**
86      * This is the reproduction of a bug which causes a null pointer exception
87      */

88     public void testExtractImageLocnInvertedCommasBug() throws ParserException
89     {
90         Tag tag =
91             new Tag(
92                 new TagData(
93                     0,
94                     0,
95                     "img width=638 height=53 border=0 usemap=\"#m\" SRC=http://us.a1.yimg.com/us.yimg.com/i/ww/m5v5.gif alt=Yahoo",
96                     ""));
97         String JavaDoc link =
98             "img width=638 height=53 border=0 usemap=\"#m\" SRC=http://us.a1.yimg.com/us.yimg.com/i/ww/m5v5.gif alt=Yahoo";
99         String JavaDoc url = "c:\\cvs\\html\\binaries\\yahoo.htm";
100         ImageScanner scanner = new ImageScanner("-i", new LinkProcessor());
101         assertEquals(
102             "Extracted Image Locn",
103             "http://us.a1.yimg.com/us.yimg.com/i/ww/m5v5.gif",
104             scanner.extractImageLocn(tag, url));
105     }
106
107     /**
108      * This test has been improved to check for params
109      * in the image tag, based on requirement by Annette Doyle.
110      * Thereby an important bug was detected.
111      */

112     public void testPlaceHolderImageScan() throws ParserException
113     {
114         createParser(
115             "<IMG width=1 height=1 alt=\"a\">",
116             "http://www.yahoo.com/ghi?abcdefg");
117
118         // Register the image scanner
119
parser.addScanner(new ImageScanner("-i", new LinkProcessor()));
120         parseAndAssertNodeCount(1);
121         assertTrue(
122             "Node identified should be HTMLImageTag",
123             node[0] instanceof ImageTag);
124         ImageTag imageTag = (ImageTag) node[0];
125         assertEquals("Expected Image Locn", "", imageTag.getImageURL());
126         assertEquals("Image width", "1", imageTag.getAttribute("WIDTH"));
127         assertEquals("Image height", "1", imageTag.getAttribute("HEIGHT"));
128         assertEquals("alt", "a", imageTag.getAttribute("ALT"));
129     }
130
131     public void testRelativeImageScan() throws ParserException
132     {
133         createParser("<IMG SRC=\"mypic.jpg\">", "http://www.yahoo.com");
134
135         // Register the image scanner
136
parser.addScanner(new ImageScanner("-i", new LinkProcessor()));
137         parseAndAssertNodeCount(1);
138         assertTrue(
139             "Node identified should be HTMLImageTag",
140             node[0] instanceof ImageTag);
141         ImageTag imageTag = (ImageTag) node[0];
142         assertEquals(
143             "Expected Link",
144             "http://www.yahoo.com/mypic.jpg",
145             imageTag.getImageURL());
146     }
147
148     public void testRelativeImageScan2() throws ParserException
149     {
150         createParser("<IMG SRC=\"abc/def/mypic.jpg\">", "http://www.yahoo.com");
151         // Register the image scanner
152
parser.addScanner(new ImageScanner("-i", new LinkProcessor()));
153         parseAndAssertNodeCount(1);
154         assertTrue(
155             "Node identified should be HTMLImageTag",
156             node[0] instanceof ImageTag);
157         ImageTag imageTag = (ImageTag) node[0];
158         assertEquals(
159             "Expected Link",
160             "http://www.yahoo.com/abc/def/mypic.jpg",
161             imageTag.getImageURL());
162     }
163
164     public void testRelativeImageScan3() throws ParserException
165     {
166         createParser(
167             "<IMG SRC=\"../abc/def/mypic.jpg\">",
168             "http://www.yahoo.com/ghi");
169         // Register the image scanner
170
parser.addScanner(new ImageScanner("-i", new LinkProcessor()));
171         parseAndAssertNodeCount(1);
172         assertTrue(
173             "Node identified should be HTMLImageTag",
174             node[0] instanceof ImageTag);
175         ImageTag imageTag = (ImageTag) node[0];
176         assertEquals(
177             "Expected Link",
178             "http://www.yahoo.com/abc/def/mypic.jpg",
179             imageTag.getImageURL());
180     }
181
182     /**
183      * Test image url which contains spaces in it.
184      * This was actually a bug reported by Sam Joseph (sam@neurogrid.net)
185      */

186     public void testImageWithSpaces() throws ParserException
187     {
188         createParser(
189             "<IMG SRC=\"../abc/def/Hello World.jpg\">",
190             "http://www.yahoo.com/ghi");
191         // Register the image scanner
192
parser.addScanner(new ImageScanner("-i", new LinkProcessor()));
193         parseAndAssertNodeCount(1);
194         assertTrue(
195             "Node identified should be HTMLImageTag",
196             node[0] instanceof ImageTag);
197         ImageTag imageTag = (ImageTag) node[0];
198         assertEquals(
199             "Expected Link",
200             "http://www.yahoo.com/abc/def/Hello World.jpg",
201             imageTag.getImageURL());
202     }
203
204     public void testImageWithNewLineChars() throws ParserException
205     {
206         createParser(
207             "<IMG SRC=\"../abc/def/Hello \r\nWorld.jpg\">",
208             "http://www.yahoo.com/ghi");
209         Parser.setLineSeparator("\r\n");
210         // Register the image scanner
211
parser.addScanner(new ImageScanner("-i", new LinkProcessor()));
212         parseAndAssertNodeCount(1);
213         assertTrue(
214             "Node identified should be HTMLImageTag",
215             node[0] instanceof ImageTag);
216         ImageTag imageTag = (ImageTag) node[0];
217         String JavaDoc exp = new String JavaDoc("http://www.yahoo.com/abc/def/Hello World.jpg");
218         //assertEquals("Length of image",exp.length(),imageTag.getImageLocation().length());
219
assertStringEquals("Expected Image", exp, imageTag.getImageURL());
220     }
221
222     /**
223      * Test case to reproduce bug reported by Annette
224      */

225     public void testImageTagsFromYahoo() throws ParserException
226     {
227         createParser(
228             "<small><a HREF=s/5926>Air</a>, <a HREF=s/5927>Hotel</a>, <a HREF=s/5928>Vacations</a>, <a HREF=s/5929>Cruises</a></small></td><td align=center><a HREF=\"http://rd.yahoo.com/M=218794.2020165.3500581.220161/D=yahoo_top/S=2716149:NP/A=1041273/?http://adfarm.mediaplex.com/ad/ck/990-1736-1039-211\" target=\"_top\"><img width=230 height=33 SRC=\"http://us.a1.yimg.com/us.yimg.com/a/co/columbiahouse/4for49Freesh_230x33_redx2.gif\" alt=\"\" border=0></a></td><td nowrap align=center width=215>Find your match on<br><a HREF=s/2734><b>Yahoo! Personals</b></a></td></tr><tr><td colspan=3 align=center><input size=30 name=p>\n"
229                 + "<input type=submit value=Search> <a HREF=r/so>advanced search</a></td></tr></table><table border=0 cellspacing=0 cellpadding=3 width=640><tr><td nowrap align=center><table border=0 cellspacing=0 cellpadding=0><tr><td><a HREF=s/5948><img SRC=\"http://us.i1.yimg.com/us.yimg.com/i/ligans/klgs/eet.gif\" width=20 height=20 border=0></a></td><td> &nbsp; &nbsp; <a HREF=s/1048><b>Yahooligans!</b></a> - <a HREF=s/5282>Eet & Ern</a>, <a HREF=s/5283>Games</a>, <a HREF=s/5284>Science</a>, <a HREF=s/5285>Sports</a>, <a HREF=s/5286>Movies</a>, <a HREF=s/1048>more</a> &nbsp; &nbsp; </td><td><a HREF=s/5948><img SRC=\"http://us.i1.yimg.com/us.yimg.com/i/ligans/klgs/ern.gif\" width=20 height=20 border=0></a></td></tr></table></td></tr><tr><td nowrap align=center><small><b>Shop</b>&nbsp;\n",
230             "http://www.yahoo.com");
231         Node[] node = new Node[10];
232         // Register the image scanner
233
parser.addScanner(new ImageScanner("-i", new LinkProcessor()));
234         int i = 0;
235         Node thisNode;
236         for (NodeIterator e = parser.elements(); e.hasMoreNodes();)
237         {
238             thisNode = (Node) e.nextNode();
239             if (thisNode instanceof ImageTag)
240                 node[i++] = thisNode;
241         }
242         assertEquals("Number of nodes identified should be 3", 3, i);
243         assertTrue(
244             "Node identified should be HTMLImageTag",
245             node[0] instanceof ImageTag);
246         ImageTag imageTag = (ImageTag) node[0];
247         assertEquals(
248             "Expected Image",
249             "http://us.a1.yimg.com/us.yimg.com/a/co/columbiahouse/4for49Freesh_230x33_redx2.gif",
250             imageTag.getImageURL());
251         ImageTag imageTag2 = (ImageTag) node[1];
252         assertEquals(
253             "Expected Image 2",
254             "http://us.i1.yimg.com/us.yimg.com/i/ligans/klgs/eet.gif",
255             imageTag2.getImageURL());
256         ImageTag imageTag3 = (ImageTag) node[2];
257         assertEquals(
258             "Expected Image 3",
259             "http://us.i1.yimg.com/us.yimg.com/i/ligans/klgs/ern.gif",
260             imageTag3.getImageURL());
261     }
262
263     /**
264      * Test case to reproduce bug reported by Annette
265      */

266     public void testImageTagsFromYahooWithAllScannersRegistered()
267         throws ParserException
268     {
269         createParser(
270             "<tr>"
271                 + "<td>"
272                 + " <small><a HREF=s/5926>Air</a>, <a HREF=s/5927>Hotel</a>, "
273                 + "<a HREF=s/5928>Vacations</a>, <a HREF=s/5929>Cruises</a></small>"
274                 + "</td>"
275                 + "<td align=center>"
276                 + "<a HREF=\"http://rd.yahoo.com/M=218794.2020165.3500581.220161/D=yahoo_top/S="
277                 + "2716149:NP/A=1041273/?http://adfarm.mediaplex.com/ad/ck/990-1736-1039-211\" "
278                 + "target=\"_top\"><img width=230 height=33 SRC=\"http://us.a1.yimg.com/us.yimg.com/a/co/"
279                 + "columbiahouse/4for49Freesh_230x33_redx2.gif\" alt=\"\" border=0></a>"
280                 + "</td>"
281                 + "<td nowrap align=center width=215>"
282                 + "Find your match on<br><a HREF=s/2734>"
283                 + "<b>Yahoo! Personals</b></a>"
284                 + "</td>"
285                 + "</tr>"
286                 + "<tr>"
287                 + "<td colspan=3 align=center>"
288                 + "<input size=30 "
289                 + "name=p>\n"
290                 + "</td>"
291                 + "</tr>",
292             "http://www.yahoo.com",
293             30);
294
295         // Register the image scanner
296
parser.registerScanners();
297         // parser.addScanner(new TableScanner(parser));
298
parseAndAssertNodeCount(2);
299         assertType("first node type", TableRow.class, node[0]);
300         TableRow row = (TableRow) node[0];
301         TableColumn col = row.getColumns()[1];
302         Node node = col.children().nextNode();
303         assertType(
304             "Node identified should be HTMLLinkTag",
305             LinkTag.class,
306             node);
307         LinkTag linkTag = (LinkTag) node;
308         Node nodeInsideLink = linkTag.children().nextNode();
309         assertType(
310             "Tag within link should be an image tag",
311             ImageTag.class,
312             nodeInsideLink);
313         ImageTag imageTag = (ImageTag) nodeInsideLink;
314         assertStringEquals(
315             "Expected Image",
316             "http://us.a1.yimg.com/us.yimg.com/a/co/columbiahouse/4for49Freesh_230x33_redx2.gif",
317             imageTag.getImageURL());
318     }
319
320     /**
321      * This is the reproduction of a bug reported
322      * by Annette Doyle
323      */

324     public void testImageTagOnMultipleLines() throws ParserException
325     {
326         createParser(
327             "<td rowspan=3>"
328                 + "<img height=49 \n\n"
329                 + "alt=\"Central Intelligence Agency, Director of Central Intelligence\" \n\n"
330                 + "src=\"graphics/images_home2/cia_banners_template3_01.gif\" \n\n"
331                 + "width=241>"
332                 + "</td>",
333             "http://www.cia.gov");
334
335         // Register the image scanner
336
parser.registerScanners();
337         parser.addScanner(new TableScanner(parser));
338         parseAndAssertNodeCount(1);
339         assertType("node should be", TableColumn.class, node[0]);
340         TableColumn col = (TableColumn) node[0];
341         Node node = col.children().nextNode();
342         assertType("node inside column", ImageTag.class, node);
343         ImageTag imageTag = (ImageTag) node;
344         // Get the data from the node
345
assertEquals(
346             "Image location",
347             "http://www.cia.gov/graphics/images_home2/cia_banners_template3_01.gif",
348             imageTag.getImageURL());
349         assertEquals(
350             "Alt Value",
351             "Central Intelligence Agency, Director of Central Intelligence",
352             imageTag.getAttribute("ALT"));
353         assertEquals("Width", "241", imageTag.getAttribute("WIDTH"));
354         assertEquals("Height", "49", imageTag.getAttribute("HEIGHT"));
355     }
356
357     public void testDirectRelativeLinks() throws ParserException
358     {
359         createParser(
360             "<IMG SRC = \"/images/lines/li065.jpg\">",
361             "http://www.cybergeo.presse.fr/REVGEO/ttsavoir/joly.htm");
362
363         // Register the image scanner
364
parser.registerScanners();
365         parseAndAssertNodeCount(1);
366         assertTrue(
367             "Node identified should be HTMLImageTag",
368             node[0] instanceof ImageTag);
369         ImageTag imageTag = (ImageTag) node[0];
370         assertEquals(
371             "Image Location",
372             "http://www.cybergeo.presse.fr/images/lines/li065.jpg",
373             imageTag.getImageURL());
374
375     }
376
377     /**
378      * Based on a page submitted by Claude Duguay, the image tag has IMG SRC"somefile.jpg" - a missing equal
379      * to sign
380      */

381     public void testMissingEqualTo() throws ParserException
382     {
383         createParser(
384             "<img src\"/images/spacer.gif\" width=\"1\" height=\"1\" alt=\"\">",
385             "http://www.htmlparser.org/subdir1/subdir2");
386
387         // Register the image scanner
388
parser.registerScanners();
389         parseAndAssertNodeCount(1);
390         assertTrue(
391             "Node identified should be HTMLImageTag",
392             node[0] instanceof ImageTag);
393         ImageTag imageTag = (ImageTag) node[0];
394         assertStringEquals(
395             "Image Location",
396             "http://www.htmlparser.org/images/spacer.gif",
397             imageTag.getImageURL());
398         assertEquals("Width", "1", imageTag.getAttribute("WIDTH"));
399         assertEquals("Height", "1", imageTag.getAttribute("HEIGHT"));
400         assertEquals("Alt", "", imageTag.getAttribute("ALT"));
401     }
402 }
403
Popular Tags