1 19 20 33 package org.htmlparser.tests.scannersTests; 34 import org.htmlparser.Node; 35 import org.htmlparser.Parser; 36 import org.htmlparser.scanners.ImageScanner; 37 import org.htmlparser.scanners.TableScanner; 38 import org.htmlparser.tags.ImageTag; 39 import org.htmlparser.tags.LinkTag; 40 import org.htmlparser.tags.TableColumn; 41 import org.htmlparser.tags.TableRow; 42 import org.htmlparser.tags.Tag; 43 import org.htmlparser.tags.data.TagData; 44 import org.htmlparser.tests.ParserTestCase; 45 import org.htmlparser.util.LinkProcessor; 46 import org.htmlparser.util.NodeIterator; 47 import org.htmlparser.util.ParserException; 48 49 public class ImageScannerTest extends ParserTestCase 50 { 51 52 public ImageScannerTest(String name) 53 { 54 super(name); 55 } 56 57 public void testDynamicRelativeImageScan() throws ParserException 58 { 59 createParser( 60 "<IMG SRC=\"../abc/def/mypic.jpg\">", 61 "http://www.yahoo.com/ghi?abcdefg"); 62 parser.addScanner(new ImageScanner("-i", new LinkProcessor())); 64 parseAndAssertNodeCount(1); 65 assertTrue( 66 "Node identified should be HTMLImageTag", 67 node[0] instanceof ImageTag); 68 ImageTag imageTag = (ImageTag) node[0]; 69 assertEquals( 70 "Expected Link", 71 "http://www.yahoo.com/abc/def/mypic.jpg", 72 imageTag.getImageURL()); 73 } 74 75 public void testEvaluate() 76 { 77 ImageScanner scanner = new ImageScanner("-i", new LinkProcessor()); 78 boolean retVal = scanner.evaluate(" img ", null); 79 assertEquals( 80 "Evaluation of IMG tag", 81 new Boolean (true), 82 new Boolean (retVal)); 83 } 84 85 88 public void testExtractImageLocnInvertedCommasBug() throws ParserException 89 { 90 Tag tag = 91 new Tag( 92 new TagData( 93 0, 94 0, 95 "img width=638 height=53 border=0 usemap=\"#m\" SRC=http://us.a1.yimg.com/us.yimg.com/i/ww/m5v5.gif alt=Yahoo", 96 "")); 97 String link = 98 "img width=638 height=53 border=0 usemap=\"#m\" SRC=http://us.a1.yimg.com/us.yimg.com/i/ww/m5v5.gif alt=Yahoo"; 99 String url = "c:\\cvs\\html\\binaries\\yahoo.htm"; 100 ImageScanner scanner = new ImageScanner("-i", new LinkProcessor()); 101 assertEquals( 102 "Extracted Image Locn", 103 "http://us.a1.yimg.com/us.yimg.com/i/ww/m5v5.gif", 104 scanner.extractImageLocn(tag, url)); 105 } 106 107 112 public void testPlaceHolderImageScan() throws ParserException 113 { 114 createParser( 115 "<IMG width=1 height=1 alt=\"a\">", 116 "http://www.yahoo.com/ghi?abcdefg"); 117 118 parser.addScanner(new ImageScanner("-i", new LinkProcessor())); 120 parseAndAssertNodeCount(1); 121 assertTrue( 122 "Node identified should be HTMLImageTag", 123 node[0] instanceof ImageTag); 124 ImageTag imageTag = (ImageTag) node[0]; 125 assertEquals("Expected Image Locn", "", imageTag.getImageURL()); 126 assertEquals("Image width", "1", imageTag.getAttribute("WIDTH")); 127 assertEquals("Image height", "1", imageTag.getAttribute("HEIGHT")); 128 assertEquals("alt", "a", imageTag.getAttribute("ALT")); 129 } 130 131 public void testRelativeImageScan() throws ParserException 132 { 133 createParser("<IMG SRC=\"mypic.jpg\">", "http://www.yahoo.com"); 134 135 parser.addScanner(new ImageScanner("-i", new LinkProcessor())); 137 parseAndAssertNodeCount(1); 138 assertTrue( 139 "Node identified should be HTMLImageTag", 140 node[0] instanceof ImageTag); 141 ImageTag imageTag = (ImageTag) node[0]; 142 assertEquals( 143 "Expected Link", 144 "http://www.yahoo.com/mypic.jpg", 145 imageTag.getImageURL()); 146 } 147 148 public void testRelativeImageScan2() throws ParserException 149 { 150 createParser("<IMG SRC=\"abc/def/mypic.jpg\">", "http://www.yahoo.com"); 151 parser.addScanner(new ImageScanner("-i", new LinkProcessor())); 153 parseAndAssertNodeCount(1); 154 assertTrue( 155 "Node identified should be HTMLImageTag", 156 node[0] instanceof ImageTag); 157 ImageTag imageTag = (ImageTag) node[0]; 158 assertEquals( 159 "Expected Link", 160 "http://www.yahoo.com/abc/def/mypic.jpg", 161 imageTag.getImageURL()); 162 } 163 164 public void testRelativeImageScan3() throws ParserException 165 { 166 createParser( 167 "<IMG SRC=\"../abc/def/mypic.jpg\">", 168 "http://www.yahoo.com/ghi"); 169 parser.addScanner(new ImageScanner("-i", new LinkProcessor())); 171 parseAndAssertNodeCount(1); 172 assertTrue( 173 "Node identified should be HTMLImageTag", 174 node[0] instanceof ImageTag); 175 ImageTag imageTag = (ImageTag) node[0]; 176 assertEquals( 177 "Expected Link", 178 "http://www.yahoo.com/abc/def/mypic.jpg", 179 imageTag.getImageURL()); 180 } 181 182 186 public void testImageWithSpaces() throws ParserException 187 { 188 createParser( 189 "<IMG SRC=\"../abc/def/Hello World.jpg\">", 190 "http://www.yahoo.com/ghi"); 191 parser.addScanner(new ImageScanner("-i", new LinkProcessor())); 193 parseAndAssertNodeCount(1); 194 assertTrue( 195 "Node identified should be HTMLImageTag", 196 node[0] instanceof ImageTag); 197 ImageTag imageTag = (ImageTag) node[0]; 198 assertEquals( 199 "Expected Link", 200 "http://www.yahoo.com/abc/def/Hello World.jpg", 201 imageTag.getImageURL()); 202 } 203 204 public void testImageWithNewLineChars() throws ParserException 205 { 206 createParser( 207 "<IMG SRC=\"../abc/def/Hello \r\nWorld.jpg\">", 208 "http://www.yahoo.com/ghi"); 209 Parser.setLineSeparator("\r\n"); 210 parser.addScanner(new ImageScanner("-i", new LinkProcessor())); 212 parseAndAssertNodeCount(1); 213 assertTrue( 214 "Node identified should be HTMLImageTag", 215 node[0] instanceof ImageTag); 216 ImageTag imageTag = (ImageTag) node[0]; 217 String exp = new String ("http://www.yahoo.com/abc/def/Hello World.jpg"); 218 assertStringEquals("Expected Image", exp, imageTag.getImageURL()); 220 } 221 222 225 public void testImageTagsFromYahoo() throws ParserException 226 { 227 createParser( 228 "<small><a HREF=s/5926>Air</a>, <a HREF=s/5927>Hotel</a>, <a HREF=s/5928>Vacations</a>, <a HREF=s/5929>Cruises</a></small></td><td align=center><a HREF=\"http://rd.yahoo.com/M=218794.2020165.3500581.220161/D=yahoo_top/S=2716149:NP/A=1041273/?http://adfarm.mediaplex.com/ad/ck/990-1736-1039-211\" target=\"_top\"><img width=230 height=33 SRC=\"http://us.a1.yimg.com/us.yimg.com/a/co/columbiahouse/4for49Freesh_230x33_redx2.gif\" alt=\"\" border=0></a></td><td nowrap align=center width=215>Find your match on<br><a HREF=s/2734><b>Yahoo! Personals</b></a></td></tr><tr><td colspan=3 align=center><input size=30 name=p>\n" 229 + "<input type=submit value=Search> <a HREF=r/so>advanced search</a></td></tr></table><table border=0 cellspacing=0 cellpadding=3 width=640><tr><td nowrap align=center><table border=0 cellspacing=0 cellpadding=0><tr><td><a HREF=s/5948><img SRC=\"http://us.i1.yimg.com/us.yimg.com/i/ligans/klgs/eet.gif\" width=20 height=20 border=0></a></td><td> <a HREF=s/1048><b>Yahooligans!</b></a> - <a HREF=s/5282>Eet & Ern</a>, <a HREF=s/5283>Games</a>, <a HREF=s/5284>Science</a>, <a HREF=s/5285>Sports</a>, <a HREF=s/5286>Movies</a>, <a HREF=s/1048>more</a> </td><td><a HREF=s/5948><img SRC=\"http://us.i1.yimg.com/us.yimg.com/i/ligans/klgs/ern.gif\" width=20 height=20 border=0></a></td></tr></table></td></tr><tr><td nowrap align=center><small><b>Shop</b> \n", 230 "http://www.yahoo.com"); 231 Node[] node = new Node[10]; 232 parser.addScanner(new ImageScanner("-i", new LinkProcessor())); 234 int i = 0; 235 Node thisNode; 236 for (NodeIterator e = parser.elements(); e.hasMoreNodes();) 237 { 238 thisNode = (Node) e.nextNode(); 239 if (thisNode instanceof ImageTag) 240 node[i++] = thisNode; 241 } 242 assertEquals("Number of nodes identified should be 3", 3, i); 243 assertTrue( 244 "Node identified should be HTMLImageTag", 245 node[0] instanceof ImageTag); 246 ImageTag imageTag = (ImageTag) node[0]; 247 assertEquals( 248 "Expected Image", 249 "http://us.a1.yimg.com/us.yimg.com/a/co/columbiahouse/4for49Freesh_230x33_redx2.gif", 250 imageTag.getImageURL()); 251 ImageTag imageTag2 = (ImageTag) node[1]; 252 assertEquals( 253 "Expected Image 2", 254 "http://us.i1.yimg.com/us.yimg.com/i/ligans/klgs/eet.gif", 255 imageTag2.getImageURL()); 256 ImageTag imageTag3 = (ImageTag) node[2]; 257 assertEquals( 258 "Expected Image 3", 259 "http://us.i1.yimg.com/us.yimg.com/i/ligans/klgs/ern.gif", 260 imageTag3.getImageURL()); 261 } 262 263 266 public void testImageTagsFromYahooWithAllScannersRegistered() 267 throws ParserException 268 { 269 createParser( 270 "<tr>" 271 + "<td>" 272 + " <small><a HREF=s/5926>Air</a>, <a HREF=s/5927>Hotel</a>, " 273 + "<a HREF=s/5928>Vacations</a>, <a HREF=s/5929>Cruises</a></small>" 274 + "</td>" 275 + "<td align=center>" 276 + "<a HREF=\"http://rd.yahoo.com/M=218794.2020165.3500581.220161/D=yahoo_top/S=" 277 + "2716149:NP/A=1041273/?http://adfarm.mediaplex.com/ad/ck/990-1736-1039-211\" " 278 + "target=\"_top\"><img width=230 height=33 SRC=\"http://us.a1.yimg.com/us.yimg.com/a/co/" 279 + "columbiahouse/4for49Freesh_230x33_redx2.gif\" alt=\"\" border=0></a>" 280 + "</td>" 281 + "<td nowrap align=center width=215>" 282 + "Find your match on<br><a HREF=s/2734>" 283 + "<b>Yahoo! Personals</b></a>" 284 + "</td>" 285 + "</tr>" 286 + "<tr>" 287 + "<td colspan=3 align=center>" 288 + "<input size=30 " 289 + "name=p>\n" 290 + "</td>" 291 + "</tr>", 292 "http://www.yahoo.com", 293 30); 294 295 parser.registerScanners(); 297 parseAndAssertNodeCount(2); 299 assertType("first node type", TableRow.class, node[0]); 300 TableRow row = (TableRow) node[0]; 301 TableColumn col = row.getColumns()[1]; 302 Node node = col.children().nextNode(); 303 assertType( 304 "Node identified should be HTMLLinkTag", 305 LinkTag.class, 306 node); 307 LinkTag linkTag = (LinkTag) node; 308 Node nodeInsideLink = linkTag.children().nextNode(); 309 assertType( 310 "Tag within link should be an image tag", 311 ImageTag.class, 312 nodeInsideLink); 313 ImageTag imageTag = (ImageTag) nodeInsideLink; 314 assertStringEquals( 315 "Expected Image", 316 "http://us.a1.yimg.com/us.yimg.com/a/co/columbiahouse/4for49Freesh_230x33_redx2.gif", 317 imageTag.getImageURL()); 318 } 319 320 324 public void testImageTagOnMultipleLines() throws ParserException 325 { 326 createParser( 327 "<td rowspan=3>" 328 + "<img height=49 \n\n" 329 + "alt=\"Central Intelligence Agency, Director of Central Intelligence\" \n\n" 330 + "src=\"graphics/images_home2/cia_banners_template3_01.gif\" \n\n" 331 + "width=241>" 332 + "</td>", 333 "http://www.cia.gov"); 334 335 parser.registerScanners(); 337 parser.addScanner(new TableScanner(parser)); 338 parseAndAssertNodeCount(1); 339 assertType("node should be", TableColumn.class, node[0]); 340 TableColumn col = (TableColumn) node[0]; 341 Node node = col.children().nextNode(); 342 assertType("node inside column", ImageTag.class, node); 343 ImageTag imageTag = (ImageTag) node; 344 assertEquals( 346 "Image location", 347 "http://www.cia.gov/graphics/images_home2/cia_banners_template3_01.gif", 348 imageTag.getImageURL()); 349 assertEquals( 350 "Alt Value", 351 "Central Intelligence Agency, Director of Central Intelligence", 352 imageTag.getAttribute("ALT")); 353 assertEquals("Width", "241", imageTag.getAttribute("WIDTH")); 354 assertEquals("Height", "49", imageTag.getAttribute("HEIGHT")); 355 } 356 357 public void testDirectRelativeLinks() throws ParserException 358 { 359 createParser( 360 "<IMG SRC = \"/images/lines/li065.jpg\">", 361 "http://www.cybergeo.presse.fr/REVGEO/ttsavoir/joly.htm"); 362 363 parser.registerScanners(); 365 parseAndAssertNodeCount(1); 366 assertTrue( 367 "Node identified should be HTMLImageTag", 368 node[0] instanceof ImageTag); 369 ImageTag imageTag = (ImageTag) node[0]; 370 assertEquals( 371 "Image Location", 372 "http://www.cybergeo.presse.fr/images/lines/li065.jpg", 373 imageTag.getImageURL()); 374 375 } 376 377 381 public void testMissingEqualTo() throws ParserException 382 { 383 createParser( 384 "<img src\"/images/spacer.gif\" width=\"1\" height=\"1\" alt=\"\">", 385 "http://www.htmlparser.org/subdir1/subdir2"); 386 387 parser.registerScanners(); 389 parseAndAssertNodeCount(1); 390 assertTrue( 391 "Node identified should be HTMLImageTag", 392 node[0] instanceof ImageTag); 393 ImageTag imageTag = (ImageTag) node[0]; 394 assertStringEquals( 395 "Image Location", 396 "http://www.htmlparser.org/images/spacer.gif", 397 imageTag.getImageURL()); 398 assertEquals("Width", "1", imageTag.getAttribute("WIDTH")); 399 assertEquals("Height", "1", imageTag.getAttribute("HEIGHT")); 400 assertEquals("Alt", "", imageTag.getAttribute("ALT")); 401 } 402 } 403 | Popular Tags |