1 19 20 33 package org.htmlparser.tests.scannersTests; 34 35 import org.htmlparser.Node; 36 import org.htmlparser.Parser; 37 import org.htmlparser.RemarkNode; 38 import org.htmlparser.scanners.FormScanner; 39 import org.htmlparser.scanners.LinkScanner; 40 import org.htmlparser.tags.FormTag; 41 import org.htmlparser.tags.InputTag; 42 import org.htmlparser.tags.LinkTag; 43 import org.htmlparser.tags.TextareaTag; 44 import org.htmlparser.tests.ParserTestCase; 45 import org.htmlparser.util.ParserException; 46 import org.htmlparser.util.SimpleNodeIterator; 47 48 public class FormScannerTest extends ParserTestCase 49 { 50 public static final String FORM_HTML = 51 "<FORM METHOD=\"" 52 + FormTag.POST 53 + "\" ACTION=\"do_login.php\" NAME=\"login_form\" onSubmit=\"return CheckData()\">\n" 54 + "<TR><TD ALIGN=\"center\"> </TD></TR>\n" 55 + "<TR><TD ALIGN=\"center\"><FONT face=\"Arial, verdana\" size=2><b>User Name</b></font></TD></TR>\n" 56 + "<TR><TD ALIGN=\"center\"><INPUT TYPE=\"text\" NAME=\"name\" SIZE=\"20\"></TD></TR>\n" 57 + "<TR><TD ALIGN=\"center\"><FONT face=\"Arial, verdana\" size=2><b>Password</b></font></TD></TR>\n" 58 + "<TR><TD ALIGN=\"center\"><INPUT TYPE=\"password\" NAME=\"passwd\" SIZE=\"20\"></TD></TR>\n" 59 + "<TR><TD ALIGN=\"center\"> </TD></TR>\n" 60 + "<TR><TD ALIGN=\"center\"><INPUT TYPE=\"submit\" NAME=\"submit\" VALUE=\"Login\"></TD></TR>\n" 61 + "<TR><TD ALIGN=\"center\"> </TD></TR>\n" 62 + "<TEXTAREA name=\"Description\" rows=\"15\" cols=\"55\" wrap=\"virtual\" class=\"composef\" tabindex=\"5\">Contents of TextArea</TEXTAREA>\n" 63 + 64 "<INPUT TYPE=\"hidden\" NAME=\"password\" SIZE=\"20\">\n" 66 + "<INPUT TYPE=\"submit\">\n" 67 + "</FORM>"; 68 69 public static final String EXPECTED_FORM_HTML_FORMLINE = 70 "<FORM ACTION=\"http://www.google.com/test/do_login.php\" NAME=\"login_form\" ONSUBMIT=\"return CheckData()\" METHOD=\"" 71 + FormTag.POST 72 + "\">\r\n"; 73 public static final String EXPECTED_FORM_HTML_REST_OF_FORM = 74 "<TR><TD ALIGN=\"center\"> </TD></TR>\r\n" 75 + "<TR><TD ALIGN=\"center\"><FONT FACE=\"Arial, verdana\" SIZE=\"2\"><B>User Name</B></FONT></TD></TR>\r\n" 76 + "<TR><TD ALIGN=\"center\"><INPUT NAME=\"name\" SIZE=\"20\" TYPE=\"text\"></TD></TR>\r\n" 77 + "<TR><TD ALIGN=\"center\"><FONT FACE=\"Arial, verdana\" SIZE=\"2\"><B>Password</B></FONT></TD></TR>\r\n" 78 + "<TR><TD ALIGN=\"center\"><INPUT NAME=\"passwd\" SIZE=\"20\" TYPE=\"password\"></TD></TR>\r\n" 79 + "<TR><TD ALIGN=\"center\"> </TD></TR>\r\n" 80 + "<TR><TD ALIGN=\"center\"><INPUT VALUE=\"Login\" NAME=\"submit\" TYPE=\"submit\"></TD></TR>\r\n" 81 + "<TR><TD ALIGN=\"center\"> </TD></TR>\r\n" 82 + "<TEXTAREA TABINDEX=\"5\" ROWS=\"15\" COLS=\"55\" CLASS=\"composef\" NAME=\"Description\" WRAP=\"virtual\">Contents of TextArea</TEXTAREA>\r\n" 83 + 84 "<INPUT NAME=\"password\" SIZE=\"20\" TYPE=\"hidden\">\r\n" 86 + "<INPUT TYPE=\"submit\">\r\n" 87 + "</FORM>"; 88 public static final String EXPECTED_FORM_HTML = 89 EXPECTED_FORM_HTML_FORMLINE + EXPECTED_FORM_HTML_REST_OF_FORM; 90 91 public FormScannerTest(String name) 92 { 93 super(name); 94 } 95 96 public void testEvaluate() 97 { 98 String line1 = 99 "form method=\"post\" onsubmit=\"return implementsearch()\" name=frmsearch id=form"; 100 String line2 = 101 "FORM method=\"post\" onsubmit=\"return implementsearch()\" name=frmsearch id=form"; 102 String line3 = 103 "Form method=\"post\" onsubmit=\"return implementsearch()\" name=frmsearch id=form"; 104 FormScanner formScanner = new FormScanner("", Parser.createParser("")); 105 assertTrue("Line 1", formScanner.evaluate(line1, null)); 106 assertTrue("Line 2", formScanner.evaluate(line2, null)); 107 assertTrue("Line 3", formScanner.evaluate(line3, null)); 108 } 109 110 public void assertTypeNameSize( 111 String description, 112 String type, 113 String name, 114 String size, 115 InputTag inputTag) 116 { 117 assertEquals( 118 description + " type", 119 type, 120 inputTag.getAttribute("TYPE")); 121 assertEquals( 122 description + " name", 123 name, 124 inputTag.getAttribute("NAME")); 125 assertEquals( 126 description + " size", 127 size, 128 inputTag.getAttribute("SIZE")); 129 } 130 public void assertTypeNameValue( 131 String description, 132 String type, 133 String name, 134 String value, 135 InputTag inputTag) 136 { 137 assertEquals( 138 description + " type", 139 type, 140 inputTag.getAttribute("TYPE")); 141 assertEquals( 142 description + " name", 143 name, 144 inputTag.getAttribute("NAME")); 145 assertEquals( 146 description + " value", 147 value, 148 inputTag.getAttribute("VALUE")); 149 } 150 public void testScan() throws ParserException 151 { 152 createParser(FORM_HTML, "http://www.google.com/test/index.html"); 153 parser.addScanner(new FormScanner("", parser)); 154 parseAndAssertNodeCount(1); 155 assertTrue("Node 0 should be Form Tag", node[0] instanceof FormTag); 156 FormTag formTag = (FormTag) node[0]; 157 assertStringEquals("Method", FormTag.POST, formTag.getFormMethod()); 158 assertStringEquals( 159 "Location", 160 "http://www.google.com/test/do_login.php", 161 formTag.getFormLocation()); 162 assertStringEquals("Name", "login_form", formTag.getFormName()); 163 InputTag nameTag = formTag.getInputTag("name"); 164 InputTag passwdTag = formTag.getInputTag("passwd"); 165 InputTag submitTag = formTag.getInputTag("submit"); 166 InputTag dummyTag = formTag.getInputTag("dummy"); 167 assertNotNull("Input Name Tag should not be null", nameTag); 168 assertNotNull("Input Password Tag should not be null", passwdTag); 169 assertNotNull("Input Submit Tag should not be null", submitTag); 170 assertNull("Input dummy tag should be null", dummyTag); 171 172 assertTypeNameSize("Input Name Tag", "text", "name", "20", nameTag); 173 assertTypeNameSize( 174 "Input Password Tag", 175 "password", 176 "passwd", 177 "20", 178 passwdTag); 179 assertTypeNameValue( 180 "Input Submit Tag", 181 "submit", 182 "submit", 183 "Login", 184 submitTag); 185 186 TextareaTag textAreaTag = formTag.getTextAreaTag("Description"); 187 assertNotNull("Text Area Tag should have been found", textAreaTag); 188 assertEquals( 189 "Text Area Tag Contents", 190 "Contents of TextArea", 191 textAreaTag.getValue()); 192 assertNull("Should have been null", formTag.getTextAreaTag("junk")); 193 194 assertStringEquals("toHTML", EXPECTED_FORM_HTML, formTag.toHtml()); 195 } 196 197 public void testScanFormWithNoEnding() throws Exception 198 { 199 createParser( 200 "<TABLE>\n" 201 + "<FORM METHOD=\"post\" ACTION=\"do_login.php\" NAME=\"login_form\" onSubmit=\"return CheckData()\">\n" 202 + "<TR><TD ALIGN=\"center\"> </TD></TR>\n" 203 + "<TR><TD ALIGN=\"center\"><FONT face=\"Arial, verdana\" size=2><b>User Name</b></font></TD></TR>\n" 204 + "<TR><TD ALIGN=\"center\"><INPUT TYPE=\"text\" NAME=\"name\" SIZE=\"20\"></TD></TR>\n" 205 + "<TR><TD ALIGN=\"center\"><FONT face=\"Arial, verdana\" size=2><b>Password</b></font></TD></TR>\n" 206 + "<TR><TD ALIGN=\"center\"><INPUT TYPE=\"password\" NAME=\"passwd\" SIZE=\"20\"></TD></TR>\n" 207 + "<TR><TD ALIGN=\"center\"> </TD></TR>\n" 208 + "<TR><TD ALIGN=\"center\"><INPUT TYPE=\"submit\" NAME=\"submit\" VALUE=\"Login\"></TD></TR>\n" 209 + "<TR><TD ALIGN=\"center\"> </TD></TR>\n" 210 + "<INPUT TYPE=\"hidden\" NAME=\"password\" SIZE=\"20\">\n" 211 + "</TABLE>", 212 "http://www.google.com/test/index.html"); 213 214 parser.addScanner(new FormScanner("", parser)); 215 216 parseAndAssertNodeCount(2); 217 } 218 222 public void testScanFormWithLinks() throws ParserException 223 { 224 createParser( 225 "<form action=\"/search\" name=f><table cellspacing=0 cellpadding=0><tr><td width=75> " 226 + "</td><td align=center><input type=hidden name=hl value=en><input type=hidden name=ie " 227 + "value=\"UTF-8\"><input type=hidden name=oe value=\"UTF-8\"><input maxLength=256 size=55" 228 + " name=q value=\"\"><br><input type=submit value=\"Google Search\" name=btnG><input type=" 229 + "submit value=\"I'm Feeling Lucky\" name=btnI></td><td valign=top nowrap><font size=-2>" 230 + " • <a HREF=/advanced_search?hl=en>Advanced Search</a><br> •" 231 + " <a HREF=/preferences?hl=en>Preferences</a><br> • <a HREF=/" 232 + "language_tools?hl=en>Language Tools</a></font></td></tr></table></form>"); 233 234 parser.addScanner(new FormScanner("", parser)); 235 parser.addScanner(new LinkScanner()); 236 parseAndAssertNodeCount(1); 237 assertTrue("Should be a HTMLFormTag", node[0] instanceof FormTag); 238 FormTag formTag = (FormTag) node[0]; 239 LinkTag[] linkTag = new LinkTag[10]; 240 int i = 0; 241 for (SimpleNodeIterator e = formTag.children(); e.hasMoreNodes();) 242 { 243 Node formNode = e.nextNode(); 244 if (formNode instanceof LinkTag) 245 { 246 linkTag[i++] = (LinkTag) formNode; 247 } 248 } 249 assertEquals("Link Tag Count", 3, i); 250 assertEquals( 251 "First Link Tag Text", 252 "Advanced Search", 253 linkTag[0].getLinkText()); 254 assertEquals( 255 "Second Link Tag Text", 256 "Preferences", 257 linkTag[1].getLinkText()); 258 assertEquals( 259 "Third Link Tag Text", 260 "Language Tools", 261 linkTag[2].getLinkText()); 262 } 263 266 public void testScanFormWithComments() throws ParserException 267 { 268 createParser( 269 "<form action=\"/search\" name=f><table cellspacing=0 cellpadding=0><tr><td width=75> " 270 + "</td><td align=center><input type=hidden name=hl value=en><input type=hidden name=ie " 271 + "value=\"UTF-8\"><input type=hidden name=oe value=\"UTF-8\"><!-- Hello World -->" 272 + "<input maxLength=256 size=55" 273 + " name=q value=\"\"><br><input type=submit value=\"Google Search\" name=btnG><input type=" 274 + "submit value=\"I'm Feeling Lucky\" name=btnI></td><td valign=top nowrap><font size=-2>" 275 + " • <a HREF=/advanced_search?hl=en>Advanced Search</a><br> •" 276 + " <a HREF=/preferences?hl=en>Preferences</a><br> • <a HREF=/" 277 + "language_tools?hl=en>Language Tools</a></font></td></tr></table></form>"); 278 279 parser.addScanner(new FormScanner("", parser)); 280 parseAndAssertNodeCount(1); 281 assertTrue("Should be a HTMLFormTag", node[0] instanceof FormTag); 282 FormTag formTag = (FormTag) node[0]; 283 RemarkNode[] remarkNode = new RemarkNode[10]; 284 int i = 0; 285 for (SimpleNodeIterator e = formTag.children(); e.hasMoreNodes();) 286 { 287 Node formNode = (Node) e.nextNode(); 288 if (formNode instanceof RemarkNode) 289 { 290 remarkNode[i++] = (RemarkNode) formNode; 291 } 292 } 293 assertEquals("Remark Node Count", 1, i); 294 assertEquals( 295 "First Remark Node", 296 " Hello World ", 297 remarkNode[0].toPlainTextString()); 298 } 299 302 public void testScanFormWithComments2() throws ParserException 303 { 304 createParser( 305 "<FORM id=\"id\" name=\"name\" action=\"http://some.site/aPage.asp?id=97\" method=\"post\">\n" 306 + " <!--\n" 307 + " Just a Comment\n" 308 + " -->\n" 309 + "</FORM>"); 310 parser.registerScanners(); 311 parseAndAssertNodeCount(1); 312 assertTrue("Should be a HTMLFormTag", node[0] instanceof FormTag); 313 FormTag formTag = (FormTag) node[0]; 314 RemarkNode[] remarkNode = new RemarkNode[10]; 315 int i = 0; 316 for (SimpleNodeIterator e = formTag.children(); e.hasMoreNodes();) 317 { 318 Node formNode = (Node) e.nextNode(); 319 if (formNode instanceof RemarkNode) 320 { 321 remarkNode[i++] = (RemarkNode) formNode; 322 } 323 } 324 assertEquals("Remark Node Count", 1, i); 325 } 326 327 331 public void testScanFormWithPreviousOpenLink() throws ParserException 332 { 333 createParser( 334 "<A HREF=\"http://www.oygevalt.org/\">Home</A>\n" 335 + "<P>\n" 336 + "And now, the good stuff:\n" 337 + "<P>\n" 338 + "<A HREF=\"http://www.yahoo.com\">Yahoo!\n" 339 + "<FORM ACTION=\".\" METHOD=\"GET\">\n" 340 + "<INPUT TYPE=\"TEXT\">\n" 341 + "<BR>\n" 342 + "<A HREF=\"http://www.helpme.com\">Help</A> " 343 + "<INPUT TYPE=\"checkbox\">\n" 344 + "<P>\n" 345 + "<INPUT TYPE=\"SUBMIT\">\n" 346 + "</FORM>"); 347 parser.addScanner(new FormScanner("", parser)); 348 parser.addScanner(new LinkScanner()); 349 parseAndAssertNodeCount(6); 350 assertTrue("Fifth Node is a link", node[4] instanceof LinkTag); 351 LinkTag linkTag = (LinkTag) node[4]; 352 assertEquals("Link Text", "Yahoo!\r\n", linkTag.getLinkText()); 353 assertEquals("Link URL", "http://www.yahoo.com", linkTag.getLink()); 354 assertType("Sixth Node", FormTag.class, node[5]); 355 } 356 357 362 public void testFormScanningShouldNotHappen() throws Exception 363 { 364 String testHTML = 365 "<HTML><HEAD><TITLE>Test Form Tag</TITLE></HEAD>" 366 + "<BODY><FORM name=\"form0\"><INPUT type=\"text\" name=\"text0\"></FORM>" 367 + "</BODY></HTML>"; 368 createParser(testHTML); 369 parser.registerScanners(); 370 parser.removeScanner(new FormScanner("", parser)); 371 Node[] nodes = parser.extractAllNodesThatAre(FormTag.class); 372 assertEquals("shouldnt have found form tag", 0, nodes.length); 373 } 374 } 375 | Popular Tags |