1 27 package org.htmlparser.tests.filterTests; 28 29 import org.htmlparser.Parser; 30 import org.htmlparser.Tag; 31 import org.htmlparser.filters.AndFilter; 32 import org.htmlparser.filters.CssSelectorNodeFilter; 33 import org.htmlparser.filters.HasAttributeFilter; 34 import org.htmlparser.filters.HasChildFilter; 35 import org.htmlparser.filters.NodeClassFilter; 36 import org.htmlparser.filters.NotFilter; 37 import org.htmlparser.filters.OrFilter; 38 import org.htmlparser.filters.RegexFilter; 39 import org.htmlparser.filters.StringFilter; 40 import org.htmlparser.filters.TagNameFilter; 41 import org.htmlparser.lexer.Lexer; 42 import org.htmlparser.Text; 43 import org.htmlparser.tags.BodyTag; 44 import org.htmlparser.tags.LinkTag; 45 import org.htmlparser.tests.ParserTestCase; 46 import org.htmlparser.util.NodeIterator; 47 import org.htmlparser.util.NodeList; 48 import org.htmlparser.util.ParserException; 49 50 53 public class FilterTest extends ParserTestCase 54 { 55 static 56 { 57 System.setProperty ("org.htmlparser.tests.filterTests.FilterTest", "FilterTest"); 58 } 59 60 public FilterTest (String name) 61 { 62 super (name); 63 } 64 65 68 public void testNodeClass () throws ParserException 69 { 70 String guts; 71 String html; 72 NodeList list; 73 74 guts = "<body>Now is the time for all good men..</body>"; 75 html = "<html>" + guts + "</html>"; 76 createParser (html); 77 list = parser.extractAllNodesThatMatch (new NodeClassFilter (BodyTag.class)); 78 assertEquals ("only one element", 1, list.size ()); 79 assertType ("should be BodyTag", BodyTag.class, list.elementAt (0)); 80 BodyTag body = (BodyTag)list.elementAt (0); 81 assertEquals ("only one child", 1, body.getChildCount ()); 82 assertSuperType ("should be Text", Text.class, body.getChildren ().elementAt (0)); 83 assertStringEquals("html", guts, body.toHtml ()); 84 } 85 86 87 90 public void testTagName () throws ParserException 91 { 92 String guts; 93 String html; 94 NodeList list; 95 96 guts = "<booty>Now is 
the time for all good men..</booty>"; 97 html = "<html>" + guts + "</html>"; 98 createParser (html); 99 list = parser.extractAllNodesThatMatch (new TagNameFilter ("booty")); 100 assertEquals ("only one element", 1, list.size ()); 101 assertSuperType ("should be Tag", Tag.class, list.elementAt (0)); 102 assertStringEquals("name", "BOOTY", ((Tag)(list.elementAt (0))).getTagName ()); 103 } 104 105 108 public void testString () throws ParserException 109 { 110 String guts; 111 String html; 112 NodeList list; 113 114 guts = "<body>Now is the <a id=target><b>time</b></a> for all good <time>men</time>..</body>"; 115 html = "<html>" + guts + "</html>"; 116 createParser (html); 117 list = parser.extractAllNodesThatMatch (new StringFilter ("Time")); 118 assertEquals ("only one element", 1, list.size ()); 119 assertSuperType ("should be String", Text.class, list.elementAt (0)); 120 assertStringEquals("name", "time", ((Text)list.elementAt (0)).getText ()); 121 list = parser.extractAllNodesThatMatch (new StringFilter ("Time", true)); 123 assertEquals ("should be no elements", 0, list.size ()); 124 } 125 126 129 public void testChild () throws ParserException 130 { 131 String guts; 132 String html; 133 NodeList list; 134 135 guts = "<body>Now is the <a id=target><b>time</b></a> for all good <a HREF=http://bongo.com>men</a>..</body>"; 136 html = "<html>" + guts + "</html>"; 137 createParser (html); 138 list = parser.extractAllNodesThatMatch (new HasChildFilter (new TagNameFilter ("b"))); 139 assertEquals ("only one element", 1, list.size ()); 140 assertType ("should be LinkTag", LinkTag.class, list.elementAt (0)); 141 LinkTag link = (LinkTag)list.elementAt (0); 142 assertEquals ("three children", 3, link.getChildCount ()); 143 assertSuperType ("should be TagNode", Tag.class, link.getChildren ().elementAt (0)); 144 Tag tag = (Tag)link.getChildren ().elementAt (0); 145 assertStringEquals("name", "B", tag.getTagName ()); 146 } 147 148 151 public void testAttribute () throws 
ParserException 152 { 153 String guts; 154 String html; 155 NodeList list; 156 157 guts = "<body>Now is the <a id=target><b>time</b></a> for all good <a HREF=http://bongo.com>men</a>..</body>"; 158 html = "<html>" + guts + "</html>"; 159 createParser (html); 160 list = parser.extractAllNodesThatMatch (new HasAttributeFilter ("id")); 161 assertEquals ("only one element", 1, list.size ()); 162 assertType ("should be LinkTag", LinkTag.class, list.elementAt (0)); 163 LinkTag link = (LinkTag)list.elementAt (0); 164 assertEquals ("attribute value", "target", link.getAttribute ("id")); 165 } 166 167 170 public void testAnd () throws ParserException 171 { 172 String guts; 173 String html; 174 NodeList list; 175 176 guts = "<body>Now is the <a id=one><b>time</b></a> for all good <a id=two><b>men</b></a>..</body>"; 177 html = "<html>" + guts + "</html>"; 178 createParser (html); 179 list = parser.extractAllNodesThatMatch ( 180 new AndFilter ( 181 new HasChildFilter ( 182 new TagNameFilter ("b")), 183 new HasChildFilter ( 184 new StringFilter ("men"))) 185 ); 186 assertEquals ("only one element", 1, list.size ()); 187 assertType ("should be LinkTag", LinkTag.class, list.elementAt (0)); 188 LinkTag link = (LinkTag)list.elementAt (0); 189 assertEquals ("attribute value", "two", link.getAttribute ("id")); 190 } 191 192 195 public void testOr () throws ParserException 196 { 197 String guts; 198 String html; 199 NodeList list; 200 201 guts = "<body>Now is the <a id=one><b>time</b></a> for <a id=two><b>all</b></a> good <a id=three><b>men</b></a>..</body>"; 202 html = "<html>" + guts + "</html>"; 203 createParser (html); 204 list = parser.extractAllNodesThatMatch ( 205 new OrFilter ( 206 new HasChildFilter ( 207 new StringFilter ("time")), 208 new HasChildFilter ( 209 new StringFilter ("men"))) 210 ); 211 assertEquals ("two elements", 2, list.size ()); 212 assertType ("should be LinkTag", LinkTag.class, list.elementAt (0)); 213 LinkTag link = (LinkTag)list.elementAt (0); 214 
assertEquals ("attribute value", "one", link.getAttribute ("id")); 215 assertType ("should be LinkTag", LinkTag.class, list.elementAt (1)); 216 link = (LinkTag)list.elementAt (1); 217 assertEquals ("attribute value", "three", link.getAttribute ("id")); 218 } 219 220 223 public void testNot () throws ParserException 224 { 225 String guts; 226 String html; 227 NodeList list; 228 229 guts = "<body>Now is the <a id=one><b>time</b></a> for <a id=two><b>all</b></a> good <a id=three><b>men</b></a>..</body>"; 230 html = "<html>" + guts + "</html>"; 231 createParser (html); 232 list = parser.extractAllNodesThatMatch ( 233 new AndFilter ( 234 new HasChildFilter ( 235 new TagNameFilter ("b")), 236 new NotFilter ( 237 new HasChildFilter ( 238 new StringFilter ("all")))) 239 ); 240 assertEquals ("two elements", 2, list.size ()); 241 assertType ("should be LinkTag", LinkTag.class, list.elementAt (0)); 242 LinkTag link = (LinkTag)list.elementAt (0); 243 assertEquals ("attribute value", "one", link.getAttribute ("id")); 244 assertType ("should be LinkTag", LinkTag.class, list.elementAt (1)); 245 link = (LinkTag)list.elementAt (1); 246 assertEquals ("attribute value", "three", link.getAttribute ("id")); 247 } 248 249 public void testEscape() throws Exception  250 { 251 assertEquals ("douchebag", CssSelectorNodeFilter.unescape ("doucheba\\g").toString ()); 252 } 253 254 public void testSelectors() throws Exception  255 { 256 String html = "<html><head><title>sample title</title></head><body inserterr=\"true\" yomama=\"false\"><h3 id=\"heading\">big </invalid>heading</h3><ul id=\"things\"><li><br word=\"broken\"/>>moocow<li><applet/>doohickey<li class=\"last\"><b class=\"item\">final<br>item</b></ul></body></html>"; 257 Lexer l; 258 Parser p; 259 CssSelectorNodeFilter it; 260 NodeIterator i; 261 int count; 262 263 l = new Lexer (html); 264 p = new Parser (l); 265 it = new CssSelectorNodeFilter ("li + li"); 266 count = 0; 267 for (i = p.extractAllNodesThatMatch (it).elements (); 
i.hasMoreNodes ();) 268 { 269 assertEquals ("tag name wrong", "LI", ((Tag)i.nextNode()).getTagName()); 270 count++; 271 } 272 assertEquals ("wrong count", 2, count); 273 } 274 275 278 public void testRegularExpression () throws Exception  279 { 280 String target = 281 "\n" 282 + "\n" 283 + "Most recently, in the Western Conference final, the Flames knocked off \n" 284 + "the San Jose Sharks, the Pacific Division champions, to become the first \n" 285 + "Canadian team to reach the Stanley Cup Championship series since 1994."; 286 287 String html = 288 "<html><head><title>CBC Sports Online: NHL Playoffs</title></head>" 289 + "<body><h1>CBC SPORTS ONLINE</h1>\n" 290 + "The Calgary Flames have already defeated three NHL division winners \n" 291 + "during their improbable playoff run. If they are to hoist the Stanley \n" 292 + "Cup they'll have to go through one more. <p><table ALIGN=\"Right\" width=196 CELLPADDING=0 cellspacing=0 hspace=4> <tr><td><img SRC=\"/gfx/topstory/sports/iginla_j0524.jpg\" width=194 height=194 hspace=3 border=1><br>\n" 293 + "\n" 294 + "<font SIZE=\"1\" FACE=\"verdana,arial\">\n" 295 + "Jarome Iginla skates during the Flames' practice on Monday. Calgary takes on the Tampa Bay Lightning in the Stanley Cup finals beginning Tuesday night in Tampa\n" 296 + "</font></td></tr></table>\n" 297 + "\n" 298 + "\n" 299 + "In the post-season's first round, the Flames defeated the Vancouver \n" 300 + "Canucks, the Northwest Division winners, in seven tough games. <p>\n" 301 + "\n" 302 + "In Round 2 it was the Detroit Red Wings, who not only won the Central \n" 303 + "Division, but also boasted the NHL's best overall record during the \n" 304 + "regular season, who fell to the Flames. <p>" 305 + target 306 + "<p>\n" 307 + "\n" 308 + "Up next for the Flames is the Tampa Bay Lighting -- the runaway winners \n" 309 + "of the NHL's Southeast Division and the Eastern Conference's best team \n" 310 + "during the regular season. 
<p>\n" 311 + "\n" 312 + "The Lighting advanced by beating the Philadelphia Flyers in the Eastern \n" 313 + "Conference final. <p>\n" 314 + "</body></html>\n"; 315 Lexer lexer; 316 Parser parser; 317 RegexFilter filter; 318 NodeIterator iterator; 319 int count; 320 321 lexer = new Lexer (html); 322 parser = new Parser (lexer); 323 filter = new RegexFilter ("(19|20)\\d\\d([- \\\\/.](0[1-9]|1[012])[- \\\\/.](0[1-9]|[12][0-9]|3[01]))?"); 324 count = 0; 325 for (iterator = parser.extractAllNodesThatMatch (filter).elements (); iterator.hasMoreNodes ();) 326 { 327 assertEquals ("text wrong", target, iterator.nextNode ().toHtml ()); 328 count++; 329 } 330 assertEquals ("wrong count", 1, count); 331 } 332 } 333 334
// Popular Tags  (navigation footer left over from the web page this listing was copied from)