KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > tests > filterTests > FilterTest


// HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2003 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/filterTests/FilterTest.java,v $
// $Author: derrickoswald $
// $Date: 2004/07/02 00:49:29 $
// $Revision: 1.7 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

27 package org.htmlparser.tests.filterTests;
28
29 import org.htmlparser.Parser;
30 import org.htmlparser.Tag;
31 import org.htmlparser.filters.AndFilter;
32 import org.htmlparser.filters.CssSelectorNodeFilter;
33 import org.htmlparser.filters.HasAttributeFilter;
34 import org.htmlparser.filters.HasChildFilter;
35 import org.htmlparser.filters.NodeClassFilter;
36 import org.htmlparser.filters.NotFilter;
37 import org.htmlparser.filters.OrFilter;
38 import org.htmlparser.filters.RegexFilter;
39 import org.htmlparser.filters.StringFilter;
40 import org.htmlparser.filters.TagNameFilter;
41 import org.htmlparser.lexer.Lexer;
42 import org.htmlparser.Text;
43 import org.htmlparser.tags.BodyTag;
44 import org.htmlparser.tags.LinkTag;
45 import org.htmlparser.tests.ParserTestCase;
46 import org.htmlparser.util.NodeIterator;
47 import org.htmlparser.util.NodeList;
48 import org.htmlparser.util.ParserException;
49
50 /**
51  * Test the operation of filters.
52  */

53 public class FilterTest extends ParserTestCase
54 {
55     static
56     {
57         System.setProperty ("org.htmlparser.tests.filterTests.FilterTest", "FilterTest");
58     }
59
60     public FilterTest (String JavaDoc name)
61     {
62         super (name);
63     }
64
65     /**
66      * Test node class filtering.
67      */

68     public void testNodeClass () throws ParserException
69     {
70         String JavaDoc guts;
71         String JavaDoc html;
72         NodeList list;
73
74         guts = "<body>Now is the time for all good men..</body>";
75         html = "<html>" + guts + "</html>";
76         createParser (html);
77         list = parser.extractAllNodesThatMatch (new NodeClassFilter (BodyTag.class));
78         assertEquals ("only one element", 1, list.size ());
79         assertType ("should be BodyTag", BodyTag.class, list.elementAt (0));
80         BodyTag body = (BodyTag)list.elementAt (0);
81         assertEquals ("only one child", 1, body.getChildCount ());
82         assertSuperType ("should be Text", Text.class, body.getChildren ().elementAt (0));
83         assertStringEquals("html", guts, body.toHtml ());
84     }
85
86
87     /**
88      * Test tag name filtering.
89      */

90     public void testTagName () throws ParserException
91     {
92         String JavaDoc guts;
93         String JavaDoc html;
94         NodeList list;
95
96         guts = "<booty>Now is the time for all good men..</booty>";
97         html = "<html>" + guts + "</html>";
98         createParser (html);
99         list = parser.extractAllNodesThatMatch (new TagNameFilter ("booty"));
100         assertEquals ("only one element", 1, list.size ());
101         assertSuperType ("should be Tag", Tag.class, list.elementAt (0));
102         assertStringEquals("name", "BOOTY", ((Tag)(list.elementAt (0))).getTagName ());
103     }
104
105     /**
106      * Test string filtering.
107      */

108     public void testString () throws ParserException
109     {
110         String JavaDoc guts;
111         String JavaDoc html;
112         NodeList list;
113
114         guts = "<body>Now is the <a id=target><b>time</b></a> for all good <time>men</time>..</body>";
115         html = "<html>" + guts + "</html>";
116         createParser (html);
117         list = parser.extractAllNodesThatMatch (new StringFilter ("Time"));
118         assertEquals ("only one element", 1, list.size ());
119         assertSuperType ("should be String", Text.class, list.elementAt (0));
120         assertStringEquals("name", "time", ((Text)list.elementAt (0)).getText ());
121         // test case sensitivity
122
list = parser.extractAllNodesThatMatch (new StringFilter ("Time", true));
123         assertEquals ("should be no elements", 0, list.size ());
124     }
125
126     /**
127      * Test child filtering.
128      */

129     public void testChild () throws ParserException
130     {
131         String JavaDoc guts;
132         String JavaDoc html;
133         NodeList list;
134
135         guts = "<body>Now is the <a id=target><b>time</b></a> for all good <a HREF=http://bongo.com>men</a>..</body>";
136         html = "<html>" + guts + "</html>";
137         createParser (html);
138         list = parser.extractAllNodesThatMatch (new HasChildFilter (new TagNameFilter ("b")));
139         assertEquals ("only one element", 1, list.size ());
140         assertType ("should be LinkTag", LinkTag.class, list.elementAt (0));
141         LinkTag link = (LinkTag)list.elementAt (0);
142         assertEquals ("three children", 3, link.getChildCount ());
143         assertSuperType ("should be TagNode", Tag.class, link.getChildren ().elementAt (0));
144         Tag tag = (Tag)link.getChildren ().elementAt (0);
145         assertStringEquals("name", "B", tag.getTagName ());
146     }
147
148     /**
149      * Test attribute filtering.
150      */

151     public void testAttribute () throws ParserException
152     {
153         String JavaDoc guts;
154         String JavaDoc html;
155         NodeList list;
156
157         guts = "<body>Now is the <a id=target><b>time</b></a> for all good <a HREF=http://bongo.com>men</a>..</body>";
158         html = "<html>" + guts + "</html>";
159         createParser (html);
160         list = parser.extractAllNodesThatMatch (new HasAttributeFilter ("id"));
161         assertEquals ("only one element", 1, list.size ());
162         assertType ("should be LinkTag", LinkTag.class, list.elementAt (0));
163         LinkTag link = (LinkTag)list.elementAt (0);
164         assertEquals ("attribute value", "target", link.getAttribute ("id"));
165     }
166
167     /**
168      * Test and filtering.
169      */

170     public void testAnd () throws ParserException
171     {
172         String JavaDoc guts;
173         String JavaDoc html;
174         NodeList list;
175
176         guts = "<body>Now is the <a id=one><b>time</b></a> for all good <a id=two><b>men</b></a>..</body>";
177         html = "<html>" + guts + "</html>";
178         createParser (html);
179         list = parser.extractAllNodesThatMatch (
180             new AndFilter (
181                 new HasChildFilter (
182                     new TagNameFilter ("b")),
183                 new HasChildFilter (
184                     new StringFilter ("men")))
185                 );
186         assertEquals ("only one element", 1, list.size ());
187         assertType ("should be LinkTag", LinkTag.class, list.elementAt (0));
188         LinkTag link = (LinkTag)list.elementAt (0);
189         assertEquals ("attribute value", "two", link.getAttribute ("id"));
190     }
191
192     /**
193      * Test or filtering.
194      */

195     public void testOr () throws ParserException
196     {
197         String JavaDoc guts;
198         String JavaDoc html;
199         NodeList list;
200
201         guts = "<body>Now is the <a id=one><b>time</b></a> for <a id=two><b>all</b></a> good <a id=three><b>men</b></a>..</body>";
202         html = "<html>" + guts + "</html>";
203         createParser (html);
204         list = parser.extractAllNodesThatMatch (
205             new OrFilter (
206                 new HasChildFilter (
207                     new StringFilter ("time")),
208                 new HasChildFilter (
209                     new StringFilter ("men")))
210                 );
211         assertEquals ("two elements", 2, list.size ());
212         assertType ("should be LinkTag", LinkTag.class, list.elementAt (0));
213         LinkTag link = (LinkTag)list.elementAt (0);
214         assertEquals ("attribute value", "one", link.getAttribute ("id"));
215         assertType ("should be LinkTag", LinkTag.class, list.elementAt (1));
216         link = (LinkTag)list.elementAt (1);
217         assertEquals ("attribute value", "three", link.getAttribute ("id"));
218     }
219
220     /**
221      * Test not filtering.
222      */

223     public void testNot () throws ParserException
224     {
225         String JavaDoc guts;
226         String JavaDoc html;
227         NodeList list;
228
229         guts = "<body>Now is the <a id=one><b>time</b></a> for <a id=two><b>all</b></a> good <a id=three><b>men</b></a>..</body>";
230         html = "<html>" + guts + "</html>";
231         createParser (html);
232         list = parser.extractAllNodesThatMatch (
233             new AndFilter (
234                 new HasChildFilter (
235                     new TagNameFilter ("b")),
236                 new NotFilter (
237                     new HasChildFilter (
238                         new StringFilter ("all"))))
239                 );
240         assertEquals ("two elements", 2, list.size ());
241         assertType ("should be LinkTag", LinkTag.class, list.elementAt (0));
242         LinkTag link = (LinkTag)list.elementAt (0);
243         assertEquals ("attribute value", "one", link.getAttribute ("id"));
244         assertType ("should be LinkTag", LinkTag.class, list.elementAt (1));
245         link = (LinkTag)list.elementAt (1);
246         assertEquals ("attribute value", "three", link.getAttribute ("id"));
247     }
248
249     public void testEscape() throws Exception JavaDoc
250     {
251         assertEquals ("douchebag", CssSelectorNodeFilter.unescape ("doucheba\\g").toString ());
252     }
253
254     public void testSelectors() throws Exception JavaDoc
255     {
256         String JavaDoc html = "<html><head><title>sample title</title></head><body inserterr=\"true\" yomama=\"false\"><h3 id=\"heading\">big </invalid>heading</h3><ul id=\"things\"><li><br word=\"broken\"/>&gt;moocow<li><applet/>doohickey<li class=\"last\"><b class=\"item\">final<br>item</b></ul></body></html>";
257         Lexer l;
258         Parser p;
259         CssSelectorNodeFilter it;
260         NodeIterator i;
261         int count;
262
263         l = new Lexer (html);
264         p = new Parser (l);
265         it = new CssSelectorNodeFilter ("li + li");
266         count = 0;
267         for (i = p.extractAllNodesThatMatch (it).elements (); i.hasMoreNodes ();)
268         {
269             assertEquals ("tag name wrong", "LI", ((Tag)i.nextNode()).getTagName());
270             count++;
271         }
272         assertEquals ("wrong count", 2, count);
273     }
274
275     /**
276      * Test regular expression matching:
277      */

278     public void testRegularExpression () throws Exception JavaDoc
279     {
280         String JavaDoc target =
281               "\n"
282             + "\n"
283             + "Most recently, in the Western Conference final, the Flames knocked off \n"
284             + "the San Jose Sharks, the Pacific Division champions, to become the first \n"
285             + "Canadian team to reach the Stanley Cup Championship series since 1994.";
286             
287         String JavaDoc html =
288               "<html><head><title>CBC Sports Online: NHL Playoffs</title></head>"
289             + "<body><h1>CBC SPORTS ONLINE</h1>\n"
290             + "The Calgary Flames have already defeated three NHL division winners \n"
291             + "during their improbable playoff run. If they are to hoist the Stanley \n"
292             + "Cup they'll have to go through one more. <p><table ALIGN=\"Right\" width=196 CELLPADDING=0 cellspacing=0 hspace=4> <tr><td><img SRC=\"/gfx/topstory/sports/iginla_j0524.jpg\" width=194 height=194 hspace=3 border=1><br>\n"
293             + "\n"
294             + "<font SIZE=\"1\" FACE=\"verdana,arial\">\n"
295             + "Jarome Iginla skates during the Flames' practice on Monday. Calgary takes on the Tampa Bay Lightning in the Stanley Cup finals beginning Tuesday night in Tampa\n"
296             + "</font></td></tr></table>\n"
297             + "\n"
298             + "\n"
299             + "In the post-season's first round, the Flames defeated the Vancouver \n"
300             + "Canucks, the Northwest Division winners, in seven tough games. <p>\n"
301             + "\n"
302             + "In Round 2 it was the Detroit Red Wings, who not only won the Central \n"
303             + "Division, but also boasted the NHL's best overall record during the \n"
304             + "regular season, who fell to the Flames. <p>"
305             + target
306             + "<p>\n"
307             + "\n"
308             + "Up next for the Flames is the Tampa Bay Lighting -- the runaway winners \n"
309             + "of the NHL's Southeast Division and the Eastern Conference's best team \n"
310             + "during the regular season. <p>\n"
311             + "\n"
312             + "The Lighting advanced by beating the Philadelphia Flyers in the Eastern \n"
313             + "Conference final. <p>\n"
314             + "</body></html>\n";
315         Lexer lexer;
316         Parser parser;
317         RegexFilter filter;
318         NodeIterator iterator;
319         int count;
320
321         lexer = new Lexer (html);
322         parser = new Parser (lexer);
323         filter = new RegexFilter ("(19|20)\\d\\d([- \\\\/.](0[1-9]|1[012])[- \\\\/.](0[1-9]|[12][0-9]|3[01]))?");
324         count = 0;
325         for (iterator = parser.extractAllNodesThatMatch (filter).elements (); iterator.hasMoreNodes ();)
326         {
327             assertEquals ("text wrong", target, iterator.nextNode ().toHtml ());
328             count++;
329         }
330         assertEquals ("wrong count", 1, count);
331     }
332 }
333
334
Popular Tags