KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > gargoylesoftware > htmlunit > HTMLParserTest


1 /*
2  * Copyright (c) 2002, 2005 Gargoyle Software Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  * 1. Redistributions of source code must retain the above copyright notice,
8  * this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright notice,
10  * this list of conditions and the following disclaimer in the documentation
11  * and/or other materials provided with the distribution.
12  * 3. The end-user documentation included with the redistribution, if any, must
13  * include the following acknowledgment:
14  *
15  * "This product includes software developed by Gargoyle Software Inc.
16  * (http://www.GargoyleSoftware.com/)."
17  *
18  * Alternately, this acknowledgment may appear in the software itself, if
19  * and wherever such third-party acknowledgments normally appear.
20  * 4. The name "Gargoyle Software" must not be used to endorse or promote
21  * products derived from this software without prior written permission.
22  * For written permission, please contact info@GargoyleSoftware.com.
23  * 5. Products derived from this software may not be called "HtmlUnit", nor may
24  * "HtmlUnit" appear in their name, without prior written permission of
25  * Gargoyle Software Inc.
26  *
27  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
28  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
29  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GARGOYLE
30  * SOFTWARE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
31  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
33  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
36  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37  */

38 package com.gargoylesoftware.htmlunit;
39
40 import java.net.ConnectException;
41 import java.net.SocketException;
42 import java.net.URL;
43 import java.net.URLConnection;
44 import java.util.ArrayList;
45 import java.util.Arrays;
46 import java.util.Collections;
47 import java.util.List;
48
49 import com.gargoylesoftware.htmlunit.html.HTMLParser;
50 import com.gargoylesoftware.htmlunit.html.HtmlElement;
51 import com.gargoylesoftware.htmlunit.html.HtmlNoScript;
52 import com.gargoylesoftware.htmlunit.html.HtmlPage;
53 import com.gargoylesoftware.htmlunit.html.xpath.HtmlUnitXPath;
54
55 /**
56  * test driver for the new HTMLParser implementation
57  *
58  * @version $Revision: 1.15 $
59  * @author <a HREF="mailto:cse@dynabean.de">Christian Sell</a>
60  */

61 public class HTMLParserTest extends WebTestCase {
62
63     /**
64      * Create an instance
65      * @param name The name of the test
66      */

67     public HTMLParserTest( final String name ) {
68         super(name);
69     }
70
71     /**
72      * test the new HTMLParser on a simple HTML string and use the Jaxen XPath navigator
73      * to validate results
74      * @throws Exception failure
75      */

76     public void testSimpleHTMLString() throws Exception {
77         final WebClient webClient = new WebClient();
78         final WebResponse webResponse = new StringWebResponse(
79             "<html><head><title>TITLE</title><noscript>TEST</noscript></head><body></body></html>");
80
81         final HtmlPage page = HTMLParser.parse(webResponse, webClient.getCurrentWindow());
82
83         HtmlUnitXPath xpath = new HtmlUnitXPath("//noscript");
84         final String stringVal = xpath.stringValueOf(page);
85
86         assertEquals("TEST", stringVal);
87
88         xpath = new HtmlUnitXPath("//*[./text() = 'TEST']");
89         final HtmlElement node = (HtmlElement)xpath.selectSingleNode(page);
90
91         assertEquals(node.getTagName(), HtmlNoScript.TAG_NAME);
92     }
93
94     /**
95      * Test when <form> inside <table> and before <tr>
96      * @throws Exception failure
97      */

98     public void testBadlyFormedHTML() throws Exception {
99         final String content
100             = "<html><head><title>first</title>"
101             + "<script>"
102             + "function test()"
103             + "{"
104             + " alert(document.getElementById('myInput').form.id);\n"
105             + "}"
106             + "</script>"
107             + "</head>"
108             + "<body onload='test()'>"
109             + "<table>"
110             + "<form name='myForm' action='foo' id='myForm'>"
111             + "<tr><td>"
112             + "<input type='text' name='myInput' id='myInput'/>"
113             + "</td></tr>"
114             + "</form>"
115             + "</table>"
116             + "</body></html>";
117
118         final List collectedAlerts = new ArrayList();
119         final List expectedAlerts = Arrays.asList(new String[]{"myForm"});
120         createTestPageForRealBrowserIfNeeded(content, expectedAlerts);
121
122         loadPage(content, collectedAlerts);
123
124         assertEquals( expectedAlerts, collectedAlerts );
125     }
126
127     /**
128      * Test when an illegal tag is found in head as some websites do
129      * @throws Exception failure
130      */

131     public void testUnknownTagInHead() throws Exception {
132         if (true) {
133             notImplemented();
134             return;
135         }
136
137         // Note: the <meta> tag in this test is quite important because
138
// I could adapt the TagBalancer to make it work except with this <meta http-equiv...
139
// (it worked with <meta name=...)
140
final String content
141             = "<html><head><mainA3>"
142             + "<meta http-equiv='Content-Type' content='text/html; charset=ISO-8859-1'>"
143             + "<title>first</title>"
144             + "<script>"
145             + "function test()"
146             + "{"
147             + " alert(document.title);\n"
148             + "}"
149             + "</script>"
150             + "</head>"
151             + "<body onload='test()'>"
152             + "</body></html>";
153
154         final List collectedAlerts = new ArrayList();
155         final List expectedAlerts = Arrays.asList(new String[]{"first"});
156         createTestPageForRealBrowserIfNeeded(content, expectedAlerts);
157
158         final HtmlPage page = loadPage(content, collectedAlerts);
159         System.out.println(page.asXml());
160
161         assertEquals( expectedAlerts, collectedAlerts );
162     }
163
164     /**
165      * test the new HTMLParser by accessing the HtmlUnit home page and detecting the copyright
166      * string.
167      *
168      * @throws Exception failure
169      */

170     public static void testHtmlUnitHomePage() throws Exception {
171
172         final URL htmlUnitSite = new URL("http://htmlunit.sourceforge.net");
173         try {
174             final URLConnection connection = htmlUnitSite.openConnection();
175             connection.connect();
176         }
177         catch (final ConnectException e) {
178             /* sf.net's flaky web servers and not being able to connect
179              * here from the shell server can cause this, doesn't mean something
180              * is broken
181              */

182             System.out.println("Connection could not be made to " + htmlUnitSite.toExternalForm());
183             return;
184         }
185         catch (final SocketException e) {
186             /* Some systems do not have access to the sf.net's web page. If the connection
187              * timesout, do not fail the test
188              */

189             System.out.println("Connection could not be made to " + htmlUnitSite.toExternalForm());
190             return;
191         }
192         
193         final WebClient webClient = new WebClient();
194         final WebResponse webResponse = new HttpWebConnection(webClient).getResponse(
195                 htmlUnitSite,
196                 SubmitMethod.GET,
197                 Collections.EMPTY_LIST,
198                 Collections.EMPTY_MAP
199         );
200
201         final HtmlPage page = HTMLParser.parse(webResponse, webClient.getCurrentWindow());
202
203         //find the copyright string
204
HtmlUnitXPath xpath = new HtmlUnitXPath("//div[@id='footer']/div[@class='xright']");
205         final String stringVal = xpath.stringValueOf(page).trim();
206         assertEquals("\u00A9 2002-2005, Gargoyle Software Inc.", stringVal);
207
208         //see if the Google adds were added via Javascript
209
/* google ads not on page anymore
210         xpath = new HtmlUnitXPath("//iframe[@name = 'google_ads_frame']");
211         final HtmlInlineFrame inline = (HtmlInlineFrame)xpath.selectSingleNode(page);
212
213         assertNotNull("find Google ads", inline);
214
215         final HtmlPage innerPage = (HtmlPage)inline.getEnclosedPage();
216         assertNotNull(innerPage);
217         */

218     }
219 }
220
Popular Tags