KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jahia > clipbuilder > html > web > http > impl > SimpleHTMLUnitProcessor


package org.jahia.clipbuilder.html.web.http.impl;

import java.io.*;
import java.net.*;
import java.util.*;

import com.gargoylesoftware.htmlunit.*;
import com.gargoylesoftware.htmlunit.html.*;
import org.jahia.clipbuilder.html.bean.*;
import org.jahia.clipbuilder.html.util.*;
import org.jahia.clipbuilder.html.web.http.*;
import org.apache.commons.httpclient.*;
import org.apache.commons.httpclient.params.*;

15 /**
16  * Description of the Class
17  *
18  *@author Tlili Khaled
19  */

20 public class SimpleHTMLUnitProcessor implements HTMLClient {
21     // Url to process
22
/**
23      * Description of the Field
24      */

25     protected UrlBean urlBean;
26     /**
27      * Description of the Field
28      */

29     protected boolean javascriptEnable;
30
31     //Proxy
32
/**
33      * Description of the Field
34      */

35     protected String JavaDoc proxyHost;
36     /**
37      * Description of the Field
38      */

39     protected int proxyPort = 0;
40
41     // Client to use
42
/**
43      * Description of the Field
44      */

45     protected WebClient webClient = null;
46     /**
47      * Description of the Field
48      */

49     protected RefreshHandler refreshHandler_;
50     /**
51      * Description of the Field
52      */

53     protected Header[] headers;
54
55     // Resulting page;
56
/**
57      * Description of the Field
58      */

59     protected HtmlPage lastPage_;
60
61     //List of name of the frame includind in the page
62
/**
63      * Description of the Field
64      */

65     protected List allNameFrame = new ArrayList();
66
67     //Method to call: Get, post,...
68
/**
69      * Description of the Field
70      */

71     protected SubmitMethod method = null;
72
73     // resultiing htmlDocument
74
/**
75      * Description of the Field
76      */

77     protected String JavaDoc finalUrl;
78     /**
79      * Description of the Field
80      */

81     protected String JavaDoc htmlAsString;
82     /**
83      * Description of the Field
84      */

85     protected String JavaDoc responseCharSet;
86
87     /**
88      * Description of the Field
89      */

90     protected boolean errorHasOccured = false;
91
92     /**
93      * Description of the Field
94      */

95     protected boolean refreshHandlerExceptionOccured_ = false;
96     private UrlBean previousUrlBean;
97     /**
98      * Description of the Field
99      */

100     protected static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(SimpleHTMLUnitProcessor.class);
101
102
103     /**
104      * Constructor for the HTMLUnitProcessor object
105      *
106      *@param headers Description of Parameter
107      *@param httpMethod Description of Parameter
108      *@param uBean Description of Parameter
109      */

110     public SimpleHTMLUnitProcessor(Header[] headers, String JavaDoc httpMethod, UrlBean uBean) {
111         this.setHeaders(headers);
112         setMethod(httpMethod);
113         setUrlBean(uBean);
114               // DefaultHttpParams.getDefaultParams().setBooleanParameter(HttpMethodParams.SINGLE_COOKIE_HEADER, true);
115
}
116
117
118     /**
119      * Sets the UrlBean attribute of the HTMLUnitProcessor object
120      *
121      *@param urlBean The new UrlBean value
122      */

123     public void setUrlBean(UrlBean urlBean) {
124         this.urlBean = urlBean;
125     }
126
127
128     /**
129      * Sets the Method attribute of the HTMLUnitProcessor object
130      *
131      *@param method The new Method value
132      */

133     public void setMethod(String JavaDoc method) {
134         if (method.equalsIgnoreCase("GET")) {
135             this.method = SubmitMethod.GET;
136         }
137         else if (method.equalsIgnoreCase("POST")) {
138             this.method = SubmitMethod.POST;
139         }
140         else {
141             logger.error("[ Method " + method + " unknown]");
142         }
143
144     }
145
146
147     /**
148      * Sets the WebClient attribute of the HTMLUnitProcessor object
149      *
150      *@param webClient The new WebClient value
151      */

152     public void setWebClient(Object JavaDoc webClient) {
153         this.webClient = (WebClient) webClient;
154     }
155
156
157
158     /**
159      * Sets the ProxyPort attribute of the HTMLUnitProcessor object
160      *
161      *@param proxyPort The new ProxyPort value
162      */

163     public void setProxyPort(int proxyPort) {
164         this.proxyPort = proxyPort;
165     }
166
167
168     /**
169      * Sets the Proxy attribute of the HTMLUnitProcessor object
170      *
171      *@param proxy The new Proxy value
172      */

173     public void setProxyHost(String JavaDoc proxy) {
174         this.proxyHost = proxy;
175     }
176
177
178     /**
179      * Sets the HtmlAsString attribute of the HTMLUnitProcessor object
180      *
181      *@param htmlAsString The new HtmlAsString value
182      */

183     public void setHtmlAsString(String JavaDoc htmlAsString) {
184         this.htmlAsString = htmlAsString;
185
186     }
187
188
189     /**
190      * Sets the FinalUrl attribute of the HTMLUnitProcessor object
191      *
192      *@param finalUrl The new FinalUrl value
193      */

194     public void setFinalUrl(String JavaDoc finalUrl) {
195         this.finalUrl = finalUrl;
196     }
197
198
199     /**
200      * Sets the Header attribute of the HTMLUnitProcessor object
201      *
202      *@param headers The new Headers value
203      */

204     public void setHeaders(Header[] headers) {
205         this.headers = headers;
206
207     }
208
209
210     /**
211      * Sets the ResponseCharSet attribute of the HTMLUnitProcessor object
212      *
213      *@param responseCharSet The new ResponseCharSet value
214      */

215     public void setResponseCharSet(String JavaDoc responseCharSet) {
216         this.responseCharSet = responseCharSet;
217     }
218
219
220     /**
221      * Sets the PreviousUrlBean attribute of the SimpleHTMLUnitProcessor object
222      *
223      *@param previousUrlBean The new PreviousUrlBean value
224      */

225     public void setPreviousUrlBean(UrlBean previousUrlBean) {
226         this.previousUrlBean = previousUrlBean;
227     }
228
229
230     /**
231      * Gets the UrlBean attribute of the HTMLUnitProcessor object
232      *
233      *@return The UrlBean value
234      */

235     public UrlBean getUrlBean() {
236         return urlBean;
237     }
238
239
240     /**
241      * Gets the Method attribute of the HTMLUnitProcessor object
242      *
243      *@return The Method value
244      */

245     public SubmitMethod getSubmitMethod() {
246         return method;
247     }
248
249
250     /**
251      * Gets the Method attribute of the HTMLUnitProcessor object
252      *
253      *@return The Method value
254      */

255     public String JavaDoc getMethod() {
256         return method.getName();
257     }
258
259
260     /**
261      * Gets the WebClient attribute of the HTMLUnitProcessor object
262      *
263      *@return The WebClient value
264      */

265     public WebClient getWebClient() {
266         return webClient;
267     }
268
269
270
271     /**
272      * Gets the ProxyPort attribute of the HTMLUnitProcessor object
273      *
274      *@return The ProxyPort value
275      */

276     public int getProxyPort() {
277         return proxyPort;
278     }
279
280
281     /**
282      * Gets the Proxy attribute of the HTMLUnitProcessor object
283      *
284      *@return The Proxy value
285      */

286     public String JavaDoc getProxyHost() {
287         return proxyHost;
288     }
289
290
291     /**
292      * Gets the AllNameFrame attribute of the HTMLUnitProcessor object
293      *
294      *@return The AllNameFrame value
295      */

296     public List getAllNameFrame() {
297         return allNameFrame;
298     }
299
300
301     /**
302      * Gets the HtmlAsString attribute of the HTMLUnitProcessor object
303      *
304      *@return The HtmlAsString value
305      */

306     public String JavaDoc getHtmlAsString() {
307         return htmlAsString;
308     }
309
310
311     /**
312      * Gets the FinalUrl attribute of the HTMLUnitProcessor object
313      *
314      *@return The FinalUrl value
315      */

316     public String JavaDoc getFinalUrl() {
317         return finalUrl;
318     }
319
320
321     /**
322      * Gets the RequestedUrlFromUrlBean attribute of the HTMLUnitProcessor
323      * object
324      *
325      *@return The RequestedUrlFromUrlBean value
326      */

327     public String JavaDoc getRelativeUrlFromUrlBean() {
328         return getUrlBean().getRelativeUrlValue();
329     }
330
331
332     /**
333      * Gets the AbsoluteUrlFromUrlBean attribute of the SimpleHTMLUnitProcessor
334      * object
335      *
336      *@return The AbsoluteUrlFromUrlBean value
337      */

338     public String JavaDoc getAbsoluteUrlFromUrlBean() {
339         return getUrlBean().getAbsoluteUrlValue();
340     }
341
342
343
344     /**
345      * Gets the JavascriptEnable attribute of the HTMLClient object
346      *
347      *@return The JavascriptEnable value
348      */

349     public boolean isJavascriptEnable() {
350         return this.javascriptEnable;
351     }
352
353
354     /**
355      * Gets the ResponseCharSet attribute of the HTMLUnitProcessor object
356      *
357      *@return The ResponseCharSet value
358      */

359     public String JavaDoc getResponseCharSet() {
360         return responseCharSet;
361     }
362
363
364     /**
365      * Gets the PreviousUrlBean attribute of the SimpleHTMLUnitProcessor object
366      *
367      *@return The PreviousUrlBean value
368      */

369     public UrlBean getPreviousUrlBean() {
370         return previousUrlBean;
371     }
372
373
374     /**
375      * Gets the Header attribute of the HTMLUnitProcessor object
376      *
377      *@return The Header value
378      */

379     public Header[] getHeader() {
380         return headers;
381     }
382
383
384     /**
385      * Adds a feature to the NameFrame attribute of the HTMLUnitProcessor object
386      *
387      *@param name The feature to be added to the NameFrame attribute
388      */

389     public void addNameFrame(String JavaDoc name) {
390         getAllNameFrame().add(name);
391     }
392
393
394     /**
395      * Description of the Method
396      *
397      *@exception WebClippingException Description of Exception
398      */

399     public void execute() throws WebClippingException {
400         String JavaDoc html = "";
401         try {
402
403             // Assert a non null webClient
404
assertNotNullWebClient();
405
406             //Get the requested url
407
URL url = new URL(getAbsoluteUrlFromUrlBean());
408
409             //set cookies
410
webClient.getWebConnection().getStateForUrl(url);
411
412             // add header
413
//addHeaders();
414

415             // Get the paramter
416
SubmitMethod httpMethod = getSubmitMethod();
417             ArrayList paramsList = getNameValuePairListFromURLBean();
418
419             // execute
420
lastPage_ = (HtmlPage) webClient.getPage(url, httpMethod, paramsList);
421
422             // Get the response
423
WebResponse response = lastPage_.getWebResponse();
424             String JavaDoc charSet = response.getContentCharSet();
425             logger.debug("CharSet: " + charSet);
426
427             // update url after all redirections
428
logger.debug("[ URL before process is: " + getUrlBean().getAbsoluteURL().toExternalForm() + " ]");
429             setFinalUrl(response.getUrl().toExternalForm());
430             getUrlBean().setRedirectUrl(URLUtilities.getURL(getFinalUrl()));
431
432             logger.debug("[ URL after process is: " + getUrlBean().getAbsoluteURL().toExternalForm() + " ]");
433
434             // set the reponse as string
435
html = new String JavaDoc(response.getResponseBody(), HTMLUtilities.getEncoding(response.getContentAsString(), "iso-8859-1"));
436
437             logger.debug("[ Encoding of the response is: " + response.getContentCharSet() + " ]");
438
439             // set the html
440
setHtmlAsString(html);
441
442         }
443         catch (FailingHttpStatusCodeException ex) {
444             logger.error("[ FailingHttpStatusCodeException: " + ex.getMessage() + " ]");
445             ex.printStackTrace();
446             throw new WebClippingException("Failing Http Status Code", ex);
447         }
448         catch (RuntimeException JavaDoc ex) {
449             if (!refreshHandlerExceptionOccured_) {
450                 logger.warn("[ RuntimeException occured: " + ex.getMessage() + " ]");
451                 logger.debug("[ Try whith WaitingRefreshHandler]");
452                 getWebClient().setRefreshHandler(new WaitingRefreshHandler());
453                 refreshHandlerExceptionOccured_ = true;
454                 this.execute();
455             }
456             else {
457                 logger.error("[ RuntimeException occured: " + ex.getMessage() + " ]");
458                 ex.printStackTrace();
459                 throw new WebClippingException("RefreshHandler", ex);
460             }
461         }
462
463         catch (MalformedURLException ex) {
464             logger.error("[ MalformedURLException: " + ex.getMessage() + " ]");
465             ex.printStackTrace();
466             throw new WebClippingException("Malformed URL", ex);
467         }
468         catch (IOException ex) {
469             logger.error("[ IOException occured: " + ex.getMessage() + " ]");
470             ex.printStackTrace();
471             throw new WebClippingException("IOException", ex);
472         }
473         catch (Exception JavaDoc ex) {
474             logger.debug("[ Exception occured: " + ex.getMessage() + " ]");
475             throw new WebClippingException("Exception", ex);
476         }
477
478     }
479
480
481
482     /**
483      * Allow to know if there is frame on the taget file.
484      *
485      *@return Description of the Returned Value
486      */

487     public boolean foundFrame() {
488
489         HtmlPage htmlPage = (HtmlPage) lastPage_;
490         List frameList = htmlPage.getFrames();
491         return !frameList.isEmpty();
492     }
493
494
495     /**
496      * Enable/disable javascript processing
497      *
498      *@param enable Description of Parameter
499      */

500     public void enabledJavascript(boolean enable) {
501         javascriptEnable = enable;
502         //update the refesh handler of the web client
503
WebClient webClient = this.getWebClient();
504         webClient.setJavaScriptEnabled(isJavascriptEnable());
505     }
506
507
508     /**
509      * Remove the last recorded url
510      */

511     public void removeLastUrl() {
512         // do nothing.
513
}
514
515
516     /**
517      * Gets the NameValuePairListFromURLBean attribute of the HTMLUnitProcessor
518      * object
519      *
520      *@return The NameValuePairListFromURLBean value
521      */

522     private ArrayList getNameValuePairListFromURLBean() {
523         logger.debug("[ Convert Map To List NameValuePair ]");
524         // retrieve the query parameters
525
if (getUrlBean() == null) {
526             logger.error("[ URL is not set]");
527             return null;
528         }
529         List queryParamBeanList = getUrlBean().getQueryParamBeanList();
530
531         //build list
532
ArrayList listNamePairAndValue = new ArrayList();
533         for (int i = 0; i < queryParamBeanList.size(); i++) {
534             QueryParamBean qBean = (QueryParamBean) queryParamBeanList.get(i);
535             String JavaDoc name = qBean.getName();
536             String JavaDoc value = qBean.getDefaultValue();
537             logger.debug("[ Found Param: " + name + " with value " + value + " ]");
538             NameValuePair paramAndValue = new NameValuePair(name, value);
539             listNamePairAndValue.add(paramAndValue);
540         }
541
542         return listNamePairAndValue;
543     }
544
545
546     /**
547      * Gets a default WebClient attribute of the HTMLUnitProcessor object
548      *
549      *@return The NewWebClient value
550      */

551     private WebClient getNewWebClient() {
552         //Get Proxy parameter
553
String JavaDoc proxyHost = getProxyHost();
554         int proxyPort = getProxyPort();
555
556         // build the webClient
557
WebClient webClient = null;
558         if (proxyHost != null && (proxyPort != 0)) {
559             webClient = new WebClient(BrowserVersion.INTERNET_EXPLORER_6_0, proxyHost, proxyPort);
560         }
561         else {
562             webClient = new WebClient(BrowserVersion.INTERNET_EXPLORER_6_0);
563         }
564
565         // disable throws exception when there is javacscript error
566
webClient.setThrowExceptionOnScriptError(false);
567         webClient.setThrowExceptionOnFailingStatusCode(false);
568
569         // Disable validation. It will be done by the HTMLParser
570
webClient.setRedirectEnabled(true);
571         webClient.setJavaScriptEnabled(isJavascriptEnable());
572
573         return webClient;
574     }
575
576
577     /**
578      * Adds a feature to the Headers attribute of the HTMLUnitProcessor object
579      */

580     private void addHeaders() {
581         logger.debug("[ Add headers ]");
582         Header[] header = this.getHeader();
583         if (header != null) {
584             for (int i = 0; i < header.length; i++) {
585                 Header h = header[i];
586                 if (h != null) {
587                     webClient.addRequestHeader(h.getName(), h.getValue());
588                 }
589             }
590         }
591     }
592
593
594     /**
595      * Build a defaultClient if webClient is null
596      */

597     private void assertNotNullWebClient() {
598         if (webClient == null) {
599             setWebClient(getNewWebClient());
600         }
601
602     }
603
604 }
605
Popular Tags