KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jahia > clipbuilder > html > web > http > impl > HTMLUnitProcessor


1 package org.jahia.clipbuilder.html.web.http.impl;
2
3 import java.io.*;
4 import java.net.*;
5 import java.util.*;
6
7 import com.gargoylesoftware.htmlunit.*;
8 import com.gargoylesoftware.htmlunit.html.*;
9 import org.jahia.clipbuilder.html.bean.*;
10 import org.jahia.clipbuilder.html.util.*;
11 import org.org.apache.commons.httpclient.*;
12 import org.jahia.clipbuilder.html.web.Constant.WebConstants;
13
14 /**
15  * Description of the Class
16  *
17  *@author Tlili Khaled
18  */

19 public class HTMLUnitProcessor extends SimpleHTMLUnitProcessor {
20     private List historyPageList;
21     //private boolean refreshHandlerExceptionOccured_ = false;
22
private static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(HTMLUnitProcessor.class);
23
24
25     /**
26      * Constructor for the HTMLUnitProcessor object
27      *
28      *@param headers Description of Parameter
29      *@param httpMethod Description of Parameter
30      *@param uBean Description of Parameter
31      */

32     public HTMLUnitProcessor(Header[] headers, String JavaDoc httpMethod, UrlBean uBean) {
33         super(headers, httpMethod, uBean);
34         assertNotNullWebClient();
35     }
36
37
38     /**
39      * Description of the Method
40      *
41      *@exception WebClippingException Description of Exception
42      */

43     public void execute() throws WebClippingException {
44         String JavaDoc html = "";
45         HtmlPage page = null;
46         try {
47             // first page
48
if (lastPage_ == null) {
49                 logger.debug("First page");
50                 setWebClient(getNewWebClient());
51                 historyPageList = new ArrayList();
52                 super.execute();
53             }
54             else {
55
56                 //continue browsing
57
UrlBean uBean = getUrlBean();
58                 String JavaDoc from = uBean.getFrom();
59
60                 // case of link
61
if (from.equalsIgnoreCase(WebConstants.FROM_LINK)) {
62                     logger.debug("It's a link");
63
64                     //find the html anchor
65
HtmlAnchor anchor = getHtmlAnchor();
66
67                     //click on
68
page = (HtmlPage) anchor.click();
69                 }
70                 else if (from.equalsIgnoreCase(
71                         WebConstants.FROM_AREA)) {
72                     logger.debug("It's an area");
73
74                     //find the html anchor
75
HtmlArea area = getHtmlArea();
76
77                     //click on
78
page = (HtmlPage) area.click();
79                 }
80
81                 else if (from.equalsIgnoreCase(
82                         WebConstants.FROM_FORM)) {
83
84                     logger.debug("It's a submitted form");
85
86                     // find the submitted form
87
HtmlForm form = getSubmittedForm();
88
89                     //fill form
90
fillForm(form);
91
92                     // find the submitted button
93
HtmlInput submitButton = getSubmittedButton(
94                             form);
95
96                     //click on
97
if (submitButton == null) {
98                         logger.warn("Submit button not found. Anyway submit the form");
99                         page = (HtmlPage) form.submit();
100                     }
101                     else {
102                         logger.debug(submitButton.getNameAttribute());
103                         logger.debug(submitButton.getValueAttribute());
104                         page = (HtmlPage) submitButton.click();
105                     }
106                 }
107                 else if (from.equalsIgnoreCase(WebConstants.FROM_MANUAL)) {
108                     logger.debug("Manual: Simlpe HtmlUnit execution");
109                     super.execute();
110                 }
111                 else {
112                     logger.debug("Default: Simlpe HtmlUnit execution");
113                     super.execute();
114                 }
115
116                 // Get the response
117
WebResponse response = page.getWebResponse();
118                 String JavaDoc charSet = response.getContentCharSet();
119                 logger.debug("CharSet: " + charSet);
120
121                 // update url after all redirections
122
logger.debug("[ URL before process is: " + getUrlBean().getAbsoluteURL().toExternalForm() + " ]");
123                 setFinalUrl(response.getUrl().toExternalForm());
124                 getUrlBean().setRedirectUrl(URLUtilities.getURL(
125                         getFinalUrl()));
126
127                 logger.debug("[ URL after process is: " +
128                         getUrlBean().getAbsoluteURL().
129                         toExternalForm() + " ]");
130
131                 // set the reponse as string
132
//html = response.getContentAsString();
133
logger.debug(response.getContentCharSet());
134                                 html = new String JavaDoc(response.getResponseBody(), HTMLUtilities.getEncoding(response.getContentAsString(), "iso-8859-1"));
135
136                 logger.debug("[ Encoding of the response is: " + response.getContentCharSet() + " ]");
137
138                 // set the html
139
setHtmlAsString(html);
140
141                 // updtae
142
refreshHandlerExceptionOccured_ = false;
143             }
144
145             // validate page
146
validate(page);
147
148         }
149         catch (ElementNotFoundException ex) {
150             logger.warn("[ Exception occured: " + ex.getMessage() + " ]");
151         }
152
153         catch (FailingHttpStatusCodeException ex) {
154             logger.error("[ FailingHttpStatusCodeException: " + ex.getMessage() + " ]");
155             ex.printStackTrace();
156             throw new WebClippingException("Failing Http Status Code", ex);
157         }
158         catch (MalformedURLException ex) {
159             logger.error("[ MalformedURLException: " + ex.getMessage() + " ]");
160             ex.printStackTrace();
161             throw new WebClippingException("Malformed URL", ex);
162         }
163         catch (IOException ex) {
164             logger.error("[ IOException occured: " + ex.getMessage() + " ]");
165             ex.printStackTrace();
166             throw new WebClippingException("IOException", ex);
167         }
168         catch (RuntimeException JavaDoc ex) {
169             if (!refreshHandlerExceptionOccured_) {
170                 logger.warn("[ RuntimeException occured: " + ex.getMessage() + " ]");
171                 logger.debug("[ Try whith ThreadedRefreshHandler]");
172                 // ThreadedRefreshHandler();
173
getWebClient().setRefreshHandler(new WaitingRefreshHandler());
174                 refreshHandlerExceptionOccured_ = true;
175                 execute();
176             }
177             else {
178                 logger.error("[ RuntimeException occured: " + ex.getMessage() + " ]");
179                 ex.printStackTrace();
180                 //throw new WebClippingException("RefreshHandler", ex);
181
}
182         }
183         catch (Exception JavaDoc ex) {
184             logger.error("[ Exception occured: " + ex.getMessage() + " ]");
185             ex.printStackTrace();
186             throw new WebClippingException("Exception", ex);
187         }
188
189     }
190
191
192     /**
193      * Description of the Method
194      */

195     public void removeLastUrl() {
196         super.removeLastUrl();
197         int lastIndex = historyPageList.size() - 1;
198         if (!historyPageList.isEmpty()) {
199             // remove the page from the page list
200
historyPageList.remove(lastIndex);
201             //update page_ var
202
lastPage_ = getLastPage();
203         }
204         else {
205             logger.debug("Error: page list is already Empty");
206         }
207     }
208
209
210     /**
211      * Gets the HtmlAnchor attribute of the HTMLUnitProcessor object
212      *
213      *@return The HtmlAnchor value
214      *@exception ElementNotFoundException Description of Exception
215      */

216     private HtmlAnchor getHtmlAnchor() throws ElementNotFoundException {
217         String JavaDoc url = getRelativeUrlFromUrlBean();
218         int position = getUrlBean().getPosition();
219         HtmlPage page = (HtmlPage) historyPageList.get(position);
220         HtmlAnchor anchor = page.getAnchorByHref(url);
221         return anchor;
222     }
223
224
225     /**
226      * Gets the HtmlArea attribute of the HTMLUnitProcessor object
227      *
228      *@return The HtmlArea value
229      *@exception ElementNotFoundException Description of Exception
230      */

231     private HtmlArea getHtmlArea() throws ElementNotFoundException {
232         String JavaDoc url = getRelativeUrlFromUrlBean();
233         Iterator allChildit = lastPage_.getAllHtmlChildElements();
234         while (allChildit.hasNext()) {
235             Object JavaDoc o = allChildit.next();
236             if (o instanceof HtmlArea) {
237                 HtmlArea a = (HtmlArea) o;
238                 String JavaDoc href = a.getHrefAttribute();
239                 if (href.equalsIgnoreCase(url)) {
240                     return a;
241                 }
242             }
243         }
244         return null;
245     }
246
247
248
249     /**
250      * Gets the LastPage attribute of the HTMLUnitProcessor object
251      *
252      *@return The LastPage value
253      */

254     private HtmlPage getLastPage() {
255         int lastIndex = historyPageList.size() - 1;
256         if (!historyPageList.isEmpty()) {
257             logger.debug("Get last page");
258             HtmlPage page = (HtmlPage) historyPageList.remove(lastIndex);
259             return page;
260         }
261         else {
262             logger.warn("Page history list is Empty");
263             return null;
264         }
265     }
266
267
268
269     /**
270      * Gets the SubmittedButton attribute of the HTMLUnitProcessor object
271      *
272      *@param form Description of Parameter
273      *@return The SubmittedButton value
274      *@exception IOException Description of Exception
275      *@exception ElementNotFoundException Description of Exception
276      */

277     private HtmlInput getSubmittedButton(HtmlForm form) throws ElementNotFoundException, IOException {
278         UrlBean uBean = getUrlBean();
279         String JavaDoc fromHash = uBean.getHash();
280
281         // get the submitted Param of the url just before
282
UrlBean uBeanJustBefore = getPreviousUrlBean();
283         logger.debug("Url just before: " + uBeanJustBefore.getAbsoluteUrlValue());
284         logger.debug("New url: " + uBean.getAbsoluteUrlValue());
285
286         FormParamBean fBean = uBeanJustBefore.getSubmittedParamBean(fromHash);
287
288         //look for the HtmlInput that correpond to the submit FormParam
289
HtmlInput submitButton = null;
290         // submitted button found
291
if (fBean != null) {
292             //submit the form
293
String JavaDoc submitButtonId = fBean.getIdAtt();
294             String JavaDoc submitButtonName = fBean.getName();
295             String JavaDoc submitButtonValue = fBean.getUsedValue();
296             if (submitButtonId != null && !submitButtonId.equalsIgnoreCase("")) {
297                 submitButton = (HtmlInput) form.getHtmlElementById(submitButtonId);
298             }
299             else if (submitButtonName != null && !submitButtonName.equalsIgnoreCase("")) {
300                 submitButton = form.getInputByName(submitButtonName);
301             }
302             else if (submitButtonValue != null && !submitButtonValue.equalsIgnoreCase("")) {
303                 submitButton = form.getInputByValue(submitButtonValue);
304             }
305         }
306
307         return submitButton;
308     }
309
310
311     /**
312      * Gets the SubmittedForm attribute of the HTMLUnitProcessor object
313      *
314      *@return The SubmittedForm value
315      *@exception ElementNotFoundException Description of Exception
316      */

317     private HtmlForm getSubmittedForm() throws ElementNotFoundException {
318         String JavaDoc fromHash = getUrlBean().getHash();
319         logger.debug("From Hash: " + fromHash);
320         HtmlForm form = null;
321         String JavaDoc submittedFormName = HashUtilities.getFormNameFromHash(fromHash);
322         String JavaDoc submittedFormId = HashUtilities.getFormIdFromHash(fromHash);
323         // get the submiited form
324
if (submittedFormName == null || submittedFormName.equalsIgnoreCase("")) {
325             if (submittedFormId == null || submittedFormId.equalsIgnoreCase("")) {
326                 List formList = lastPage_.getForms();
327                 int pos = Integer.parseInt(HashUtilities.getFormPositionFromHash(fromHash));
328                 form = (HtmlForm) formList.get(pos);
329
330             }
331             else {
332                 form = (HtmlForm) lastPage_.getHtmlElementById(submittedFormId);
333             }
334         }
335         else {
336             form = (HtmlForm) lastPage_.getFormByName(submittedFormName);
337         }
338
339         return form;
340     }
341
342
343
344     /**
345      * Gets a default WebClient attribute of the HTMLUnitProcessor object
346      *
347      *@return The NewWebClient value
348      */

349     private WebClient getNewWebClient() {
350         //Get Proxy parameter
351
String JavaDoc proxyHost = getProxyHost();
352         int proxyPort = getProxyPort();
353
354         // build the webClient
355
WebClient webClient = null;
356         if (proxyHost != null && (proxyPort != 0)) {
357             webClient = new WebClient(BrowserVersion.INTERNET_EXPLORER_6_0, proxyHost, proxyPort);
358         }
359         else {
360             webClient = new WebClient(BrowserVersion.INTERNET_EXPLORER_6_0);
361         }
362
363         webClient.setRefreshHandler(new ThreadedRefreshHandler());
364
365         // disable throws exception when there is javacscript error
366
webClient.setThrowExceptionOnScriptError(false);
367         webClient.setThrowExceptionOnFailingStatusCode(false);
368
369
370         // Disable validation. It will be done by the HTMLParser
371
//webClient.setValidateHtml(false);
372

373         return webClient;
374     }
375
376
377     /**
378      * Description of the Method
379      *
380      *@param page Description of Parameter
381      */

382     private void validate(HtmlPage page) {
383         lastPage_ = page;
384         addPageToHistory(lastPage_);
385     }
386
387
388     /**
389      * Adds a feature to the Page attribute of the HTMLUnitProcessor object
390      *
391      *@param page The feature to be added to the Page attribute
392      */

393     private void addPageToHistory(HtmlPage page) {
394         System.err.println("add page size");
395         int position = getUrlBean().getPosition();
396         int lastPos = historyPageList.size() - 1;
397         if (position > lastPos) {
398             historyPageList.add(page);
399         }
400         else {
401             historyPageList.set(position, page);
402         }
403     }
404
405
406
407     /**
408      * Description of the Method
409      *
410      *@param form Description of Parameter
411      */

412     private void fillForm(HtmlForm form) {
413         UrlBean uBean = getUrlBean();
414
415         // get the queryBeanList for the current url <-> formBeanList of the previous url
416
List queryParamList = uBean.getQueryParamBeanList();
417         for (int i = 0; i < queryParamList.size(); i++) {
418             QueryParamBean qBean = (QueryParamBean) queryParamList.get(i);
419             String JavaDoc name = qBean.getName();
420             String JavaDoc value = qBean.getDefaultValue();
421             String JavaDoc type = null;
422             FormParamBean fBean = qBean.getFormParamBean();
423             if (fBean != null) {
424                 type = fBean.getType();
425             }
426             else {
427                 //this may happen during the build of the clipper
428
type = "";
429             }
430             int position = qBean.getPosition();
431
432             // fill input value
433
if (!type.equalsIgnoreCase(WebConstants.TYPE_HIDDEN)) {
434                 fillInputByName(form, name, value, position);
435             }
436
437             //select options
438
fillSelectByName(form, name, value, position);
439         }
440
441     }
442
443
444     /**
445      * Description of the Method
446      *
447      *@param form Description of Parameter
448      *@param name Description of Parameter
449      *@param value Description of Parameter
450      *@param position Description of Parameter
451      */

452     private void fillInputByName(HtmlForm form, String JavaDoc name, String JavaDoc value, int position) {
453         // get all input that have name = qBean.getName()
454
List allInputByName = form.getAllInputsByName(name);
455         if (allInputByName == null || allInputByName.size() == 0) {
456             logger.debug("[ Input List whith name = " + name + " not found ]");
457             return;
458         }
459
460         HtmlInput input = (HtmlInput) allInputByName.get(position);
461         if (input != null) {
462             logger.debug("[ Input param for queryBean: " + name + ", value: " + value + " ]");
463
464             //set the values
465
input.setValueAttribute(value);
466
467             // if it's a check box, check it!
468
if (input instanceof HtmlCheckBoxInput) {
469                 ((HtmlCheckBoxInput) input).setChecked(true);
470             }
471             // if it's a radio button, check it!
472
if (input instanceof HtmlRadioButtonInput) {
473                 ((HtmlRadioButtonInput) input).setChecked(true);
474             }
475
476         }
477         else {
478             logger.error("[ Input param for queryBean: " + name + " ]");
479         }
480     }
481
482
483     /**
484      * Description of the Method
485      *
486      *@param form Description of Parameter
487      *@param name Description of Parameter
488      *@param value Description of Parameter
489      *@param position Description of Parameter
490      */

491     private void fillSelectByName(HtmlForm form, String JavaDoc name, String JavaDoc value, int position) {
492         // get all input that have name = qBean.getName()
493
List allSelectByName = form.getSelectsByName(name);
494         if (allSelectByName == null || allSelectByName.size() == 0) {
495             return;
496         }
497
498         HtmlSelect select = (HtmlSelect) allSelectByName.get(position);
499         if (select != null) {
500             logger.debug("[ Input param for queryBean: " + name + " ]");
501             select.setSelectedAttribute(value, true);
502         }
503         else {
504             logger.error("[ Input param for queryBean: " + name + " ]");
505         }
506     }
507
508
509     /**
510      * Build a defaultClient if webClient is null
511      */

512     private void assertNotNullWebClient() {
513         if (webClient == null) {
514             setWebClient(getNewWebClient());
515         }
516
517     }
518
519 }
520
Popular Tags