KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jahia > clipbuilder > html > web > HTMLDocumentBuilder


1 package org.jahia.clipbuilder.html.web;
2
3 import java.io.*;
4 import java.util.*;
5
6 import javax.servlet.http.*;
7
8 import org.jahia.clipbuilder.html.*;
9 import org.jahia.clipbuilder.html.bean.*;
10 import org.jahia.clipbuilder.html.util.*;
11 import org.jahia.clipbuilder.html.web.Url.*;
12 import org.jahia.clipbuilder.html.web.html.*;
13 import org.jahia.clipbuilder.html.web.html.Impl.*;
14 import org.jahia.clipbuilder.html.web.html.Impl.Dom.*;
15 import org.jahia.clipbuilder.html.web.html.Impl.HTMLParser.*;
16 import org.jahia.clipbuilder.html.web.html.Impl.JDom.*;
17 import org.jahia.clipbuilder.html.web.http.impl.*;
18 import org.jahia.clipbuilder.html.web.http.*;
19
20 import org.org.apache.commons.httpclient.*;
21 import org.xml.sax.*;
22
23 /**
24  * Builder of full HTMLDocument
25  *
26  *@author Tlili Khaled
27  */

28 public class HTMLDocumentBuilder {
29     private HTMLClient client;
30     private UrlEncoderIF urlEncoder;
31     private URLMap urlMap;
32
33     //configuration
34
private int typeClient_ = CLIENT_HTMLUNIT;
35     private int typeParser_ = PARSER_JDOM;
36     private boolean webClientJavascriptEnable_ = true;
37     private int browserJavascriptEvent_;
38     private int browserJavascriptCode_;
39     private Header[] headers_ = null;
40     private boolean enableCSS_ = true;
41
42     private String JavaDoc httpMethod;
43     private UrlBean uBean;
44
45     /**
46      * Description of the Field
47      */

48     public static int URLENCODER_BASIC = 2;
49     /**
50      * Description of the Field
51      */

52     public static final int PARSER_NEKO = 0;
53
54     /**
55      * Description of the Field
56      */

57     public static final int PARSER_HTMLPARSER = 1;
58
59     /**
60      * Description of the Field
61      */

62     public static final int PARSER_JDOM = 2;
63
64     /**
65      * Description of the Field
66      */

67     public static final int CLIENT_HTMLUNIT = 0;
68     /**
69      * Description of the Field
70      */

71     public static final int CLIENT_HTTPCLIENT = 1;
72     /**
73      * Description of the Field
74      */

75     public static final int BROWSER_JAVASCRIPT_REMOVE = 0;
76     /**
77      * Description of the Field
78      */

79     public static final int BROWSER_JAVASCRIPT_REFACTOR = 1;
80     /**
81      * Description of the Field
82      */

83     public static final int BROWSER_JAVASCRIPT_DONT_CHANGE = 2;
84     private static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(HTMLDocumentBuilder.class);
85
86
87
88     /**
89      * Constructor for the HTMLDocumentBuilder object
90      *
91      *@param uBean Description of Parameter
92      *@param httpMethod Description of Parameter
93      *@param encoder Description of Parameter
94      */

95     public HTMLDocumentBuilder(UrlBean uBean, String JavaDoc httpMethod, UrlEncoderIF encoder) {
96         this.uBean = uBean;
97         this.httpMethod = httpMethod;
98         this.urlEncoder = encoder;
99         this.urlMap = new URLMap();
100
101     }
102
103
104     /**
105      * Constructor for the HTMLDocumentBuilder object
106      *
107      *@param map Description of Parameter
108      *@param encoder Description of Parameter
109      */

110     public HTMLDocumentBuilder(URLMap map, UrlEncoderIF encoder) {
111         this.urlMap = map;
112         this.urlEncoder = encoder;
113     }
114
115
116
117     /**
118      * Sets the HttpMethod attribute of the HTMLDocumentBuilder object
119      *
120      *@param httpMethod The new HttpMethod value
121      */

122     public void setHttpMethod(String JavaDoc httpMethod) {
123         this.httpMethod = httpMethod;
124     }
125
126
127     /**
128      * Sets the UrlEncoder attribute of the HTMLDocumentBuilder object
129      *
130      *@param encoder The new UrlEncoder value
131      */

132     public void setUrlEncoder(UrlEncoderIF encoder) {
133         this.urlEncoder = encoder;
134     }
135
136
137
138     /**
139      * Sets the UrlMap attribute of the HTMLDocumentBuilder object
140      *
141      *@param urlMap The new UrlMap value
142      */

143     public void setUrlMap(URLMap urlMap) {
144         this.urlMap = urlMap;
145     }
146
147
148     /**
149      * Sets the UBean attribute of the HTMLDocumentBuilder object
150      *
151      *@param uBean The new UBean value
152      */

153     public void setUBean(UrlBean uBean) {
154         this.uBean = uBean;
155     }
156
157
158     /**
159      * Sets the Client attribute of the HTMLDocumentBuilder object
160      *
161      *@param client The new Client value
162      */

163     public void setClient(HTMLClient client) {
164         this.client = client;
165     }
166
167
168     /**
169      * Gets the UrlEncoder attribute of the HTMLDocumentBuilder object
170      *
171      *@return The UrlEncoder value
172      */

173     public UrlEncoderIF getUrlEncoder() {
174         return this.urlEncoder;
175     }
176
177
178     /**
179      * Gets the HttpMethod attribute of the HTMLDocumentBuilder object
180      *
181      *@return The HttpMethod value
182      */

183     public String JavaDoc getHttpMethod() {
184         return httpMethod;
185     }
186
187
188     /**
189      * Gets the UrlMap attribute of the HTMLDocumentBuilder object
190      *
191      *@return The UrlMap value
192      */

193     public URLMap getUrlMap() {
194         return urlMap;
195     }
196
197
198     /**
199      * Gets the UBean attribute of the HTMLDocumentBuilder object
200      *
201      *@return The UBean value
202      */

203     public UrlBean getUBean() {
204         return uBean;
205     }
206
207
208     /**
209      * Gets the Client attribute of the HTMLDocumentBuilder object
210      *
211      *@return The Client value
212      */

213     public HTMLClient getClient() {
214         return client;
215     }
216
217
218     /**
219      * configure the builder
220      *
221      *@param client Description of Parameter
222      *@param htmlParser Description of Parameter
223      *@param browserJavascriptEvent Description of Parameter
224      *@param browserJavascriptCode Description of Parameter
225      *@param webClientJavascriptEnable Description of Parameter
226      *@param enableCSS Description of Parameter
227      *@param headers Description of Parameter
228      */

229     public void configure(int client, int htmlParser, int browserJavascriptEvent, int browserJavascriptCode, boolean webClientJavascriptEnable, boolean enableCSS, Header[] headers) {
230         this.typeClient_ = client;
231         this.typeParser_ = htmlParser;
232         this.webClientJavascriptEnable_ = webClientJavascriptEnable;
233         this.enableCSS_ = enableCSS;
234         this.headers_ = headers;
235         this.browserJavascriptEvent_ = browserJavascriptEvent;
236         this.browserJavascriptCode_ = browserJavascriptCode;
237     }
238
239
240     /**
241      * Description of the Method
242      *
243      *@param request Description of Parameter
244      *@param response Description of Parameter
245      *@return Description of the Returned Value
246      *@exception Exception Description of Exception
247      */

248     public HTMLDocument execute(HttpServletRequest request, HttpServletResponse response) throws Exception JavaDoc {
249         //Execute whith the choosen client
250
logger.debug("[ Get HTML as String ]");
251         String JavaDoc html = getHtmlAsString(request);
252         if (html == null) {
253             logger.error("HTML document as String is null");
254             html = "<html><body> <p>No document loaded </p> </body> <html>";
255             throw new WebClippingException("Html document can't be rendered", new NullPointerException JavaDoc());
256         }
257
258         // Execute whith the choosen parser
259
logger.debug("[ Get HTML as document ]");
260         HTMLDocument htmlDocument = buildHtmlDocument(request, response, html);
261         return htmlDocument;
262     }
263
264
265
266     /**
267      * Gets the HtmlAsString attribute of the HTMLDocumentBuilder object
268      *
269      *@param request Description of Parameter
270      *@return The HtmlAsString value
271      *@exception Exception Description of Exception
272      */

273     private String JavaDoc getHtmlAsString(HttpServletRequest request) throws Exception JavaDoc {
274         String JavaDoc res = null;
275
276         try {
277             // execute webClient
278
switch (typeClient_) {
279                 case (CLIENT_HTMLUNIT):
280                 {
281                     logger.debug("[ Client: HTMLUnit ]");
282
283                     res = executeWhithAdvancedHTMLUnitClient(request);
284                     break;
285                 }
286
287                 case (CLIENT_HTTPCLIENT):
288                 {
289                     logger.debug("[ Client: HttpClient ]");
290                     res = executeWhithHttpClient();
291                     break;
292                 }
293                 default:
294                 {
295                     logger.debug("[ No Client found: " + typeClient_ + " ]");
296                 }
297             }
298         }
299         catch (WebClippingException ex) {
300             ex.printStackTrace();
301             throw ex;
302         }
303         catch (Exception JavaDoc ex) {
304             // add errors
305
ex.printStackTrace();
306             throw new WebClippingException("Exception", ex);
307         }
308         return res;
309     }
310
311
312
313     /**
314      * Gets the HtmlAsDocument attribute of the HTMLDocumentBuilder object
315      *
316      *@param request Description of Parameter
317      *@param response Description of Parameter
318      *@param html Description of Parameter
319      *@return The HtmlAsDocument value
320      *@exception Exception Description of Exception
321      */

322     private HTMLDocument buildHtmlDocument(HttpServletRequest request, HttpServletResponse response, String JavaDoc html) throws Exception JavaDoc {
323         HTMLDocument htmlDocument = new EmptyHTMLDocument(uBean);
324         try {
325
326             switch (typeParser_) {
327                 case (PARSER_NEKO):
328                 {
329                     logger.debug("[ Parser: Neko]");
330                     htmlDocument = executeWhithNekoParser(html);
331                     break;
332                 }
333                 case (PARSER_JDOM):
334                 {
335                     logger.debug("[ Parser: JDom ]");
336                     htmlDocument = executeWhithJDomParser(html);
337                     break;
338                 }
339                 case (PARSER_HTMLPARSER):
340                 {
341                     logger.debug("[ Parser: HTMLParser ]");
342                     htmlDocument = executeWhithHTMLParser(html);
343                     break;
344                 }
345
346                 default:
347                 {
348                     logger.error("[ No parser found:" + typeParser_ + " ]");
349                 }
350
351             }
352
353             //set transformer properties that depend on the client
354
HTMLTransformer transformer = htmlDocument.getTransformer();
355
356             // javascript code
357
switch (browserJavascriptCode_) {
358                 case (BROWSER_JAVASCRIPT_REMOVE):
359                 {
360                     transformer.removeBodyScript(true);
361                     transformer.removeHeadScriptTag(true);
362                     transformer.removeMetaTag(false);
363                     break;
364                 }
365
366                 case (BROWSER_JAVASCRIPT_REFACTOR):
367                 {
368                     transformer.removeBodyScript(false);
369                     transformer.removeHeadScriptTag(false);
370                     transformer.refactorJavascriptCode(true);
371                     transformer.removeMetaTag(false);
372                     break;
373                 }
374                 case (BROWSER_JAVASCRIPT_DONT_CHANGE):
375                 {
376                     transformer.removeBodyScript(false);
377                     transformer.removeHeadScriptTag(false);
378                     transformer.refactorJavascriptCode(false);
379                     transformer.removeMetaTag(false);
380                     break;
381                 }
382
383                 default:
384                 {
385                     transformer.removeBodyScript(false);
386                     transformer.removeHeadScriptTag(false);
387                     transformer.refactorJavascriptCode(true);
388                     transformer.removeMetaTag(false);
389                     break;
390                 }
391             }
392
393             // javascript event
394
switch (browserJavascriptEvent_) {
395                 case (BROWSER_JAVASCRIPT_REMOVE):
396                 {
397                     transformer.removeJavascriptEvent(true);
398                     transformer.refactorJavascriptEvent(false);
399                     break;
400                 }
401
402                 case (BROWSER_JAVASCRIPT_REFACTOR):
403                 {
404                     transformer.removeJavascriptEvent(false);
405                     transformer.refactorJavascriptEvent(true);
406                     break;
407                 }
408                 case (BROWSER_JAVASCRIPT_DONT_CHANGE):
409                 {
410                     transformer.removeJavascriptEvent(false);
411                     transformer.refactorJavascriptEvent(false);
412                     break;
413                 }
414
415                 default:
416                 {
417                     transformer.removeJavascriptEvent(false);
418                     transformer.refactorJavascriptEvent(true);
419                     break;
420                 }
421             }
422
423             //logger.debug(htmlDocument.getOriginalDocumentAsString());
424
return htmlDocument;
425         }
426         catch (WebClippingException ex) {
427             // add errors
428
throw ex;
429         }
430         catch (Exception JavaDoc ex) {
431             // add errors
432
throw new WebClippingException("Failing Http Status Code", ex);
433         }
434     }
435
436
437     /**
438      * Description of the Method
439      *
440      *@param request Description of Parameter
441      *@return Description of the Returned Value
442      *@exception Exception Description of Exception
443      */

444     private String JavaDoc[] executeWhithSimpleHTMLUnitClient(HttpServletRequest request) throws Exception JavaDoc {
445         HTMLUnitProcessor client = new HTMLUnitProcessor(headers_, httpMethod, uBean);
446
447         //set WebClient
448
HttpSession session = request.getSession();
449         client.setWebClient(session.getAttribute(org.jahia.clipbuilder.html.web.Constant.WebConstants.SIMPLE_WEBCLIENT));
450
451         // set javascript
452
client.enabledJavascript(this.webClientJavascriptEnable_);
453
454         // execute
455
client.execute();
456
457         //update WebClient
458
session.setAttribute(org.jahia.clipbuilder.html.web.Constant.WebConstants.SIMPLE_WEBCLIENT, client.getWebClient());
459
460         String JavaDoc html = client.getHtmlAsString();
461         String JavaDoc encoding = client.getResponseCharSet();
462         String JavaDoc[] res = {html, encoding};
463         return res;
464     }
465
466
467     /**
468      * Description of the Method
469      *
470      *@param request Description of Parameter
471      *@return Description of the Returned Value
472      *@exception Exception Description of Exception
473      */

474     private String JavaDoc executeWhithAdvancedHTMLUnitClient(HttpServletRequest request) throws Exception JavaDoc {
475         // get the client
476
HTMLUnitProcessor client = (HTMLUnitProcessor) getClient();
477         if (client == null) {
478             client = new HTMLUnitProcessor(headers_, httpMethod, uBean);
479         }
480         else {
481             //update webClient parameter
482
UrlBean previousUrlBean = SessionManager.getClipperBean(request).getLastRecordedUrlBean();
483             if (previousUrlBean == null) {
484                 previousUrlBean = SessionManager.getRecorderBean(request).getCurrentUrlBean();
485             }
486             client.setUrlBean(uBean);
487             client.setHeaders(headers_);
488             client.setMethod(httpMethod);
489             client.setPreviousUrlBean(previousUrlBean);
490
491         }
492
493         // set javascript
494
client.enabledJavascript(webClientJavascriptEnable_);
495
496         // execute
497
client.execute();
498
499         String JavaDoc html = client.getHtmlAsString();
500         //String encoding = client.getResponseCharSet();
501
setClient(client);
502         return html;
503     }
504
505
506
507     /**
508      * Description of the Method
509      *
510      *@return Description of the Returned Value
511      *@exception Exception Description of Exception
512      */

513     private String JavaDoc executeWhithHttpClient() throws Exception JavaDoc {
514         HttpProcessor client = new HttpProcessor(httpMethod, uBean);
515         HttpProcessor lastClient = (HttpProcessor) getClient();
516
517         //set HttpState
518
if (lastClient != null) {
519             client.setStateBeforeExecution(lastClient.getStateAfterExecution());
520         }
521
522         // set javascript
523
client.enabledJavascript(this.webClientJavascriptEnable_);
524
525         // execute
526
client.execute();
527
528         String JavaDoc html = client.getHtmlAsString();
529         String JavaDoc encoding = client.getCharEncoding();
530
531         setClient(client);
532         return html;
533     }
534
535
536     /**
537      * Description of the Method
538      *
539      *@param html Description of Parameter
540      *@return Description of the Returned Value
541      *@exception Exception Description of Exception
542      */

543     private HTMLDocument executeWhithJDomParser(String JavaDoc html) throws Exception JavaDoc {
544         // HTML --> XHTML
545
JDomHTMLDocument jDoc = null;
546
547         //Get dafautlt parser
548

549         HTMLParser parser = new DefaultHTMLParser();
550         // build the transformed document
551
org.w3c.dom.Document JavaDoc w3cDoc = parser.parse(html);
552
553         logger.debug("Parse finish");
554
555         if (w3cDoc == null) {
556             logger.error("[Parsing failed !!!!]");
557         }
558         else {
559             logger.debug("[Parsing finish whith succes !!!!]");
560         }
561
562         //convert to JDomHTML document
563
jDoc = new JDomHTMLDocument(uBean, w3cDoc);
564         if (jDoc == null) {
565             logger.error("[Get HTMLDocument failed !!!!]");
566         }
567         else {
568             logger.debug("[Get HTMLDocument finish whith succes !!!!]");
569         }
570
571         JDomHTMLTransformer jTransform = new JDomHTMLTransformer(this, jDoc, enableCSS_, true);
572
573         //set transformer
574
jDoc.setTransformer(jTransform);
575
576         return jDoc;
577     }
578
579
580     /**
581      * Description of the Method
582      *
583      *@param html Description of Parameter
584      *@return Description of the Returned Value
585      *@exception Exception Description of Exception
586      */

587     private HTMLDocument executeWhithHTMLParser(String JavaDoc html) throws Exception JavaDoc {
588         // HTML --> XHTML
589
HTMLParserDocument doc = null;
590
591         //convert to JDomHTML document
592
doc = new HTMLParserDocument(uBean, html);
593         if (doc == null) {
594             logger.error("[Get HTMLDocument failed !!!!]");
595         }
596         else {
597             logger.debug("[Get HTMLDocument finish whith succes !!!!]");
598         }
599
600         HTMLParserTransformer transformer = new HTMLParserTransformer(this, doc, enableCSS_);
601
602         //set transformer
603
doc.setTransformer(transformer);
604
605         return doc;
606     }
607
608
609
610     /**
611      * Description of the Method
612      *
613      *@param html Description of Parameter
614      *@return Description of the Returned Value
615      */

616     private HTMLDocument executeWhithNekoParser(String JavaDoc html) {
617         // HTML --> XHTML
618
DomHTMLDocument xDoc = null;
619         try {
620             // Get default parser
621
HTMLParser parser = new DefaultHTMLParser();
622
623             // build the transformed document
624
org.w3c.dom.Document JavaDoc w3cDoc = parser.parse(html);
625
626             if (w3cDoc == null) {
627                 logger.error("[Parsing failed !!!!]");
628             }
629             else {
630                 logger.debug("[Parsing finish whith succes !!!!]");
631             }
632
633             //convert to XercesHTMLDocument
634
xDoc = new DomHTMLDocument(uBean, w3cDoc, html);
635
636             if (xDoc == null) {
637                 logger.error("[Get HTMLDocument failed !!!!]");
638             }
639             else {
640                 logger.debug("[Get HTMLDocument finish whith succes !!!!]");
641             }
642
643             // set the transformer
644
xDoc.setTransformer(new DomHTMLTransformer(this, xDoc, enableCSS_));
645
646         }
647         catch (IOException ex) {
648             logger.error("[ IOException " + ex.getMessage() + " ]");
649             ex.printStackTrace();
650         }
651         catch (SAXException ex) {
652             logger.error("[ SAXException " + ex.getMessage() + " ]");
653             ex.printStackTrace();
654         }
655         catch (Exception JavaDoc ex) {
656             logger.error("[ Exception " + ex.getMessage() + " ]");
657             ex.printStackTrace();
658         }
659
660         finally {
661
662             return xDoc;
663         }
664     }
665
666 }
667
Free Books   Free Magazines  
Popular Tags