1 package org.jahia.clipbuilder.html.web; 2 3 import java.io.*; 4 import java.util.*; 5 6 import javax.servlet.http.*; 7 8 import org.jahia.clipbuilder.html.*; 9 import org.jahia.clipbuilder.html.bean.*; 10 import org.jahia.clipbuilder.html.util.*; 11 import org.jahia.clipbuilder.html.web.Url.*; 12 import org.jahia.clipbuilder.html.web.html.*; 13 import org.jahia.clipbuilder.html.web.html.Impl.*; 14 import org.jahia.clipbuilder.html.web.html.Impl.Dom.*; 15 import org.jahia.clipbuilder.html.web.html.Impl.HTMLParser.*; 16 import org.jahia.clipbuilder.html.web.html.Impl.JDom.*; 17 import org.jahia.clipbuilder.html.web.http.impl.*; 18 import org.jahia.clipbuilder.html.web.http.*; 19 20 import org.org.apache.commons.httpclient.*; 21 import org.xml.sax.*; 22 23 28 public class HTMLDocumentBuilder { 29 private HTMLClient client; 30 private UrlEncoderIF urlEncoder; 31 private URLMap urlMap; 32 33 private int typeClient_ = CLIENT_HTMLUNIT; 35 private int typeParser_ = PARSER_JDOM; 36 private boolean webClientJavascriptEnable_ = true; 37 private int browserJavascriptEvent_; 38 private int browserJavascriptCode_; 39 private Header[] headers_ = null; 40 private boolean enableCSS_ = true; 41 42 private String httpMethod; 43 private UrlBean uBean; 44 45 48 public static int URLENCODER_BASIC = 2; 49 52 public static final int PARSER_NEKO = 0; 53 54 57 public static final int PARSER_HTMLPARSER = 1; 58 59 62 public static final int PARSER_JDOM = 2; 63 64 67 public static final int CLIENT_HTMLUNIT = 0; 68 71 public static final int CLIENT_HTTPCLIENT = 1; 72 75 public static final int BROWSER_JAVASCRIPT_REMOVE = 0; 76 79 public static final int BROWSER_JAVASCRIPT_REFACTOR = 1; 80 83 public static final int BROWSER_JAVASCRIPT_DONT_CHANGE = 2; 84 private static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(HTMLDocumentBuilder.class); 85 86 87 88 95 public HTMLDocumentBuilder(UrlBean uBean, String httpMethod, UrlEncoderIF encoder) { 96 this.uBean = uBean; 97 this.httpMethod = httpMethod; 98 this.urlEncoder = encoder; 99 this.urlMap = new URLMap(); 100 101 } 102 103 104 110 public HTMLDocumentBuilder(URLMap map, UrlEncoderIF encoder) { 111 this.urlMap = map; 112 this.urlEncoder = encoder; 113 } 114 115 116 117 122 public void setHttpMethod(String httpMethod) { 123 this.httpMethod = httpMethod; 124 } 125 126 127 132 public void setUrlEncoder(UrlEncoderIF encoder) { 133 this.urlEncoder = encoder; 134 } 135 136 137 138 143 public void setUrlMap(URLMap urlMap) { 144 this.urlMap = urlMap; 145 } 146 147 148 153 public void setUBean(UrlBean uBean) { 154 this.uBean = uBean; 155 } 156 157 158 163 public void setClient(HTMLClient client) { 164 this.client = client; 165 } 166 167 168 173 public UrlEncoderIF getUrlEncoder() { 174 return this.urlEncoder; 175 } 176 177 178 183 public String getHttpMethod() { 184 return httpMethod; 185 } 186 187 188 193 public URLMap getUrlMap() { 194 return urlMap; 195 } 196 197 198 203 public UrlBean getUBean() { 204 return uBean; 205 } 206 207 208 213 public HTMLClient getClient() { 214 return client; 215 } 216 217 218 229 public void configure(int client, int htmlParser, int browserJavascriptEvent, int browserJavascriptCode, boolean webClientJavascriptEnable, boolean enableCSS, Header[] headers) { 230 this.typeClient_ = client; 231 this.typeParser_ = htmlParser; 232 this.webClientJavascriptEnable_ = webClientJavascriptEnable; 233 this.enableCSS_ = enableCSS; 234 this.headers_ = headers; 235 this.browserJavascriptEvent_ = browserJavascriptEvent; 236 this.browserJavascriptCode_ = browserJavascriptCode; 237 } 238 239 240 248 public HTMLDocument execute(HttpServletRequest request, HttpServletResponse response) throws Exception { 249 logger.debug("[ Get HTML as String ]"); 251 String html = getHtmlAsString(request); 252 if (html == null) { 253 logger.error("HTML document as String is null"); 254 html = "<html><body> <p>No document loaded </p> </body> <html>"; 255 throw new WebClippingException("Html document can't be rendered", new NullPointerException ()); 256 } 257 258 logger.debug("[ Get HTML as document ]"); 260 HTMLDocument htmlDocument = buildHtmlDocument(request, response, html); 261 return htmlDocument; 262 } 263 264 265 266 273 private String getHtmlAsString(HttpServletRequest request) throws Exception { 274 String res = null; 275 276 try { 277 switch (typeClient_) { 279 case (CLIENT_HTMLUNIT): 280 { 281 logger.debug("[ Client: HTMLUnit ]"); 282 283 res = executeWhithAdvancedHTMLUnitClient(request); 284 break; 285 } 286 287 case (CLIENT_HTTPCLIENT): 288 { 289 logger.debug("[ Client: HttpClient ]"); 290 res = executeWhithHttpClient(); 291 break; 292 } 293 default: 294 { 295 logger.debug("[ No Client found: " + typeClient_ + " ]"); 296 } 297 } 298 } 299 catch (WebClippingException ex) { 300 ex.printStackTrace(); 301 throw ex; 302 } 303 catch (Exception ex) { 304 ex.printStackTrace(); 306 throw new WebClippingException("Exception", ex); 307 } 308 return res; 309 } 310 311 312 313 322 private HTMLDocument buildHtmlDocument(HttpServletRequest request, HttpServletResponse response, String html) throws Exception { 323 HTMLDocument htmlDocument = new EmptyHTMLDocument(uBean); 324 try { 325 326 switch (typeParser_) { 327 case (PARSER_NEKO): 328 { 329 logger.debug("[ Parser: Neko]"); 330 htmlDocument = executeWhithNekoParser(html); 331 break; 332 } 333 case (PARSER_JDOM): 334 { 335 logger.debug("[ Parser: JDom ]"); 336 htmlDocument = executeWhithJDomParser(html); 337 break; 338 } 339 case (PARSER_HTMLPARSER): 340 { 341 logger.debug("[ Parser: HTMLParser ]"); 342 htmlDocument = executeWhithHTMLParser(html); 343 break; 344 } 345 346 default: 347 { 348 logger.error("[ No parser found:" + typeParser_ + " ]"); 349 } 350 351 } 352 353 HTMLTransformer transformer = htmlDocument.getTransformer(); 355 356 switch (browserJavascriptCode_) { 358 case (BROWSER_JAVASCRIPT_REMOVE): 359 { 360 transformer.removeBodyScript(true); 361 transformer.removeHeadScriptTag(true); 362 transformer.removeMetaTag(false); 363 break; 364 } 365 366 case (BROWSER_JAVASCRIPT_REFACTOR): 367 { 368 transformer.removeBodyScript(false); 369 transformer.removeHeadScriptTag(false); 370 transformer.refactorJavascriptCode(true); 371 transformer.removeMetaTag(false); 372 break; 373 } 374 case (BROWSER_JAVASCRIPT_DONT_CHANGE): 375 { 376 transformer.removeBodyScript(false); 377 transformer.removeHeadScriptTag(false); 378 transformer.refactorJavascriptCode(false); 379 transformer.removeMetaTag(false); 380 break; 381 } 382 383 default: 384 { 385 transformer.removeBodyScript(false); 386 transformer.removeHeadScriptTag(false); 387 transformer.refactorJavascriptCode(true); 388 transformer.removeMetaTag(false); 389 break; 390 } 391 } 392 393 switch (browserJavascriptEvent_) { 395 case (BROWSER_JAVASCRIPT_REMOVE): 396 { 397 transformer.removeJavascriptEvent(true); 398 transformer.refactorJavascriptEvent(false); 399 break; 400 } 401 402 case (BROWSER_JAVASCRIPT_REFACTOR): 403 { 404 transformer.removeJavascriptEvent(false); 405 transformer.refactorJavascriptEvent(true); 406 break; 407 } 408 case (BROWSER_JAVASCRIPT_DONT_CHANGE): 409 { 410 transformer.removeJavascriptEvent(false); 411 transformer.refactorJavascriptEvent(false); 412 break; 413 } 414 415 default: 416 { 417 transformer.removeJavascriptEvent(false); 418 transformer.refactorJavascriptEvent(true); 419 break; 420 } 421 } 422 423 return htmlDocument; 425 } 426 catch (WebClippingException ex) { 427 throw ex; 429 } 430 catch (Exception ex) { 431 throw new WebClippingException("Failing Http Status Code", ex); 433 } 434 } 435 436 437 444 private String [] executeWhithSimpleHTMLUnitClient(HttpServletRequest request) throws Exception { 445 HTMLUnitProcessor client = new HTMLUnitProcessor(headers_, httpMethod, uBean); 446 447 HttpSession session = request.getSession(); 449 client.setWebClient(session.getAttribute(org.jahia.clipbuilder.html.web.Constant.WebConstants.SIMPLE_WEBCLIENT)); 450 451 client.enabledJavascript(this.webClientJavascriptEnable_); 453 454 client.execute(); 456 457 session.setAttribute(org.jahia.clipbuilder.html.web.Constant.WebConstants.SIMPLE_WEBCLIENT, client.getWebClient()); 459 460 String html = client.getHtmlAsString(); 461 String encoding = client.getResponseCharSet(); 462 String [] res = {html, encoding}; 463 return res; 464 } 465 466 467 474 private String executeWhithAdvancedHTMLUnitClient(HttpServletRequest request) throws Exception { 475 HTMLUnitProcessor client = (HTMLUnitProcessor) getClient(); 477 if (client == null) { 478 client = new HTMLUnitProcessor(headers_, httpMethod, uBean); 479 } 480 else { 481 UrlBean previousUrlBean = SessionManager.getClipperBean(request).getLastRecordedUrlBean(); 483 if (previousUrlBean == null) { 484 previousUrlBean = SessionManager.getRecorderBean(request).getCurrentUrlBean(); 485 } 486 client.setUrlBean(uBean); 487 client.setHeaders(headers_); 488 client.setMethod(httpMethod); 489 client.setPreviousUrlBean(previousUrlBean); 490 491 } 492 493 client.enabledJavascript(webClientJavascriptEnable_); 495 496 client.execute(); 498 499 String html = client.getHtmlAsString(); 500 setClient(client); 502 return html; 503 } 504 505 506 507 513 private String executeWhithHttpClient() throws Exception { 514 HttpProcessor client = new HttpProcessor(httpMethod, uBean); 515 HttpProcessor lastClient = (HttpProcessor) getClient(); 516 517 if (lastClient != null) { 519 client.setStateBeforeExecution(lastClient.getStateAfterExecution()); 520 } 521 522 client.enabledJavascript(this.webClientJavascriptEnable_); 524 525 client.execute(); 527 528 String html = client.getHtmlAsString(); 529 String encoding = client.getCharEncoding(); 530 531 setClient(client); 532 return html; 533 } 534 535 536 543 private HTMLDocument executeWhithJDomParser(String html) throws Exception { 544 JDomHTMLDocument jDoc = null; 546 547 549 HTMLParser parser = new DefaultHTMLParser(); 550 org.w3c.dom.Document w3cDoc = parser.parse(html); 552 553 logger.debug("Parse finish"); 554 555 if (w3cDoc == null) { 556 logger.error("[Parsing failed !!!!]"); 557 } 558 else { 559 logger.debug("[Parsing finish whith succes !!!!]"); 560 } 561 562 jDoc = new JDomHTMLDocument(uBean, w3cDoc); 564 if (jDoc == null) { 565 logger.error("[Get HTMLDocument failed !!!!]"); 566 } 567 else { 568 logger.debug("[Get HTMLDocument finish whith succes !!!!]"); 569 } 570 571 JDomHTMLTransformer jTransform = new JDomHTMLTransformer(this, jDoc, enableCSS_, true); 572 573 jDoc.setTransformer(jTransform); 575 576 return jDoc; 577 } 578 579 580 587 private HTMLDocument executeWhithHTMLParser(String html) throws Exception { 588 HTMLParserDocument doc = null; 590 591 doc = new HTMLParserDocument(uBean, html); 593 if (doc == null) { 594 logger.error("[Get HTMLDocument failed !!!!]"); 595 } 596 else { 597 logger.debug("[Get HTMLDocument finish whith succes !!!!]"); 598 } 599 600 HTMLParserTransformer transformer = new HTMLParserTransformer(this, doc, enableCSS_); 601 602 doc.setTransformer(transformer); 604 605 return doc; 606 } 607 608 609 610 616 private HTMLDocument executeWhithNekoParser(String html) { 617 DomHTMLDocument xDoc = null; 619 try { 620 HTMLParser parser = new DefaultHTMLParser(); 622 623 org.w3c.dom.Document w3cDoc = parser.parse(html); 625 626 if (w3cDoc == null) { 627 logger.error("[Parsing failed !!!!]"); 628 } 629 else { 630 logger.debug("[Parsing finish whith succes !!!!]"); 631 } 632 633 xDoc = new DomHTMLDocument(uBean, w3cDoc, html); 635 636 if (xDoc == null) { 637 logger.error("[Get HTMLDocument failed !!!!]"); 638 } 639 else { 640 logger.debug("[Get HTMLDocument finish whith succes !!!!]"); 641 } 642 643 xDoc.setTransformer(new DomHTMLTransformer(this, xDoc, enableCSS_)); 645 646 } 647 catch (IOException ex) { 648 logger.error("[ IOException " + ex.getMessage() + " ]"); 649 ex.printStackTrace(); 650 } 651 catch (SAXException ex) { 652 logger.error("[ SAXException " + ex.getMessage() + " ]"); 653 ex.printStackTrace(); 654 } 655 catch (Exception ex) { 656 logger.error("[ Exception " + ex.getMessage() + " ]"); 657 ex.printStackTrace(); 658 } 659 660 finally { 661 662 return xDoc; 663 } 664 } 665 666 } 667
| Popular Tags
|