1 package org.jahia.clipbuilder.html.web.html.Impl.HTMLParser; 2 3 import java.util.*; 4 5 import javax.swing.text.html.*; 6 7 import org.jahia.clipbuilder.html.util.*; 8 import org.jahia.clipbuilder.html.web.Constant.*; 9 import org.htmlparser.*; 10 import org.htmlparser.Tag; 11 import org.htmlparser.nodes.*; 12 import org.htmlparser.tags.*; 13 import org.htmlparser.util.*; 14 import org.htmlparser.visitors.*; 15 import org.jahia.clipbuilder.html.struts.Util.Constants; 16 import org.htmlparser.scanners.*; 17 22 class TransformBuilderVisitor extends NodeVisitor { 23 26 protected int nbLinks = 0; 27 30 protected String formParentName; 31 34 protected String formParentId; 35 38 protected int formParentPosition = -1; 39 40 43 protected List framesList = new ArrayList(); 44 47 private Node firstNode; 48 private HTMLParserTransformer transformer; 49 50 53 public static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(TransformBuilderVisitor.class); 54 55 56 61 public TransformBuilderVisitor(HTMLParserTransformer transformer) { 62 this.transformer = transformer; 63 ScriptScanner.STRICT = false; 64 } 65 66 67 72 public void setFirstNode(Node firstNode) { 73 this.firstNode = firstNode; 74 } 75 76 77 82 public Node getFirstNode() { 83 return firstNode; 84 } 85 86 87 92 public void visitTag(Tag tag) { 93 if (tag instanceof Html) { 94 setFirstNode(tag); 95 96 } 97 98 if (tag instanceof BaseHrefTag) { 99 processBaseHrefTag((BaseHrefTag) tag); 100 } 101 processTargetAtt(tag); 103 104 processStyleTag(tag); 106 107 if (transformer.isRemoveBodyScript() || transformer.isRemoveHeadScriptTag()) { 109 processEventHandlerAtt(tag); 110 } 111 if (tag.getAttribute("href") != null) { 113 try { 114 nbLinks = processHrefAttribute(tag, nbLinks, Constants.WEB_BROWSER_SHOW_BROWSE); 115 logger.debug("Nb link = " + nbLinks); 116 } 117 catch (WebClippingException ex) { 118 ex.printStackTrace(); 119 transformer.addParsingErrors(ex.getMessage()); 120 } 121 } 122 123 if (tag.getAttribute("src") != null) { 125 try { 126 processSrcAttribute(tag); 127 } 128 catch (WebClippingException ex) { 129 ex.printStackTrace(); 130 131 transformer.addParsingErrors(ex.getMessage()); 132 } 133 } 134 135 if (tag instanceof FrameTag) { 137 processFrameTag((FrameTag) tag); 138 } 139 140 if (tag instanceof MetaTag) { 141 if (transformer.isRemoveMetaTag()) { 142 tag.removeAttribute(HTML.Attribute.HTTPEQUIV.toString()); 143 } 144 try { 145 processMetaTag((MetaTag) tag, Constants.WEB_BROWSER_SHOW_BROWSE); 146 } 147 catch (WebClippingException ex) { 148 ex.printStackTrace(); 149 150 transformer.addParsingErrors(ex.getMessage()); 151 } 152 } 153 154 if (tag instanceof BodyTag) { 156 165 } 166 if (tag instanceof HeadTag) { 168 } 170 171 else if (tag instanceof TitleTag) { 173 } 175 176 else if (tag instanceof FormTag) { 178 try { 179 processFormTag((FormTag) tag, Constants.WEB_BROWSER_SHOW_BROWSE); 180 181 } 182 catch (WebClippingException ex) { 183 ex.printStackTrace(); 184 transformer.addParsingErrors(ex.getMessage()); 185 } 186 } 187 188 else if (tag instanceof InputTag) { 190 try { 191 processInputTag((InputTag) tag); 192 } 193 catch (WebClippingException ex) { 194 ex.printStackTrace(); 195 transformer.addParsingErrors(ex.getMessage()); 196 } 197 } 198 else if (tag instanceof SelectTag) { 199 processSelectTag((SelectTag) tag); 200 } 201 202 else if (tag instanceof StyleTag) { 204 213 } 214 215 else if (tag instanceof ScriptTag) { 217 logger.debug("[ Process script tag ]"); 218 if (transformer.isRemoveBodyScript() || transformer.isRemoveHeadScriptTag()) { 219 tag.setChildren(new NodeList()); 220 tag.removeAttribute("src"); 221 } 222 try { 223 processScriptTag((ScriptTag) tag); 224 } 225 catch (WebClippingException ex) { 226 transformer.addParsingErrors(ex.getMessage()); 227 logger.error("Rewrite javascript error"); 228 ex.printStackTrace(); 229 } 230 } 231 232 } 233 234 235 236 241 protected HTMLParserTransformer getTransformer() { 242 return transformer; 243 } 244 245 246 251 protected void processTargetAtt(Tag tag) { 252 tag.removeAttribute("target"); 253 } 254 255 256 257 258 263 protected void processBaseHrefTag(BaseHrefTag tag) { 264 tag.removeAttribute("href"); 265 } 266 267 268 273 protected void processFrameTag(FrameTag tag) { 274 String name = tag.getAttribute("name"); 275 String id = tag.getAttribute("id"); 276 String src = tag.getAttribute("src"); 277 transformer.addFrame(framesList, name, id, src); 278 transformer.getDocument().setFramesList(framesList); 279 } 280 281 282 283 289 protected void processLinkTag(Tag tag) throws WebClippingException { 290 if (transformer.isEnableCSS()) { 291 try { 292 String media = tag.getAttribute("media"); 293 String href = tag.getAttribute("href"); 295 tag = new StyleTag(); 297 StyleTag endTag = new StyleTag(); 298 endTag.setTagName("/style"); 299 tag.setEndTag(endTag); 300 301 String content = "<!--" + transformer.getImportCssContent(href, media) + "-->"; 303 TextNode t = new TextNode(content); 304 NodeList children = new NodeList(); 305 children.add(t); 306 tag.setChildren(children); 307 308 311 312 } 313 catch (ParserException ex) { 314 ex.printStackTrace(); 315 throw new WebClippingException("Link", ex); 316 } 317 318 catch (Exception ex) { 319 ex.printStackTrace(); 320 throw new WebClippingException("Link", ex); 321 } 322 finally { 323 return; 324 } 325 326 } 327 else { 329 logger.debug("[ CSS removed ]"); 330 return; 331 } 332 333 } 334 335 336 342 protected void processStyleTag(StyleTag tag) throws WebClippingException { 343 344 try { 345 346 String content = transformer.getInlineCssContent(tag.toPlainTextString()); 348 349 logger.debug("[ Process Inline css ]"); 351 Hashtable atts = tag.getAttributes(); 352 tag = new StyleTag(); 353 tag.setAttributes(atts); 354 StyleTag endTag = new StyleTag(); 355 endTag.setTagName("/style"); 356 tag.setEndTag(endTag); 357 358 TextNode t = new TextNode("<!--" + content + "-->"); 359 NodeList children = new NodeList(); 360 children.add(t); 361 tag.setChildren(children); 362 363 } 364 catch (WebClippingException ex) { 365 throw ex; 366 } 367 catch (Exception ex) { 368 throw new WebClippingException("Style", ex); 369 } 370 finally { 371 return; 372 } 373 374 } 375 376 377 378 385 protected void processFormTag(FormTag tag, String webBrowserAction) throws WebClippingException { 386 387 try { 388 formParentPosition++; 390 391 tag = (FormTag) tag.clone(); 393 String url = tag.getFormLocation(); 395 formParentName = tag.getFormName(); 396 formParentId = tag.getAttribute("id"); 397 398 if (formParentId == null) { 399 formParentId = ""; 400 } 401 if (formParentName == null) { 402 formParentName = ""; 403 } 404 405 String method = tag.getFormMethod(); 406 if (method == null) { 407 method = "GET"; 408 } 409 String enctype = tag.getAttribute("enctype"); 410 411 tag.setFormLocation(transformer.getRewritedActionValue(url, method, formParentName, formParentId, enctype, formParentPosition, webBrowserAction)); 412 413 tag.setAttribute("method", "post"); 415 416 tag.removeAttribute("target"); 418 tag.removeAttribute("enctype"); 420 421 transformer.resetInputHash(); 422 } 423 catch (Exception ex) { 424 throw new WebClippingException("Form tag", ex); 425 } 426 finally { 427 return; 428 } 429 430 } 431 432 433 434 440 protected void processHtmlTag(Html tag) throws WebClippingException { 441 try { 442 tag.setTagName(HTML.Tag.DIV.toString()); 443 Div end = new Div(); 444 end.setTagName("/div"); 445 tag.setEndTag(end); 446 String classValue = CssUtilities.HTML_CLASS; 447 tag.setAttribute(HTML.Attribute.CLASS.toString(), classValue); 448 449 } 450 catch (Exception ex) { 451 throw new WebClippingException("Html tag", ex); 452 } 453 finally { 454 return; 455 } 456 457 } 458 459 460 466 protected void processTitleTag(TitleTag tag) throws WebClippingException { 467 try { 468 474 } 475 catch (Exception ex) { 476 throw new WebClippingException("Title tag", ex); 477 } 478 finally { 479 return; 480 } 481 } 482 483 484 487 503 514 protected int processHrefAttribute(Tag tag, int position, String webBrowserAction) throws WebClippingException { 515 logger.debug("[ process href Attribute ]"); 516 try { 517 String href = tag.getAttribute("href"); 518 logger.debug("Found href value: " + href); 519 String id = tag.getAttribute("id"); 522 String pos = "" + position; 523 if (tag.getTagName().equalsIgnoreCase("a")) { 524 if (id == null) { 525 tag.setAttribute("id", HashUtilities.buildLinkHash(href, id, pos)); 526 } 527 tag.removeAttribute("target"); 529 position++; 530 } 531 532 534 href = transformer.getRewritedHrefValue(tag.getTagName(), href, id, pos, webBrowserAction); 535 tag.setAttribute("href", href); 536 537 logger.debug("Href rewrited"); 538 } 539 catch (Exception ex) { 540 logger.error("Error has occured whith href"); 541 ex.printStackTrace(); 542 throw new WebClippingException("Href attribute", ex); 543 } 544 finally { 545 return position; 546 } 547 548 } 549 550 551 557 protected void processSrcAttribute(Tag tag) throws WebClippingException { 558 logger.debug("[ Process scr attribute ]"); 559 try { 560 561 String src = tag.getAttribute("src"); 562 logger.debug("Found src: " + src); 563 tag.setAttribute("src", transformer.relatifToAbsolute(src)); 564 } 565 catch (Exception ex) { 566 throw new WebClippingException("Src Attribute", ex); 567 } 568 finally { 569 return; 570 } 571 572 } 573 574 575 581 protected void processScriptTag(ScriptTag tag) throws WebClippingException { 582 try { 583 String src = tag.getAttribute(HTML.Attribute.SRC.toString()); 585 if (src != null && !src.equals("")) { 586 String content = transformer.getImportJavascriptContent(src, Constants.WEB_BROWSER_SHOW_BROWSE); 588 590 592 599 } 600 else { 601 603 String content = tag.getScriptCode(); 604 if (content != null) { 605 Hashtable atts = tag.getAttributes(); 606 tag = new ScriptTag(); 607 tag.setAttributes(atts); 608 StyleTag endTag = new StyleTag(); 609 endTag.setTagName("/script"); 610 tag.setEndTag(endTag); 611 String baseUrl = transformer.getUrlBean().getAbsoluteUrlValue(); 612 String refactoredJavascript = transformer.getRefactoredJavascript(baseUrl, Constants.WEB_BROWSER_SHOW_BROWSE, content); 613 refactoredJavascript = ""; 614 TextNode t = new TextNode(refactoredJavascript); 615 NodeList children = new NodeList(); 616 children.add(t); 617 tag.setChildren(children); 618 } 619 620 } 621 622 } 623 catch (Exception ex) { 624 throw new WebClippingException("Script", ex); 625 } 626 finally { 627 return; 628 } 629 630 } 631 632 633 640 protected void processMetaTag(MetaTag tag, String webBrowserAction) throws WebClippingException { 641 logger.debug("[process meta tag]"); 642 try { 643 String httpEquiv = tag.getAttribute(HTML.Attribute.HTTPEQUIV.toString()); 645 if (httpEquiv != null) { 646 if ("refresh".equals(httpEquiv.toLowerCase())) { 648 String content = tag.getAttribute(HTML.Attribute.CONTENT.toString()); 649 int urlIndex = content.toLowerCase().indexOf("url"); 651 if (urlIndex != -1) { 652 String contentBeforeURL = content.substring(0, urlIndex).trim(); 654 logger.debug("[ Content before url " + contentBeforeURL + " ]"); 655 656 int equalIndex = urlIndex + 3; 658 String contentURL = content.substring(equalIndex + 1).trim(); 659 logger.debug("[ BEFORE meta http-equiv=[" + httpEquiv + "] content=[" + content + "] ]"); 660 logger.debug("[ Refresh url is [" + contentURL + " ] ]"); 661 String encodedUrl = transformer.getRewritedHrefValue("meta", contentURL, "metaId", "metaLink", webBrowserAction); 663 664 tag.setAttribute(HTML.Attribute.CONTENT.toString(), contentBeforeURL + "url=" + encodedUrl); 666 logger.debug("[ AFTER meta http-equiv=[" + httpEquiv + "] content=[" + contentBeforeURL + "url=" + encodedUrl + "] ]+"); 667 } 668 } 669 } 670 671 672 } 673 catch (Exception ex) { 674 ex.printStackTrace(); 675 throw new WebClippingException("Meta", ex); 676 } 677 finally { 678 return; 679 } 680 681 } 682 683 684 689 private void processStyleTag(Tag tag) { 690 String name = tag.getTagName(); 692 logger.debug(" Tag name: " + name); 693 try { 694 String style = tag.getAttribute("style"); 695 if (style != null && !style.equalsIgnoreCase("")) { 696 style = transformer.refactorCssStyleRule(style); 697 tag.setAttribute("style", style); 698 } 699 } 700 catch (Exception ex) { 701 ex.printStackTrace(); 702 logger.error("Rewrite style att error"); 703 704 } 705 } 706 707 708 713 private void processEventHandlerAtt(Tag tag) { 714 for (int i = 0; i < WebConstants.JAVASCRIPT_EVENT_NAMES_ARRAY.length; i++) { 715 String name = WebConstants.JAVASCRIPT_EVENT_NAMES_ARRAY[i]; 716 String value = tag.getAttribute(name); 717 if (value != null) { 718 boolean refactor = transformer.isRefactorJavascriptEvent(); 719 if (refactor) { 720 String url = transformer.getDocument().getUrlBean().getRedirectUrl().toExternalForm(); 721 String newValue = transformer.getRefactoredJavascript(url, Constants.WEB_BROWSER_SHOW_BROWSE, value); 722 tag.setAttribute(name, newValue); 723 } 724 else { 725 boolean remove = transformer.isRemoveJavascriptEvent(); 726 if (remove) { 727 tag.removeAttribute(name); 728 } 729 } 730 } 731 } 732 } 733 734 735 740 private void processSelectTag(SelectTag tag) { 741 String name = tag.getAttribute("name"); 742 if (name == null) { 743 name = ""; 744 } 745 String type = WebConstants.TYPE_SELECT; 746 int paramPosition = transformer.getPosAndUpdateInputHash(name); 747 String visibility = "true"; 748 NodeList it = tag.getChildren(); 749 if (it != null) { 750 for (int i = 0; i < it.size(); i++) { 751 Node childNode = it.elementAt(i); 752 if (childNode instanceof OptionTag) { 753 OptionTag op = (OptionTag) childNode; 754 String possibleValue = op.getValue(); 755 756 try { 757 transformer.recordFormParam(type, 758 formParentName, formParentId, 759 formParentPosition, name, possibleValue, 760 visibility, paramPosition); 761 } 762 catch (Exception ex) { 763 ex.printStackTrace(); 764 } 765 } 766 } 767 } 768 } 769 770 771 777 private void processInputTag(InputTag tag) throws WebClippingException { 778 try { 779 780 String type = tag.getAttribute(HTML.Attribute.TYPE.toString()); 781 String visibility = Boolean.TRUE.toString(); 783 String name = transformer.notNullValueForType(tag.getAttribute(HTML.Attribute.NAME.toString())); 785 int paramPosition = transformer.getPosAndUpdateInputHash(name); 786 String possibleValue = transformer.notNullValueForType(tag.getAttribute(HTML.Attribute.VALUE.toString())); 787 type = transformer.notNullValueForType(type); 788 transformer.recordFormParam(type, formParentName, formParentId, formParentPosition, name, possibleValue, visibility, paramPosition); 789 790 paramPosition++; 792 } 793 catch (Exception ex) { 794 throw new WebClippingException("Parser, Input Element", ex); 795 } 796 finally { 797 return; 798 } 799 800 } 801 802 } 803 | Popular Tags |