package com.opencms.htmlconverter;

import org.opencms.i18n.CmsEncoder;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.net.URL;
import java.util.ArrayList;
import java.util.StringTokenizer;
import java.util.Vector;

import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.tidy.Tidy;

/**
 * Converts HTML by parsing it with JTidy into a DOM and re-serialising that DOM
 * according to a {@link CmsHtmlConverterConfig}: tags and blocks can be removed or
 * replaced, and content, strings and characters can be substituted.<p>
 *
 * Instances keep per-conversion state in fields (the output buffer and the
 * currently matched replace objects), so nothing here is synchronised.
 */
public final class CmsHtmlConverter implements I_CmsHtmlConverterInterface {

    /** Name of the JTidy configuration file, empty if none was set. */
    private String m_tidyConfFile = "";

    /** True once a JTidy configuration file has been set. */
    private boolean m_tidyConfFileDefined;

    /** Name of the converter configuration file, empty if none was set. */
    private String m_converterConfFile = "";

    /** True if the converter configuration was loaded from a file. */
    private boolean m_converterConfFileDefined;

    /** True if the converter configuration was loaded (from a file or from a String). */
    private boolean m_converterConfigDefined;

    /** Number of replace tags in the loaded configuration; higher indices are temporary entries. */
    private int m_numberReplaceTags;

    /** Number of replace blocks in the loaded configuration; higher indices are temporary entries. */
    private int m_numberReplaceBlocks;

    /** Output buffer of the conversion currently in progress. */
    private StringBuffer m_tempString;

    /** The JTidy parser. */
    private final Tidy m_tidy = new Tidy();

    /** Helper methods used during conversion. */
    private final CmsHtmlConverterTools m_tools = new CmsHtmlConverterTools();

    /** The converter configuration. */
    private final CmsHtmlConverterConfig m_configuration = new CmsHtmlConverterConfig();

    /** The replace tag object matched for the element currently being processed. */
    private CmsHtmlConverterObjectReplaceTags m_tagObject = new CmsHtmlConverterObjectReplaceTags();

    /** The replace block object matched for the element currently being processed. */
    private CmsHtmlConverterObjectReplaceBlocks m_blockObject = new CmsHtmlConverterObjectReplaceBlocks();

    /** Servlet prefix handed to the URL helpers. */
    private String m_servletPrefix;

    /** Relative root handed to the URL helpers. */
    private String m_relativeRoot;

    /** Original URL handed to the URL helpers. */
    private URL m_url;

    /** Names of the tags whose end tag is followed by a line break in the output. */
    private final Vector m_enterTags = new Vector();

    /**
     * Creates a converter with default JTidy settings (no tidy mark, no warnings, quiet).<p>
     */
    public CmsHtmlConverter() {

        m_tidy.setTidyMark(false);
        m_tidy.setShowWarnings(false);
        m_tidy.setQuiet(true);
        initialiseTags();
    }

    /**
     * Creates a converter that configures JTidy from the given file.<p>
     *
     * @param tidyConfFileName name of the JTidy configuration file
     */
    public CmsHtmlConverter(String tidyConfFileName) {

        this.setTidyConfFile(tidyConfFileName);
        initialiseTags();
    }

    /**
     * Creates a converter that configures JTidy and the converter from the given files.<p>
     *
     * @param tidyConfFileName name of the JTidy configuration file
     * @param confFile name of the converter configuration file
     */
    public CmsHtmlConverter(String tidyConfFileName, String confFile) {

        this.setTidyConfFile(tidyConfFileName);
        this.setConverterConfFile(confFile);
        initialiseTags();
    }

    /**
     * Fills the list of tags that get a line break after their end tag.<p>
     */
    private void initialiseTags() {

        StringTokenizer tokens = new StringTokenizer(
            "p,table,tr,td,body,head,script,pre,title,style,h1,h2,h3,h4,h5,h6,ul,ol,li",
            ",");
        while (tokens.hasMoreTokens()) {
            m_enterTags.addElement(tokens.nextToken());
        }
    }

    /**
     * Sets the servlet prefix and the relative root used when URL parameters are rewritten.<p>
     *
     * @param prefix the servlet prefix
     * @param relativeRoot the relative root
     */
    public void setServletPrefix(String prefix, String relativeRoot) {

        m_servletPrefix = prefix;
        m_relativeRoot = relativeRoot;
    }

    /**
     * Sets the original URL used when URL parameters are rewritten.<p>
     *
     * @param orgUrl the original URL
     */
    public void setOriginalUrl(URL orgUrl) {

        m_url = orgUrl;
    }

    /**
     * Configures JTidy from the given file and remembers the file name.<p>
     *
     * @param fileName name of the JTidy configuration file
     */
    public void setTidyConfFile(String fileName) {

        m_tidyConfFile = fileName;
        m_tidyConfFileDefined = true;
        m_tidy.setConfigurationFromFile(m_tidyConfFile);
    }

    /**
     * Returns the name of the JTidy configuration file.<p>
     *
     * @return the file name, or an empty String if no file was set
     */
    public String getTidyConfFile() {

        return m_tidyConfFileDefined ? m_tidyConfFile : "";
    }

    /**
     * Checks whether a JTidy configuration file was set.<p>
     *
     * @return true if a JTidy configuration file was set
     */
    public boolean tidyConfigured() {

        return m_tidyConfFileDefined;
    }

    /**
     * Loads the converter configuration from the given file.<p>
     *
     * If the file cannot be read, a message is printed to <code>System.err</code>
     * and the converter state is left unchanged.<p>
     *
     * @param confFile name of the converter configuration file
     */
    public void setConverterConfFile(String confFile) {

        InputStream in = null;
        try {
            in = new FileInputStream(confFile);
            m_configuration.init(in);
        } catch (IOException e) {
            System.err.println("Configuration error: Configuration file no found!");
            return;
        } finally {
            // the stream was previously never closed
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // nothing sensible can be done if closing a read-only stream fails
                }
            }
        }
        // remember the file name so that getConverterConfFile() can report it
        m_converterConfFile = confFile;
        m_converterConfFileDefined = true;
        m_converterConfigDefined = true;
        m_numberReplaceTags = m_configuration.getReplaceTags().size();
        m_numberReplaceBlocks = m_configuration.getReplaceBlocks().size();
    }

    /**
     * Loads the converter configuration from the given String.<p>
     *
     * @param configuration the configuration passed to the configuration object
     */
    public void setConverterConfString(String configuration) {

        m_configuration.init(configuration);
        m_converterConfFileDefined = false;
        m_converterConfigDefined = true;
        m_numberReplaceTags = m_configuration.getReplaceTags().size();
        m_numberReplaceBlocks = m_configuration.getReplaceBlocks().size();
    }

    /**
     * Returns the name of the converter configuration file.<p>
     *
     * @return the file name, or an empty String if the configuration was not loaded from a file
     */
    public String getConverterConfFile() {

        return m_converterConfFileDefined ? m_converterConfFile : "";
    }

    /**
     * Checks whether a converter configuration was loaded.<p>
     *
     * @return true if a configuration was loaded from a file or a String
     */
    public boolean converterConfigured() {

        return m_converterConfigDefined;
    }

    /**
     * Checks whether JTidy reports parse errors for the given HTML.<p>
     *
     * @param inString the HTML code to check
     * @return true if JTidy reported at least one parse error
     */
    public boolean hasErrors(String inString) {

        InputStream in = new ByteArrayInputStream(inString.getBytes());
        return this.hasErrors(in);
    }

    /**
     * Checks whether JTidy reports parse errors for the given HTML.<p>
     *
     * @param input stream with the HTML code to check
     * @return true if JTidy reported at least one parse error
     */
    public boolean hasErrors(InputStream input) {

        m_tidy.setOnlyErrors(true);
        m_tidy.setShowWarnings(false);
        m_tidy.setQuiet(true);
        m_tidy.setErrout(null);

        m_tidy.parse(input, null);

        return m_tidy.getParseErrors() != 0;
    }

    /**
     * Returns the number of parse errors reported by JTidy.<p>
     *
     * @return the number of parse errors
     */
    public int getNumberErrors() {

        return m_tidy.getParseErrors();
    }

    /**
     * Returns the JTidy error output for the given HTML.<p>
     *
     * @param inString the HTML code to check
     * @return the error output, or an "ok" message if no errors were detected
     */
    public String showErrors(String inString) {

        InputStream in = new ByteArrayInputStream(inString.getBytes());
        OutputStream out = new ByteArrayOutputStream();
        this.showErrors(in, out);
        return out.toString();
    }

    /**
     * Writes the JTidy error output for the given HTML to the output stream.<p>
     *
     * The writer wrapping <code>output</code> is closed when this method returns.<p>
     *
     * @param input stream with the HTML code to check
     * @param output stream the error output is written to
     */
    public void showErrors(InputStream input, OutputStream output) {

        m_tidy.setOnlyErrors(true);
        m_tidy.setQuiet(true);
        m_tidy.setShowWarnings(false);
        InputStream in = new BufferedInputStream(input);
        PrintWriter errorLog = new PrintWriter(output);
        m_tidy.setErrout(errorLog);
        m_tidy.parse(in, null);
        if (m_tidy.getParseErrors() == 0) {
            errorLog.println("HTML code ok!\nNo errors detected.");
        }
        errorLog.close();
    }

    /**
     * Converts the given HTML according to the converter configuration.<p>
     *
     * @param inString the HTML code to convert
     * @return the converted code
     */
    public String convertHTML(String inString) {

        Reader in = new StringReader(inString);
        Writer out = new StringWriter();
        this.convertHTML(in, out);
        return out.toString();
    }

    /**
     * Converts the HTML read from <code>input</code> and writes the result to <code>output</code>.<p>
     *
     * I/O problems are reported on <code>System.err</code> and abort the conversion.
     * The writer is closed after the result was written.<p>
     *
     * @param input reader providing the HTML code to convert
     * @param output writer receiving the converted code
     */
    public void convertHTML(Reader input, Writer output) {

        m_tidy.setShowWarnings(false);
        m_tidy.setQuiet(true);
        if (!m_tidyConfFileDefined) {
            m_tidy.setOnlyErrors(false);
            m_tidy.setTidyMark(false);
        }
        // JTidy messages are discarded
        PrintWriter errorLog = new PrintWriter(new ByteArrayOutputStream(), true);
        m_tidy.setErrout(errorLog);

        StringBuffer htmlString = new StringBuffer();
        try {
            char[] buffer = new char[4096];
            int read;
            while ((read = input.read(buffer)) != -1) {
                htmlString.append(buffer, 0, read);
            }
        } catch (IOException e) {
            System.err.println("Conversion error: " + e.toString());
            return;
        }

        // content replacements are applied before the code is parsed
        String outString = m_tools.scanContent(htmlString.toString(), m_configuration.getReplaceContent());

        InputStream in;
        try {
            in = new ByteArrayInputStream(outString.getBytes(CmsEncoder.ENCODING_UTF_8));
            m_tidy.setOutputEncoding(CmsEncoder.ENCODING_UTF_8);
            m_tidy.setInputEncoding(CmsEncoder.ENCODING_UTF_8);
        } catch (UnsupportedEncodingException e) {
            in = new ByteArrayInputStream(outString.getBytes());
            m_tidy.setOutputEncoding("LATIN1");
            m_tidy.setInputEncoding("LATIN1");
        }
        Node node = m_tidy.parseDOM(in, null);

        if (m_tidy.getParseErrors() != 0) {
            System.err.println("Conversion error: HTML code has errors!");
        }

        // start from an empty buffer: printDocument only creates one for a document node, so a
        // null parse result would otherwise hit a null or stale buffer from an earlier conversion
        m_tempString = new StringBuffer();
        this.printDocument(node);

        outString = m_tools.scanString(m_tempString.toString(), m_configuration.getReplaceStrings());
        outString = this.cleanOutput(outString);
        try {
            output.write(outString);
            output.close();
        } catch (IOException e) {
            System.err.println("Conversion error: " + e.toString());
        }
    }

    /**
     * Recursively serialises the given DOM node into the output buffer.<p>
     *
     * Only document, element and text nodes produce output; other node types are ignored.<p>
     *
     * @param node the node to serialise, may be null
     */
    private void printDocument(Node node) {

        if (node == null) {
            return;
        }
        switch (node.getNodeType()) {
            case Node.DOCUMENT_NODE:
                // a new document starts a new buffer, beginning with the global prefix
                m_tempString = new StringBuffer(m_configuration.getGlobalPrefix());
                this.printDocument(((Document)node).getDocumentElement());
                this.transformEndDocument();
                break;
            case Node.ELEMENT_NODE:
                // both indices are looked up before the start tag is written, block first
                int replaceBlock = this.indexReplaceBlock(node);
                int replaceTag = this.indexReplaceTag(node);
                if (!this.transformStartElement(node, replaceBlock, replaceTag)) {
                    NodeList children = node.getChildNodes();
                    if (children != null) {
                        int len = children.getLength();
                        for (int i = 0; i < len; i++) {
                            this.printDocument(children.item(i));
                        }
                    }
                }
                this.transformEndElement(node, replaceBlock, replaceTag);
                break;
            case Node.TEXT_NODE:
                this.transformTextNode(node);
                break;
            default:
                break;
        }
    }

    /**
     * Writes the start of an element to the output buffer.<p>
     *
     * NOTE(review): temporary replace objects added by the index lookups are only removed on
     * the replace-block path here and on the replace-tag path of
     * {@link #transformEndElement(Node, int, int)}; on the other paths they appear to stay in
     * the configuration - confirm whether that is intended.<p>
     *
     * @param node the element node
     * @param replaceBlock index of the matching replace block, or -1
     * @param replaceTag index of the matching replace tag, or -1
     * @return true if the children of the element must be skipped
     */
    private boolean transformStartElement(Node node, int replaceBlock, int replaceTag) {

        String nodeName = node.getNodeName();

        // removed blocks are dropped together with their content
        if (m_tools.checkTag(nodeName, m_configuration.getRemoveBlocks())) {
            return true;
        }
        // removed tags produce no output, but their children are still processed
        if (m_tools.checkTag(nodeName, m_configuration.getRemoveTags())) {
            return false;
        }

        if (replaceBlock != -1) {
            m_blockObject = (CmsHtmlConverterObjectReplaceBlocks)m_configuration.getReplaceBlocks().get(
                replaceBlock);
            String blockReplacement = m_blockObject.getReplaceString();
            if (!m_blockObject.getParameter().equals("")) {
                String valueParam = m_tools.scanNodeAttrs(node, m_blockObject.getParameter());
                blockReplacement = m_tools.replaceString(blockReplacement, "$parameter$", valueParam);
            }
            m_tempString.append(blockReplacement);
            // indices beyond the configured count are temporary entries
            if (replaceBlock > (m_numberReplaceBlocks - 1)) {
                m_configuration.removeObjectReplaceBlock(replaceBlock);
            }
            return true;
        }

        if (replaceTag != -1) {
            m_tagObject = (CmsHtmlConverterObjectReplaceTags)m_configuration.getReplaceTags().get(replaceTag);
            String tagReplacement = m_tagObject.getReplaceStartTag();
            if (!m_tagObject.getParameter().equals("")) {
                String valueParam = m_tools.scanNodeAttrs(node, m_tagObject.getParameter());
                if (m_tagObject.getReplaceParamAttr()) {
                    if (!m_tools.shouldReplaceUrl(m_url, valueParam, m_servletPrefix)) {
                        tagReplacement = "$parameter$";
                    } else {
                        valueParam = m_tools.modifyParameter(m_url, valueParam, m_servletPrefix, m_relativeRoot);
                    }
                    tagReplacement = m_tools.reconstructTag(
                        tagReplacement,
                        node,
                        m_tagObject.getParameter(),
                        m_configuration.getQuotationmark());
                }
                tagReplacement = m_tools.replaceString(tagReplacement, "$parameter$", valueParam);
            }
            m_tempString.append(tagReplacement);
            return false;
        }

        // no replacement configured: write the tag with its attributes, last attribute first
        m_tempString.append("<");
        m_tempString.append(nodeName);
        NamedNodeMap attrs = node.getAttributes();
        for (int i = attrs.getLength() - 1; i >= 0; i--) {
            m_tempString.append(" " + attrs.item(i).getNodeName() + "=" + m_configuration.getQuotationmark());
            String attrValue = attrs.item(i).getNodeValue();
            attrValue = m_tools.scanString(attrValue, m_configuration.getReplaceStrings());
            m_tempString.append(attrValue + m_configuration.getQuotationmark());
        }
        if (m_configuration.getXhtmlOutput() && m_tools.checkTag(nodeName, m_configuration.getInlineTags())) {
            m_tempString.append("/");
        }
        m_tempString.append(">");
        return false;
    }

    /**
     * Writes the end of an element to the output buffer.<p>
     *
     * @param node the element node
     * @param replaceBlock index of the matching replace block, or -1
     * @param replaceTag index of the matching replace tag, or -1
     */
    private void transformEndElement(Node node, int replaceBlock, int replaceTag) {

        String nodeName = node.getNodeName();

        // removed blocks and removed tags have no end tag in the output
        if (m_tools.checkTag(nodeName, m_configuration.getRemoveBlocks())
            || m_tools.checkTag(nodeName, m_configuration.getRemoveTags())) {
            return;
        }
        // replaced blocks were written completely by transformStartElement
        if (replaceBlock != -1) {
            return;
        }

        if (replaceTag != -1) {
            m_tagObject = (CmsHtmlConverterObjectReplaceTags)m_configuration.getReplaceTags().get(replaceTag);
            if (!m_tagObject.getInline()) {
                String tagReplacement = m_tagObject.getReplaceEndTag();
                if (!m_tagObject.getParameter().equals("")) {
                    String valueParam = m_tools.scanNodeAttrs(node, m_tagObject.getParameter());
                    tagReplacement = m_tools.replaceString(tagReplacement, "$parameter$", valueParam);
                }
                m_tempString.append(tagReplacement);
            }
            // indices beyond the configured count are temporary entries
            if (replaceTag > (m_numberReplaceTags - 1)) {
                m_configuration.removeObjectReplaceTag(replaceTag);
            }
            return;
        }

        // inline tags get no end tag
        if (m_tools.checkTag(nodeName, m_configuration.getInlineTags())) {
            return;
        }
        m_tempString.append("</");
        m_tempString.append(nodeName);
        m_tempString.append(">");

        boolean addEveryLine = m_configuration.getGlobalAddEveryLine();
        if (isEnterTag(nodeName)) {
            if (addEveryLine) {
                m_tempString.append(m_configuration.getGlobalSuffix() + "\n" + m_configuration.getGlobalPrefix());
            } else {
                m_tempString.append("\n");
            }
        }
    }

    /**
     * Checks whether the given tag name is one of the tags followed by a line break.<p>
     *
     * @param tagName the tag name to check, compared case-insensitively
     * @return true if the tag is in the list of enter tags
     */
    private boolean isEnterTag(String tagName) {

        for (int i = 0; i < m_enterTags.size(); i++) {
            if (tagName.equalsIgnoreCase((String)m_enterTags.elementAt(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Appends the global suffix at the end of the document.<p>
     */
    private void transformEndDocument() {

        m_tempString.append(m_configuration.getGlobalSuffix());
    }

    /**
     * Writes a text node to the output buffer.<p>
     *
     * Extended characters are not replaced inside <code>script</code> and <code>style</code> elements.<p>
     *
     * @param node the text node
     */
    private void transformTextNode(Node node) {

        String text = node.getNodeValue();
        String parentName = node.getParentNode().getNodeName();

        if (!parentName.equalsIgnoreCase("script") && !parentName.equalsIgnoreCase("style")) {
            text = m_tools.scanChar(text, m_configuration.getReplaceExtendedChars());
        }
        if (m_configuration.getEncodeQuotationmarks()) {
            text = m_tools.replaceString(text, "\"", m_configuration.getQuotationmark());
        }
        if (m_configuration.getGlobalAddEveryLine()) {
            text = m_tools.replaceString(
                text,
                "\n",
                (m_configuration.getGlobalSuffix() + "\n" + m_configuration.getGlobalPrefix()));
        }
        m_tempString.append(text);
    }

    /**
     * Removes empty prefix/suffix lines from the output when the global prefix and suffix
     * are added to every line.<p>
     *
     * @param cleanString the converted code
     * @return the cleaned code
     */
    private String cleanOutput(String cleanString) {

        if (!m_configuration.getGlobalAddEveryLine()) {
            return cleanString;
        }
        String result = cleanString + "\n";
        String emptyLine = m_configuration.getGlobalPrefix() + m_configuration.getGlobalSuffix() + "\n";
        if (!m_configuration.getGlobalPrefix().equals("") && !m_configuration.getGlobalSuffix().equals("")) {
            result = m_tools.replaceString(result, emptyLine, "");
        }
        return result;
    }

    /**
     * Looks up the replace tag that matches the given element.<p>
     *
     * If the matching entry takes its replacement from the element's attributes, a temporary
     * entry is appended to the configuration and its index is returned instead.<p>
     *
     * @param node the element node
     * @return index of the matching replace tag, or -1 if none matches
     */
    private int indexReplaceTag(Node node) {

        ArrayList replaceTags = m_configuration.getReplaceTags();
        NamedNodeMap attrs = node.getAttributes();
        for (int index = 0; index < replaceTags.size(); index++) {
            CmsHtmlConverterObjectReplaceTags candidate = (CmsHtmlConverterObjectReplaceTags)replaceTags.get(index);
            boolean nameMatches = node.getNodeName().equals(candidate.getTagName())
                || "*".equals(candidate.getTagName());
            if (!nameMatches) {
                continue;
            }
            // without a required attribute the tag name alone decides
            if (candidate.getTagAttrib().equals("")) {
                if (candidate.getReplaceFromAttrs()) {
                    return scanTagElementAttrs(node, candidate);
                }
                return index;
            }
            for (int i = attrs.getLength() - 1; i >= 0; i--) {
                if (attrs.item(i).getNodeName().equals(candidate.getTagAttrib())
                    && (attrs.item(i).getNodeValue().equals(candidate.getTagAttribValue())
                        || candidate.getTagAttribValue().equals(""))) {
                    if (candidate.getReplaceFromAttrs()) {
                        return scanTagElementAttrs(node, candidate);
                    }
                    return index;
                }
            }
        }
        return -1;
    }

    /**
     * Adds a temporary replace tag whose start and end replacements are read from the
     * attributes of the given element.<p>
     *
     * @param node the element node
     * @param testObject the configured replace tag that matched the element
     * @return index of the temporary replace tag in the configuration
     */
    private int scanTagElementAttrs(Node node, CmsHtmlConverterObjectReplaceTags testObject) {

        NamedNodeMap attrs = node.getAttributes();
        String startAttribute = testObject.getStartAttribute();
        String endAttribute = testObject.getEndAttribute();
        String replaceStartTag = "";
        String replaceEndTag = "";

        for (int i = 0; i < attrs.getLength(); i++) {
            String attrName = attrs.item(i).getNodeName();
            if (attrName.equalsIgnoreCase(startAttribute)) {
                replaceStartTag = attrs.item(i).getNodeValue();
            }
            if (attrName.equalsIgnoreCase(endAttribute)) {
                replaceEndTag = attrs.item(i).getNodeValue();
            }
        }

        if (m_configuration.getUseBrackets()) {
            replaceStartTag = m_configuration.scanBrackets(replaceStartTag);
            replaceEndTag = m_configuration.scanBrackets(replaceEndTag);
        }

        m_configuration.addObjectReplaceTag(
            testObject.getPrefix(),
            testObject.getTagName(),
            testObject.getTagAttrib(),
            testObject.getTagAttribValue(),
            replaceStartTag,
            replaceEndTag,
            testObject.getSuffix(),
            false,
            "",
            "",
            testObject.getParameter(),
            testObject.getReplaceParamAttr());
        return m_configuration.getReplaceTags().size() - 1;
    }

    /**
     * Looks up the replace block that matches the given element.<p>
     *
     * If the matching entry takes its replacement from the element's attributes, a temporary
     * entry is appended to the configuration and its index is returned instead.<p>
     *
     * @param node the element node
     * @return index of the matching replace block, or -1 if none matches
     */
    private int indexReplaceBlock(Node node) {

        ArrayList replaceBlocks = m_configuration.getReplaceBlocks();
        NamedNodeMap attrs = node.getAttributes();
        for (int index = 0; index < replaceBlocks.size(); index++) {
            CmsHtmlConverterObjectReplaceBlocks candidate = (CmsHtmlConverterObjectReplaceBlocks)replaceBlocks.get(
                index);
            boolean nameMatches = node.getNodeName().equals(candidate.getTagName())
                || "*".equals(candidate.getTagName());
            if (!nameMatches) {
                continue;
            }
            // without a required attribute the tag name alone decides
            if (candidate.getTagAttrib().equals("")) {
                if (candidate.getReplaceFromAttrs()) {
                    return scanBlockElementAttrs(node, candidate);
                }
                return index;
            }
            for (int i = attrs.getLength() - 1; i >= 0; i--) {
                if (attrs.item(i).getNodeName().equals(candidate.getTagAttrib())
                    && (attrs.item(i).getNodeValue().equals(candidate.getTagAttribValue())
                        || candidate.getTagAttribValue().equals(""))) {
                    if (candidate.getReplaceFromAttrs()) {
                        return scanBlockElementAttrs(node, candidate);
                    }
                    return index;
                }
            }
        }
        return -1;
    }

    /**
     * Adds a temporary replace block whose replacement is read from an attribute of the
     * given element.<p>
     *
     * @param node the element node
     * @param testObject the configured replace block that matched the element
     * @return index of the temporary replace block in the configuration
     */
    private int scanBlockElementAttrs(Node node, CmsHtmlConverterObjectReplaceBlocks testObject) {

        NamedNodeMap attrs = node.getAttributes();
        String replaceAttribute = testObject.getReplaceAttribute();
        String replaceString = "";

        for (int i = 0; i < attrs.getLength(); i++) {
            if (attrs.item(i).getNodeName().equalsIgnoreCase(replaceAttribute)) {
                replaceString = attrs.item(i).getNodeValue();
            }
        }

        if (m_configuration.getUseBrackets()) {
            replaceString = m_configuration.scanBrackets(replaceString);
        }

        m_configuration.addObjectReplaceBlock(
            testObject.getPrefix(),
            testObject.getTagName(),
            testObject.getTagAttrib(),
            testObject.getTagAttribValue(),
            replaceString,
            testObject.getSuffix(),
            false,
            "",
            testObject.getParameter());
        return m_configuration.getReplaceBlocks().size() - 1;
    }
}