package org.dom4j.io;

import java.io.IOException;
import java.io.OutputStream;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;
import java.util.Stack;

import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.Entity;
import org.dom4j.Node;

import org.xml.sax.SAXException;

/**
 * An {@link XMLWriter} specialisation for producing HTML / XHTML output.
 *
 * <p>
 * Compared with its superclass this writer:
 * </p>
 * <ul>
 * <li>never writes an XML declaration ({@link #writeDeclaration()} is a
 * no-op);</li>
 * <li>writes CDATA text and entity text directly to the underlying writer
 * (CDATA is delegated to the superclass only when the output format is
 * XHTML);</li>
 * <li>omits the closing tag for a configurable set of element names (see
 * {@link #setOmitElementCloseSet(Set)});</li>
 * <li>switches off newlines, text trimming and indentation while writing
 * a configurable set of "preformatted" elements (see
 * {@link #setPreformattedTags(Set)}).</li>
 * </ul>
 *
 * <p>
 * Tag names are compared case-insensitively by upper-casing them with a
 * fixed locale, so matching does not depend on the JVM default locale.
 * </p>
 *
 * <p>
 * Note that {@link #writeElement(Element)} temporarily mutates the
 * {@link OutputFormat} returned by <code>getOutputFormat()</code> and
 * restores it afterwards.
 * </p>
 */
public class HTMLWriter extends XMLWriter {
    /** Platform line separator, read once from the system properties. */
    private static final String LINE_SEPARATOR = System
            .getProperty("line.separator");

    /** Upper-case names of the elements treated as preformatted by default. */
    protected static final HashSet DEFAULT_PREFORMATTED_TAGS;

    static {
        DEFAULT_PREFORMATTED_TAGS = new HashSet();
        DEFAULT_PREFORMATTED_TAGS.add("PRE");
        DEFAULT_PREFORMATTED_TAGS.add("SCRIPT");
        DEFAULT_PREFORMATTED_TAGS.add("STYLE");
        DEFAULT_PREFORMATTED_TAGS.add("TEXTAREA");
    }

    /**
     * Format used by the constructors that do not take an explicit
     * {@link OutputFormat}: text is trimmed and the declaration is
     * suppressed.
     */
    protected static final OutputFormat DEFAULT_HTML_FORMAT;

    static {
        // NOTE(review): the indent string is a single space as written here;
        // confirm this is the intended indent width.
        DEFAULT_HTML_FORMAT = new OutputFormat(" ", true);
        DEFAULT_HTML_FORMAT.setTrimText(true);
        DEFAULT_HTML_FORMAT.setSuppressDeclaration(true);
    }

    /**
     * Saved {@link FormatState}s; non-empty exactly while a preformatted
     * element is being written.
     */
    private Stack formatStack = new Stack();

    /** The most recent text passed to {@link #writeString(String)}. */
    private String lastText = "";

    /** Number of elements written so far by {@link #writeElement(Element)}. */
    private int tagsOutput = 0;

    /** Lazily initialised; -1 means "not yet read from the output format". */
    private int newLineAfterNTags = -1;

    /** Upper-case names of the elements currently treated as preformatted. */
    private HashSet preformattedTags = DEFAULT_PREFORMATTED_TAGS;

    /**
     * Upper-case names of the elements whose closing tag is omitted; created
     * on first use by {@link #internalGetOmitElementCloseSet()}.
     */
    private HashSet omitElementCloseSet;

    /**
     * Creates a writer over <code>writer</code> using
     * {@link #DEFAULT_HTML_FORMAT}.
     *
     * @param writer
     *            destination for the generated markup
     */
    public HTMLWriter(Writer writer) {
        super(writer, DEFAULT_HTML_FORMAT);
    }

    /**
     * Creates a writer over <code>writer</code> using the given format.
     *
     * @param writer
     *            destination for the generated markup
     * @param format
     *            output format to use
     */
    public HTMLWriter(Writer writer, OutputFormat format) {
        super(writer, format);
    }

    /**
     * Creates a writer using {@link #DEFAULT_HTML_FORMAT} and the
     * superclass's default destination.
     *
     * @throws UnsupportedEncodingException
     *             if thrown by the superclass constructor
     */
    public HTMLWriter() throws UnsupportedEncodingException {
        super(DEFAULT_HTML_FORMAT);
    }

    /**
     * Creates a writer using the given format and the superclass's default
     * destination.
     *
     * @param format
     *            output format to use
     *
     * @throws UnsupportedEncodingException
     *             if thrown by the superclass constructor
     */
    public HTMLWriter(OutputFormat format) throws UnsupportedEncodingException {
        super(format);
    }

    /**
     * Creates a writer over <code>out</code> using
     * {@link #DEFAULT_HTML_FORMAT}.
     *
     * @param out
     *            destination stream
     *
     * @throws UnsupportedEncodingException
     *             if thrown by the superclass constructor
     */
    public HTMLWriter(OutputStream out) throws UnsupportedEncodingException {
        super(out, DEFAULT_HTML_FORMAT);
    }

    /**
     * Creates a writer over <code>out</code> using the given format.
     *
     * @param out
     *            destination stream
     * @param format
     *            output format to use
     *
     * @throws UnsupportedEncodingException
     *             if thrown by the superclass constructor
     */
    public HTMLWriter(OutputStream out, OutputFormat format)
            throws UnsupportedEncodingException {
        super(out, format);
    }

    /**
     * Deliberately does nothing.
     *
     * @throws SAXException
     *             never thrown by this implementation
     */
    public void startCDATA() throws SAXException {
    }

    /**
     * Deliberately does nothing.
     *
     * @throws SAXException
     *             never thrown by this implementation
     */
    public void endCDATA() throws SAXException {
    }

    /**
     * Writes CDATA content. In XHTML mode the superclass implementation is
     * used; otherwise the raw text is written straight to the underlying
     * writer.
     *
     * @param text
     *            the CDATA content
     *
     * @throws IOException
     *             if writing fails
     */
    protected void writeCDATA(String text) throws IOException {
        if (getOutputFormat().isXHTML()) {
            super.writeCDATA(text);
        } else {
            writer.write(text);
        }

        lastOutputNodeType = Node.CDATA_SECTION_NODE;
    }

    /**
     * Writes the text of the entity directly to the underlying writer.
     *
     * @param entity
     *            the entity to write
     *
     * @throws IOException
     *             if writing fails
     */
    protected void writeEntity(Entity entity) throws IOException {
        writer.write(entity.getText());
        lastOutputNodeType = Node.ENTITY_REFERENCE_NODE;
    }

    /**
     * Deliberately does nothing, so no declaration is ever written.
     *
     * @throws IOException
     *             never thrown by this implementation
     */
    protected void writeDeclaration() throws IOException {
    }

    /**
     * Writes character data.
     *
     * <p>
     * A string consisting of exactly <code>"\n"</code> is written as the
     * platform line separator while a preformatted element is being written
     * and is dropped otherwise. Any other text is remembered as the last
     * text seen and is written as-is inside a preformatted element, or
     * trimmed outside of one.
     * </p>
     *
     * @param text
     *            the text to write; must not be <code>null</code>
     *
     * @throws IOException
     *             if writing fails
     */
    protected void writeString(String text) throws IOException {
        boolean insidePreformatted = !formatStack.empty();

        if (text.equals("\n")) {
            if (insidePreformatted) {
                super.writeString(LINE_SEPARATOR);
            }

            return;
        }

        lastText = text;

        if (insidePreformatted) {
            super.writeString(text);
        } else {
            super.writeString(text.trim());
        }
    }

    /**
     * Writes the closing tag unless the element is in the omit-close set.
     *
     * @param qualifiedName
     *            qualified name of the element being closed
     *
     * @throws IOException
     *             if writing fails
     */
    protected void writeClose(String qualifiedName) throws IOException {
        if (!omitElementClose(qualifiedName)) {
            super.writeClose(qualifiedName);
        }
    }

    /**
     * Finishes the start tag of an element that has no content.
     *
     * <p>
     * For elements in the omit-close set this writes <code>" /&gt;"</code>
     * in XHTML mode and <code>"&gt;"</code> otherwise; for all other
     * elements the superclass implementation is used.
     * </p>
     *
     * @param qualifiedName
     *            qualified name of the empty element
     *
     * @throws IOException
     *             if writing fails
     */
    protected void writeEmptyElementClose(String qualifiedName)
            throws IOException {
        boolean xhtml = getOutputFormat().isXHTML();

        if (omitElementClose(qualifiedName)) {
            writer.write(xhtml ? " />" : ">");
        } else {
            super.writeEmptyElementClose(qualifiedName);
        }
    }

    /**
     * Tells whether the closing tag of the named element is omitted. The
     * comparison ignores case.
     *
     * @param qualifiedName
     *            qualified name of the element; must not be
     *            <code>null</code>
     *
     * @return <code>true</code> if the upper-cased name is in the omit-close
     *         set
     */
    protected boolean omitElementClose(String qualifiedName) {
        return internalGetOmitElementCloseSet().contains(
                upperCase(qualifiedName));
    }

    /**
     * Returns the live omit-close set, creating and populating it through
     * {@link #loadOmitElementCloseSet(Set)} on first use.
     *
     * @return the internal (mutable) set
     */
    private HashSet internalGetOmitElementCloseSet() {
        if (omitElementCloseSet == null) {
            omitElementCloseSet = new HashSet();
            loadOmitElementCloseSet(omitElementCloseSet);
        }

        return omitElementCloseSet;
    }

    /**
     * Populates <code>set</code> with the default upper-case names of the
     * elements whose closing tag is omitted.
     *
     * @param set
     *            the set to add the names to
     */
    protected void loadOmitElementCloseSet(Set set) {
        set.add("AREA");
        set.add("BASE");
        set.add("BR");
        set.add("COL");
        set.add("HR");
        set.add("IMG");
        set.add("INPUT");
        set.add("LINK");
        set.add("META");
        set.add("P");
        set.add("PARAM");
    }

    /**
     * Returns a copy of the set of upper-case element names whose closing
     * tag is omitted. Changes to the returned set do not affect this writer.
     *
     * @return a clone of the current omit-close set
     */
    public Set getOmitElementCloseSet() {
        return (Set) (internalGetOmitElementCloseSet().clone());
    }

    /**
     * Replaces the set of element names whose closing tag is omitted.
     *
     * <p>
     * Each non-null entry of <code>newSet</code> is converted with
     * <code>toString()</code> and upper-cased before being stored. Passing
     * <code>null</code> or an empty set results in an empty omit-close set,
     * i.e. every element gets its closing tag.
     * </p>
     *
     * @param newSet
     *            the names to use, may be <code>null</code>
     */
    public void setOmitElementCloseSet(Set newSet) {
        omitElementCloseSet = toUpperCaseSet(newSet);
    }

    /**
     * Returns a copy of the set of upper-case element names currently
     * treated as preformatted. Changes to the returned set do not affect
     * this writer.
     *
     * @return a clone of the current preformatted-tag set
     */
    public Set getPreformattedTags() {
        return (Set) (preformattedTags.clone());
    }

    /**
     * Replaces the set of element names treated as preformatted.
     *
     * <p>
     * Each non-null entry of <code>newSet</code> is converted with
     * <code>toString()</code> and upper-cased before being stored. Passing
     * <code>null</code> or an empty set results in no element being treated
     * as preformatted.
     * </p>
     *
     * @param newSet
     *            the names to use, may be <code>null</code>
     */
    public void setPreformattedTags(Set newSet) {
        preformattedTags = toUpperCaseSet(newSet);
    }

    /**
     * Tells whether the named element is treated as preformatted. The
     * comparison ignores case.
     *
     * @param qualifiedName
     *            qualified name of the element; must not be
     *            <code>null</code>
     *
     * @return <code>true</code> if the upper-cased name is in the
     *         preformatted-tag set
     */
    public boolean isPreformattedTag(String qualifiedName) {
        return (preformattedTags != null)
                && (preformattedTags.contains(upperCase(qualifiedName)));
    }

    /**
     * Writes an element, handling preformatted elements specially.
     *
     * <p>
     * When the "new line after N tags" setting is positive, a line separator
     * is emitted before every N-th element. For a preformatted element the
     * current newline / trim / indent settings of the output format are
     * saved, switched off while the element is written by the superclass,
     * and restored afterwards even if writing fails.
     * </p>
     *
     * @param element
     *            the element to write
     *
     * @throws IOException
     *             if writing fails
     */
    protected void writeElement(Element element) throws IOException {
        if (newLineAfterNTags == -1) {
            lazyInitNewLinesAfterNTags();
        }

        if (newLineAfterNTags > 0) {
            if ((tagsOutput > 0) && ((tagsOutput % newLineAfterNTags) == 0)) {
                writer.write(LINE_SEPARATOR);
            }
        }

        tagsOutput++;

        String qualifiedName = element.getQualifiedName();
        // Capture before the superclass writes anything that may update it.
        String saveLastText = lastText;

        if (!isPreformattedTag(qualifiedName)) {
            super.writeElement(element);

            return;
        }

        OutputFormat currentFormat = getOutputFormat();
        boolean saveNewlines = currentFormat.isNewlines();
        boolean saveTrimText = currentFormat.isTrimText();
        String currentIndent = currentFormat.getIndent();

        // A non-empty stack is what tells writeString() to leave text alone.
        formatStack.push(new FormatState(saveNewlines, saveTrimText,
                currentIndent));

        try {
            // Still uses the caller's format settings at this point.
            super.writePrintln();

            // If the preceding text was whitespace only and indenting is on,
            // re-emit that whitespace (minus CR/LF) in front of the tag.
            if ((saveLastText.trim().length() == 0)
                    && (currentIndent != null)
                    && (currentIndent.length() > 0)) {
                writer.write(justSpaces(saveLastText));
            }

            currentFormat.setNewlines(false);
            currentFormat.setTrimText(false);
            currentFormat.setIndent("");

            super.writeElement(element);
        } finally {
            FormatState state = (FormatState) formatStack.pop();
            currentFormat.setNewlines(state.isNewlines());
            currentFormat.setTrimText(state.isTrimText());
            currentFormat.setIndent(state.getIndent());
        }
    }

    /**
     * Returns <code>text</code> with every carriage-return and line-feed
     * character removed; all other characters are kept in order.
     *
     * @param text
     *            the text to filter
     *
     * @return the filtered text
     */
    private String justSpaces(String text) {
        int length = text.length();
        StringBuffer res = new StringBuffer(length);

        for (int i = 0; i < length; i++) {
            char c = text.charAt(i);

            if ((c != '\r') && (c != '\n')) {
                res.append(c);
            }
        }

        return res.toString();
    }

    /**
     * Initialises {@link #newLineAfterNTags}: zero when the output format
     * already writes newlines, otherwise the value configured on the output
     * format.
     */
    private void lazyInitNewLinesAfterNTags() {
        if (getOutputFormat().isNewlines()) {
            newLineAfterNTags = 0;
        } else {
            newLineAfterNTags = getOutputFormat().getNewLineAfterNTags();
        }
    }

    /**
     * Upper-cases a tag name with a fixed locale so that matching against
     * the ASCII names in the tag sets does not depend on the JVM default
     * locale (e.g. the Turkish dotted/dotless "i" mapping).
     *
     * @param name
     *            the name to convert; must not be <code>null</code>
     *
     * @return the upper-cased name
     */
    private static String upperCase(String name) {
        return name.toUpperCase(Locale.ENGLISH);
    }

    /**
     * Builds a new set holding the upper-cased <code>toString()</code> value
     * of every non-null entry of <code>source</code>.
     *
     * @param source
     *            the entries to copy, may be <code>null</code>
     *
     * @return a new, possibly empty, set; never <code>null</code>
     */
    private static HashSet toUpperCaseSet(Set source) {
        HashSet result = new HashSet();

        if (source != null) {
            for (Iterator iter = source.iterator(); iter.hasNext();) {
                Object aTag = iter.next();

                if (aTag != null) {
                    result.add(upperCase(aTag.toString()));
                }
            }
        }

        return result;
    }

    /**
     * Parses <code>html</code> and pretty-prints it as HTML. Equivalent to
     * <code>prettyPrintHTML(html, true, true, false, true)</code>.
     *
     * @param html
     *            well-formed markup to parse
     *
     * @return the formatted markup
     *
     * @throws java.io.IOException
     *             if writing fails
     * @throws java.io.UnsupportedEncodingException
     *             declared for callers; see the five-argument overload
     * @throws org.dom4j.DocumentException
     *             if <code>html</code> cannot be parsed
     */
    public static String prettyPrintHTML(String html)
            throws java.io.IOException, java.io.UnsupportedEncodingException,
            org.dom4j.DocumentException {
        return prettyPrintHTML(html, true, true, false, true);
    }

    /**
     * Parses <code>html</code> and pretty-prints it as XHTML. Equivalent to
     * <code>prettyPrintHTML(html, true, true, true, false)</code>.
     *
     * @param html
     *            well-formed markup to parse
     *
     * @return the formatted markup
     *
     * @throws java.io.IOException
     *             if writing fails
     * @throws java.io.UnsupportedEncodingException
     *             declared for callers; see the five-argument overload
     * @throws org.dom4j.DocumentException
     *             if <code>html</code> cannot be parsed
     */
    public static String prettyPrintXHTML(String html)
            throws java.io.IOException, java.io.UnsupportedEncodingException,
            org.dom4j.DocumentException {
        return prettyPrintHTML(html, true, true, true, false);
    }

    /**
     * Parses <code>html</code> with {@link DocumentHelper#parseText(String)}
     * and writes it to a string using a pretty-print {@link OutputFormat}
     * adjusted by the given flags.
     *
     * @param html
     *            well-formed markup to parse
     * @param newlines
     *            value passed to <code>OutputFormat.setNewlines</code>
     * @param trim
     *            value passed to <code>OutputFormat.setTrimText</code>
     * @param isXHTML
     *            value passed to <code>OutputFormat.setXHTML</code>
     * @param expandEmpty
     *            value passed to
     *            <code>OutputFormat.setExpandEmptyElements</code>
     *
     * @return the formatted markup
     *
     * @throws java.io.IOException
     *             if writing fails
     * @throws java.io.UnsupportedEncodingException
     *             declared for callers
     * @throws org.dom4j.DocumentException
     *             if <code>html</code> cannot be parsed
     */
    public static String prettyPrintHTML(String html, boolean newlines,
            boolean trim, boolean isXHTML, boolean expandEmpty)
            throws java.io.IOException, java.io.UnsupportedEncodingException,
            org.dom4j.DocumentException {
        StringWriter sw = new StringWriter();
        OutputFormat format = OutputFormat.createPrettyPrint();
        format.setNewlines(newlines);
        format.setTrimText(trim);
        format.setXHTML(isXHTML);
        format.setExpandEmptyElements(expandEmpty);

        HTMLWriter writer = new HTMLWriter(sw, format);
        Document document = DocumentHelper.parseText(html);
        writer.write(document);
        writer.flush();

        return sw.toString();
    }

    /**
     * Immutable snapshot of the three output-format settings that
     * {@link HTMLWriter#writeElement(Element)} overrides while writing a
     * preformatted element. Static because it needs no access to the
     * enclosing writer.
     */
    private static final class FormatState {
        private final boolean newlines;

        private final boolean trimText;

        private final String indent;

        public FormatState(boolean newLines, boolean trimText, String indent) {
            this.newlines = newLines;
            this.trimText = trimText;
            this.indent = indent;
        }

        public boolean isNewlines() {
            return newlines;
        }

        public boolean isTrimText() {
            return trimText;
        }

        public String getIndent() {
            return indent;
        }
    }
}