1 16 17 18 32 33 package org.apache.xml.serialize; 34 35 import org.apache.xerces.dom.DOMMessageFormatter; 36 37 import java.io.IOException ; 38 import java.io.OutputStream ; 39 import java.io.Writer ; 40 import java.util.Enumeration ; 41 import java.util.Locale ; 42 43 import org.w3c.dom.Attr ; 44 import org.w3c.dom.Element ; 45 import org.w3c.dom.NamedNodeMap ; 46 import org.w3c.dom.Node ; 47 import org.xml.sax.AttributeList ; 48 import org.xml.sax.Attributes ; 49 import org.xml.sax.SAXException ; 50 51 52 93 public class HTMLSerializer 94 extends BaseMarkupSerializer 95 { 96 97 98 101 private boolean _xhtml; 102 103 104 public static final String XHTMLNamespace = "http://www.w3.org/1999/xhtml"; 105 106 private String fUserXHTMLNamespace = null; 108 109 110 117 protected HTMLSerializer( boolean xhtml, OutputFormat format ) 118 { 119 super( format ); 120 _xhtml = xhtml; 121 } 122 123 124 129 public HTMLSerializer() 130 { 131 this( false, new OutputFormat( Method.HTML, "ISO-8859-1", false ) ); 132 } 133 134 135 140 public HTMLSerializer( OutputFormat format ) 141 { 142 this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) ); 143 } 144 145 146 147 155 public HTMLSerializer( Writer writer, OutputFormat format ) 156 { 157 this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) ); 158 setOutputCharStream( writer ); 159 } 160 161 162 170 public HTMLSerializer( OutputStream output, OutputFormat format ) 171 { 172 this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) ); 173 setOutputByteStream( output ); 174 } 175 176 177 public void setOutputFormat( OutputFormat format ) 178 { 179 super.setOutputFormat( format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) ); 180 } 181 182 public void setXHTMLNamespace(String newNamespace) { 184 fUserXHTMLNamespace = newNamespace; 185 } 187 191 192 public void startElement( String namespaceURI, String localName, 193 String rawName, Attributes attrs ) 194 throws SAXException 195 { 196 int i; 197 boolean preserveSpace; 198 ElementState state; 199 String name; 200 String value; 201 String htmlName; 202 boolean addNSAttr = false; 203 204 try { 205 if ( _printer == null ) 206 throw new IllegalStateException ( 207 DOMMessageFormatter.formatMessage( 208 DOMMessageFormatter.SERIALIZER_DOMAIN, 209 "NoWriterSupplied", null)); 210 211 state = getElementState(); 212 if ( isDocumentState() ) { 213 if ( ! _started ) 218 startDocument( (localName == null || localName.length() == 0) 219 ? rawName : localName ); 220 } else { 221 if ( state.empty ) 225 _printer.printText( '>' ); 226 if ( _indenting && ! state.preserveSpace && 230 ( state.empty || state.afterElement ) ) 231 _printer.breakLine(); 232 } 233 preserveSpace = state.preserveSpace; 234 235 238 boolean hasNamespaceURI = (namespaceURI != null && namespaceURI.length() != 0); 242 243 if ( rawName == null || rawName.length() == 0) { 246 rawName = localName; 247 if ( hasNamespaceURI ) { 248 String prefix; 249 prefix = getPrefix( namespaceURI ); 250 if ( prefix != null && prefix.length() != 0 ) 251 rawName = prefix + ":" + localName; 252 } 253 addNSAttr = true; 254 } 255 if ( !hasNamespaceURI ) 256 htmlName = rawName; 257 else { 258 if ( namespaceURI.equals( XHTMLNamespace ) || 259 (fUserXHTMLNamespace != null && fUserXHTMLNamespace.equals(namespaceURI)) ) 260 htmlName = localName; 261 else 262 htmlName = null; 263 } 264 265 _printer.printText( '<' ); 267 if ( _xhtml ) 268 _printer.printText( rawName.toLowerCase(Locale.ENGLISH) ); 269 else 270 _printer.printText( rawName ); 271 _printer.indent(); 272 273 if ( attrs != null ) { 277 for ( i = 0 ; i < attrs.getLength() ; ++i ) { 278 _printer.printSpace(); 279 name = attrs.getQName( i ).toLowerCase(Locale.ENGLISH); 280 value = attrs.getValue( i ); 281 if ( _xhtml || hasNamespaceURI ) { 282 if ( value == null ) { 284 _printer.printText( name ); 285 _printer.printText( "=\"\"" ); 286 } else { 287 _printer.printText( name ); 288 _printer.printText( "=\"" ); 289 printEscaped( value ); 290 _printer.printText( '"' ); 291 } 292 } else { 293 if ( value == null ) { 296 value = ""; 297 } 298 if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 ) 299 _printer.printText( name ); 300 else if ( HTMLdtd.isURI( rawName, name ) ) { 301 _printer.printText( name ); 302 _printer.printText( "=\"" ); 303 _printer.printText( escapeURI( value ) ); 304 _printer.printText( '"' ); 305 } else if ( HTMLdtd.isBoolean( rawName, name ) ) 306 _printer.printText( name ); 307 else { 308 _printer.printText( name ); 309 _printer.printText( "=\"" ); 310 printEscaped( value ); 311 _printer.printText( '"' ); 312 } 313 } 314 } 315 } 316 if ( htmlName != null && HTMLdtd.isPreserveSpace( htmlName ) ) 317 preserveSpace = true; 318 319 if ( addNSAttr ) { 320 Enumeration keys; 321 322 keys = _prefixes.keys(); 323 while ( keys.hasMoreElements() ) { 324 _printer.printSpace(); 325 value = (String ) keys.nextElement(); 326 name = (String ) _prefixes.get( value ); 327 if ( name.length() == 0 ) { 328 _printer.printText( "xmlns=\"" ); 329 printEscaped( value ); 330 _printer.printText( '"' ); 331 } else { 332 _printer.printText( "xmlns:" ); 333 _printer.printText( name ); 334 _printer.printText( "=\"" ); 335 printEscaped( value ); 336 _printer.printText( '"' ); 337 } 338 } 339 } 340 341 state = enterElementState( namespaceURI, localName, rawName, preserveSpace ); 345 346 348 if ( htmlName != null && ( htmlName.equalsIgnoreCase( "A" ) || 349 htmlName.equalsIgnoreCase( "TD" ) ) ) { 350 state.empty = false; 351 _printer.printText( '>' ); 352 } 353 354 if ( htmlName != null && ( rawName.equalsIgnoreCase( "SCRIPT" ) || 358 rawName.equalsIgnoreCase( "STYLE" ) ) ) { 359 if ( _xhtml ) { 360 state.doCData = true; 362 } else { 363 state.unescaped = true; 365 } 366 } 367 } catch ( IOException except ) { 368 throw new SAXException ( except ); 369 } 370 } 371 372 373 public void endElement( String namespaceURI, String localName, 374 String rawName ) 375 throws SAXException 376 { 377 try { 378 endElementIO( namespaceURI, localName, rawName ); 379 } catch ( IOException except ) { 380 throw new SAXException ( except ); 381 } 382 } 383 384 385 public void endElementIO( String namespaceURI, String localName, 386 String rawName ) 387 throws IOException 388 { 389 ElementState state; 390 String htmlName; 391 392 _printer.unindent(); 396 state = getElementState(); 397 398 if ( state.namespaceURI == null || state.namespaceURI.length() == 0 ) 399 htmlName = state.rawName; 400 else { 401 if ( state.namespaceURI.equals( XHTMLNamespace ) || 402 (fUserXHTMLNamespace != null && fUserXHTMLNamespace.equals(state.namespaceURI)) ) 403 htmlName = state.localName; 404 else 405 htmlName = null; 406 } 407 408 if ( _xhtml) { 409 if ( state.empty ) { 410 _printer.printText( " />" ); 411 } else { 412 if ( state.inCData ) 414 _printer.printText( "]]>" ); 415 _printer.printText( "</" ); 417 _printer.printText( state.rawName.toLowerCase(Locale.ENGLISH) ); 418 _printer.printText( '>' ); 419 } 420 } else { 421 if ( state.empty ) 422 _printer.printText( '>' ); 423 if ( htmlName == null || ! HTMLdtd.isOnlyOpening( htmlName ) ) { 429 if ( _indenting && ! state.preserveSpace && state.afterElement ) 430 _printer.breakLine(); 431 if ( state.inCData ) 433 _printer.printText( "]]>" ); 434 _printer.printText( "</" ); 435 _printer.printText( state.rawName ); 436 _printer.printText( '>' ); 437 } 438 } 439 state = leaveElementState(); 442 if ( htmlName == null || ( ! htmlName.equalsIgnoreCase( "A" ) && 444 ! htmlName.equalsIgnoreCase( "TD" ) ) ) 445 446 state.afterElement = true; 447 state.empty = false; 448 if ( isDocumentState() ) 449 _printer.flush(); 450 } 451 452 453 457 458 public void characters( char[] chars, int start, int length ) 459 throws SAXException 460 { 461 ElementState state; 462 463 try { 464 state = content(); 466 state.doCData = false; 467 super.characters( chars, start, length ); 468 } catch ( IOException except ) { 469 throw new SAXException ( except ); 470 } 471 } 472 473 474 public void startElement( String tagName, AttributeList attrs ) 475 throws SAXException 476 { 477 int i; 478 boolean preserveSpace; 479 ElementState state; 480 String name; 481 String value; 482 483 try { 484 if ( _printer == null ) 485 throw new IllegalStateException ( 486 DOMMessageFormatter.formatMessage( 487 DOMMessageFormatter.SERIALIZER_DOMAIN, 488 "NoWriterSupplied", null)); 489 490 491 state = getElementState(); 492 if ( isDocumentState() ) { 493 if ( ! _started ) 498 startDocument( tagName ); 499 } else { 500 if ( state.empty ) 504 _printer.printText( '>' ); 505 if ( _indenting && ! state.preserveSpace && 509 ( state.empty || state.afterElement ) ) 510 _printer.breakLine(); 511 } 512 preserveSpace = state.preserveSpace; 513 514 517 _printer.printText( '<' ); 519 if ( _xhtml ) 520 _printer.printText( tagName.toLowerCase(Locale.ENGLISH) ); 521 else 522 _printer.printText( tagName ); 523 _printer.indent(); 524 525 if ( attrs != null ) { 529 for ( i = 0 ; i < attrs.getLength() ; ++i ) { 530 _printer.printSpace(); 531 name = attrs.getName( i ).toLowerCase(Locale.ENGLISH); 532 value = attrs.getValue( i ); 533 if ( _xhtml ) { 534 if ( value == null ) { 536 _printer.printText( name ); 537 _printer.printText( "=\"\"" ); 538 } else { 539 _printer.printText( name ); 540 _printer.printText( "=\"" ); 541 printEscaped( value ); 542 _printer.printText( '"' ); 543 } 544 } else { 545 if ( value == null ) { 548 value = ""; 549 } 550 if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 ) 551 _printer.printText( name ); 552 else if ( HTMLdtd.isURI( tagName, name ) ) { 553 _printer.printText( name ); 554 _printer.printText( "=\"" ); 555 _printer.printText( escapeURI( value ) ); 556 _printer.printText( '"' ); 557 } else if ( HTMLdtd.isBoolean( tagName, name ) ) 558 _printer.printText( name ); 559 else { 560 _printer.printText( name ); 561 _printer.printText( "=\"" ); 562 printEscaped( value ); 563 _printer.printText( '"' ); 564 } 565 } 566 } 567 } 568 if ( HTMLdtd.isPreserveSpace( tagName ) ) 569 preserveSpace = true; 570 571 state = enterElementState( null, null, tagName, preserveSpace ); 575 576 if ( tagName.equalsIgnoreCase( "A" ) || tagName.equalsIgnoreCase( "TD" ) ) { 578 state.empty = false; 579 _printer.printText( '>' ); 580 } 581 582 if ( tagName.equalsIgnoreCase( "SCRIPT" ) || 586 tagName.equalsIgnoreCase( "STYLE" ) ) { 587 if ( _xhtml ) { 588 state.doCData = true; 590 } else { 591 state.unescaped = true; 593 } 594 } 595 } catch ( IOException except ) { 596 throw new SAXException ( except ); 597 } 598 } 599 600 601 public void endElement( String tagName ) 602 throws SAXException 603 { 604 endElement( null, null, tagName ); 605 } 606 607 608 612 613 625 protected void startDocument( String rootTagName ) 626 throws IOException 627 { 628 StringBuffer buffer; 629 630 _printer.leaveDTD(); 633 if ( ! _started ) { 634 if ( _docTypePublicId == null && _docTypeSystemId == null ) { 638 if ( _xhtml ) { 639 _docTypePublicId = HTMLdtd.XHTMLPublicId; 640 _docTypeSystemId = HTMLdtd.XHTMLSystemId; 641 } else { 642 _docTypePublicId = HTMLdtd.HTMLPublicId; 643 _docTypeSystemId = HTMLdtd.HTMLSystemId; 644 } 645 } 646 647 if ( ! _format.getOmitDocumentType() ) { 648 if ( _docTypePublicId != null && ( ! _xhtml || _docTypeSystemId != null ) ) { 655 if (_xhtml) { 656 _printer.printText( "<!DOCTYPE html PUBLIC " ); 657 } 658 else { 659 _printer.printText( "<!DOCTYPE HTML PUBLIC " ); 660 } 661 printDoctypeURL( _docTypePublicId ); 662 if ( _docTypeSystemId != null ) { 663 if ( _indenting ) { 664 _printer.breakLine(); 665 _printer.printText( " " ); 666 } else 667 _printer.printText( ' ' ); 668 printDoctypeURL( _docTypeSystemId ); 669 } 670 _printer.printText( '>' ); 671 _printer.breakLine(); 672 } else if ( _docTypeSystemId != null ) { 673 if (_xhtml) { 674 _printer.printText( "<!DOCTYPE html SYSTEM " ); 675 } 676 else { 677 _printer.printText( "<!DOCTYPE HTML SYSTEM " ); 678 } 679 printDoctypeURL( _docTypeSystemId ); 680 _printer.printText( '>' ); 681 _printer.breakLine(); 682 } 683 } 684 } 685 686 _started = true; 687 serializePreRoot(); 689 } 690 691 692 697 protected void serializeElement( Element elem ) 698 throws IOException 699 { 700 Attr attr; 701 NamedNodeMap attrMap; 702 int i; 703 Node child; 704 ElementState state; 705 boolean preserveSpace; 706 String name; 707 String value; 708 String tagName; 709 710 tagName = elem.getTagName(); 711 state = getElementState(); 712 if ( isDocumentState() ) { 713 if ( ! _started ) 718 startDocument( tagName ); 719 } else { 720 if ( state.empty ) 724 _printer.printText( '>' ); 725 if ( _indenting && ! state.preserveSpace && 729 ( state.empty || state.afterElement ) ) 730 _printer.breakLine(); 731 } 732 preserveSpace = state.preserveSpace; 733 734 737 _printer.printText( '<' ); 739 if ( _xhtml ) 740 _printer.printText( tagName.toLowerCase(Locale.ENGLISH) ); 741 else 742 _printer.printText( tagName ); 743 _printer.indent(); 744 745 attrMap = elem.getAttributes(); 751 if ( attrMap != null ) { 752 for ( i = 0 ; i < attrMap.getLength() ; ++i ) { 753 attr = (Attr ) attrMap.item( i ); 754 name = attr.getName().toLowerCase(Locale.ENGLISH); 755 value = attr.getValue(); 756 if ( attr.getSpecified() ) { 757 _printer.printSpace(); 758 if ( _xhtml ) { 759 if ( value == null ) { 761 _printer.printText( name ); 762 _printer.printText( "=\"\"" ); 763 } else { 764 _printer.printText( name ); 765 _printer.printText( "=\"" ); 766 printEscaped( value ); 767 _printer.printText( '"' ); 768 } 769 } else { 770 if ( value == null ) { 773 value = ""; 774 } 775 if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 ) 776 _printer.printText( name ); 777 else if ( HTMLdtd.isURI( tagName, name ) ) { 778 _printer.printText( name ); 779 _printer.printText( "=\"" ); 780 _printer.printText( escapeURI( value ) ); 781 _printer.printText( '"' ); 782 } else if ( HTMLdtd.isBoolean( tagName, name ) ) 783 _printer.printText( name ); 784 else { 785 _printer.printText( name ); 786 _printer.printText( "=\"" ); 787 printEscaped( value ); 788 _printer.printText( '"' ); 789 } 790 } 791 } 792 } 793 } 794 if ( HTMLdtd.isPreserveSpace( tagName ) ) 795 preserveSpace = true; 796 797 if ( elem.hasChildNodes() || ! HTMLdtd.isEmptyTag( tagName ) ) { 800 state = enterElementState( null, null, tagName, preserveSpace ); 803 804 if ( tagName.equalsIgnoreCase( "A" ) || tagName.equalsIgnoreCase( "TD" ) ) { 806 state.empty = false; 807 _printer.printText( '>' ); 808 } 809 810 if ( tagName.equalsIgnoreCase( "SCRIPT" ) || 814 tagName.equalsIgnoreCase( "STYLE" ) ) { 815 if ( _xhtml ) { 816 state.doCData = true; 818 } else { 819 state.unescaped = true; 821 } 822 } 823 child = elem.getFirstChild(); 824 while ( child != null ) { 825 serializeNode( child ); 826 child = child.getNextSibling(); 827 } 828 endElementIO( null, null, tagName ); 829 } else { 830 _printer.unindent(); 831 if ( _xhtml ) 834 _printer.printText( " />" ); 835 else 836 _printer.printText( '>' ); 837 state.afterElement = true; 839 state.empty = false; 840 if ( isDocumentState() ) 841 _printer.flush(); 842 } 843 } 844 845 846 847 protected void characters( String text ) 848 throws IOException 849 { 850 ElementState state; 851 852 state = content(); 854 super.characters( text ); 855 } 856 857 858 protected String getEntityRef( int ch ) 859 { 860 return HTMLdtd.fromChar( ch ); 861 } 862 863 864 protected String escapeURI( String uri ) 865 { 866 int index; 867 868 index = uri.indexOf( "\"" ); 871 if ( index >= 0 ) 872 return uri.substring( 0, index ); 873 else 874 return uri; 875 } 876 877 878 } 879 880 881 882 883 | Popular Tags |