1 57 58 59 73 74 package org.enhydra.apache.xml.serialize; 75 76 77 import java.io.IOException ; 78 import java.io.OutputStream ; 79 import java.io.Writer ; 80 import java.util.Enumeration ; 81 82 import org.w3c.dom.Attr ; 83 import org.w3c.dom.Element ; 84 import org.w3c.dom.NamedNodeMap ; 85 import org.w3c.dom.Node ; 86 import org.xml.sax.AttributeList ; 87 import org.xml.sax.Attributes ; 88 import org.xml.sax.SAXException ; 89 90 91 129 public class HTMLSerializer 130 extends BaseMarkupSerializer 131 { 132 133 134 137 private static boolean _xhtml; 138 139 140 public static String XHTMLNamespace = ""; 141 142 143 144 145 152 protected HTMLSerializer( boolean xhtml, OutputFormat format ) 153 { 154 super( format ); 155 _xhtml = xhtml; 156 } 157 158 159 164 public HTMLSerializer() 165 { 166 this( false, new OutputFormat( Method.HTML, "ISO-8859-1", false ) ); 167 } 168 169 170 175 public HTMLSerializer( OutputFormat format ) 176 { 177 this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) ); 178 } 179 180 181 182 190 public HTMLSerializer( Writer writer, OutputFormat format ) 191 { 192 this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) ); 193 setOutputCharStream( writer ); 194 } 195 196 197 205 public HTMLSerializer( OutputStream output, OutputFormat format ) 206 { 207 this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) ); 208 setOutputByteStream( output ); 209 } 210 211 212 public void setOutputFormat( OutputFormat format ) 213 { 214 super.setOutputFormat( format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) ); 215 } 216 217 218 222 223 public void startElement( String namespaceURI, String localName, 224 String rawName, Attributes attrs ) 225 throws SAXException 226 { 227 int i; 228 boolean preserveSpace; 229 ElementState state; 230 String name; 231 String value; 232 String htmlName; 233 boolean addNSAttr = false; 234 235 try { 236 if ( _printer == null ) 237 throw new IllegalStateException ( "SER002 No writer supplied for serializer" ); 238 239 state = getElementState(); 240 if ( isDocumentState() ) { 241 if ( ! _started ) 246 startDocument( localName == null ? rawName : localName ); 247 } else { 248 if ( state.empty ) 252 _printer.printText( '>' ); 253 if ( _indenting && ! state.preserveSpace && 257 ( state.empty || state.afterElement ) ) 258 _printer.breakLine(); 259 } 260 preserveSpace = state.preserveSpace; 261 262 265 if ( rawName == null ) { 266 rawName = localName; 267 if ( namespaceURI != null ) { 268 String prefix; 269 prefix = getPrefix( namespaceURI ); 270 if ( prefix.length() > 0 ) 271 rawName = prefix + ":" + localName; 272 } 273 addNSAttr = true; 274 } 275 if ( namespaceURI == null ) 276 htmlName = rawName; 277 else { 278 if ( namespaceURI.equals( XHTMLNamespace ) ) 279 htmlName = localName; 280 else 281 htmlName = null; 282 } 283 284 _printer.printText( '<' ); 286 if ( _xhtml ) 287 _printer.printText( rawName.toLowerCase() ); 288 else 289 _printer.printText( rawName ); 290 _printer.indent(); 291 292 if ( attrs != null ) { 296 for ( i = 0 ; i < attrs.getLength() ; ++i ) { 297 _printer.printSpace(); 298 name = attrs.getQName( i ).toLowerCase();; 299 value = attrs.getValue( i ); 300 if ( _xhtml || namespaceURI != null ) { 301 if ( value == null ) { 303 _printer.printText( name ); 304 _printer.printText( "=\"\"" ); 305 } else { 306 _printer.printText( name ); 307 _printer.printText( "=\"" ); 308 printEscaped( value ); 309 _printer.printText( '"' ); 310 } 311 } else { 312 if ( value == null ) { 315 value = ""; 316 } 317 if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 ) 318 _printer.printText( name ); 319 else if ( HTMLdtd.isURI( rawName, name ) ) { 320 _printer.printText( name ); 321 _printer.printText( "=\"" ); 322 _printer.printText( escapeURI( value ) ); 323 _printer.printText( '"' ); 324 } else if ( HTMLdtd.isBoolean( rawName, name ) ) 325 _printer.printText( name ); 326 else { 327 _printer.printText( name ); 328 _printer.printText( "=\"" ); 329 printEscaped( value ); 330 _printer.printText( '"' ); 331 } 332 } 333 } 334 } 335 if ( htmlName != null && HTMLdtd.isPreserveSpace( htmlName ) ) 336 preserveSpace = true; 337 338 if ( addNSAttr ) { 339 Enumeration enumer; 340 341 enumer = _prefixes.keys(); 342 while ( enumer.hasMoreElements() ) { 343 _printer.printSpace(); 344 value = (String ) enumer.nextElement(); 345 name = (String ) _prefixes.get( value ); 346 if ( name.length() == 0 ) { 347 _printer.printText( "xmlns=\"" ); 348 printEscaped( value ); 349 _printer.printText( '"' ); 350 } else { 351 _printer.printText( "xmlns:" ); 352 _printer.printText( name ); 353 _printer.printText( "=\"" ); 354 printEscaped( value ); 355 _printer.printText( '"' ); 356 } 357 } 358 } 359 360 state = enterElementState( namespaceURI, localName, rawName, preserveSpace ); 364 365 367 if ( htmlName != null && ( htmlName.equalsIgnoreCase( "A" ) || 368 htmlName.equalsIgnoreCase( "TD" ) ) ) { 369 state.empty = false; 370 _printer.printText( '>' ); 371 } 372 373 if ( htmlName != null && ( rawName.equalsIgnoreCase( "SCRIPT" ) || 377 rawName.equalsIgnoreCase( "STYLE" ) ) ) { 378 if ( _xhtml ) { 379 state.doCData = true; 381 } else { 382 state.unescaped = true; 384 } 385 } 386 } catch ( IOException except ) { 387 throw new SAXException ( except ); 388 } 389 } 390 391 392 public void endElement( String namespaceURI, String localName, 393 String rawName ) 394 throws SAXException 395 { 396 try { 397 endElementIO( namespaceURI, localName, rawName ); 398 } catch ( IOException except ) { 399 throw new SAXException ( except ); 400 } 401 } 402 403 404 public void endElementIO( String namespaceURI, String localName, 405 String rawName ) 406 throws IOException 407 { 408 ElementState state; 409 String htmlName; 410 411 _printer.unindent(); 415 state = getElementState(); 416 417 if ( state.namespaceURI == null ) 418 htmlName = state.rawName; 419 else { 420 if ( state.namespaceURI.equals( XHTMLNamespace ) ) 421 htmlName = state.localName; 422 else 423 htmlName = null; 424 } 425 426 if ( _xhtml) { 427 if ( state.empty ) { 428 _printer.printText( " />" ); 429 } else { 430 if ( state.inCData ) 432 _printer.printText( "]]>" ); 433 _printer.printText( "</" ); 435 _printer.printText( state.rawName.toLowerCase() ); 436 _printer.printText( '>' ); 437 } 438 } else { 439 if ( state.empty ) 440 _printer.printText( '>' ); 441 if ( htmlName == null || ! HTMLdtd.isOnlyOpening( htmlName ) ) { 447 if ( _indenting && ! state.preserveSpace && state.afterElement ) 448 _printer.breakLine(); 449 if ( state.inCData ) 451 _printer.printText( "]]>" ); 452 _printer.printText( "</" ); 453 _printer.printText( state.rawName ); 454 _printer.printText( '>' ); 455 } 456 } 457 state = leaveElementState(); 460 if ( htmlName == null || ( ! htmlName.equalsIgnoreCase( "A" ) && 462 ! htmlName.equalsIgnoreCase( "TD" ) ) ) 463 464 state.afterElement = true; 465 state.empty = false; 466 if ( isDocumentState() ) 467 _printer.flush(); 468 } 469 470 471 475 476 public void characters( char[] chars, int start, int length ) 477 throws SAXException 478 { 479 ElementState state; 480 481 try { 482 state = content(); 484 state.doCData = false; 485 super.characters( chars, start, length ); 486 } catch ( IOException except ) { 487 throw new SAXException ( except ); 488 } 489 } 490 491 492 public void startElement( String tagName, AttributeList attrs ) 493 throws SAXException 494 { 495 int i; 496 boolean preserveSpace; 497 ElementState state; 498 String name; 499 String value; 500 501 try { 502 if ( _printer == null ) 503 throw new IllegalStateException ( "SER002 No writer supplied for serializer" ); 504 505 state = getElementState(); 506 if ( isDocumentState() ) { 507 if ( ! _started ) 512 startDocument( tagName ); 513 } else { 514 if ( state.empty ) 518 _printer.printText( '>' ); 519 if ( _indenting && ! state.preserveSpace && 523 ( state.empty || state.afterElement ) ) 524 _printer.breakLine(); 525 } 526 preserveSpace = state.preserveSpace; 527 528 531 _printer.printText( '<' ); 533 if ( _xhtml ) 534 _printer.printText( tagName.toLowerCase() ); 535 else 536 _printer.printText( tagName ); 537 _printer.indent(); 538 539 if ( attrs != null ) { 543 for ( i = 0 ; i < attrs.getLength() ; ++i ) { 544 _printer.printSpace(); 545 name = attrs.getName( i ).toLowerCase();; 546 value = attrs.getValue( i ); 547 if ( _xhtml ) { 548 if ( value == null ) { 550 _printer.printText( name ); 551 _printer.printText( "=\"\"" ); 552 } else { 553 _printer.printText( name ); 554 _printer.printText( "=\"" ); 555 printEscaped( value ); 556 _printer.printText( '"' ); 557 } 558 } else { 559 if ( value == null ) { 562 value = ""; 563 } 564 if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 ) 565 _printer.printText( name ); 566 else if ( HTMLdtd.isURI( tagName, name ) ) { 567 _printer.printText( name ); 568 _printer.printText( "=\"" ); 569 _printer.printText( escapeURI( value ) ); 570 _printer.printText( '"' ); 571 } else if ( HTMLdtd.isBoolean( tagName, name ) ) 572 _printer.printText( name ); 573 else { 574 _printer.printText( name ); 575 _printer.printText( "=\"" ); 576 printEscaped( value ); 577 _printer.printText( '"' ); 578 } 579 } 580 } 581 } 582 if ( HTMLdtd.isPreserveSpace( tagName ) ) 583 preserveSpace = true; 584 585 state = enterElementState( null, null, tagName, preserveSpace ); 589 590 if ( tagName.equalsIgnoreCase( "A" ) || tagName.equalsIgnoreCase( "TD" ) ) { 592 state.empty = false; 593 _printer.printText( '>' ); 594 } 595 596 if ( tagName.equalsIgnoreCase( "SCRIPT" ) || 600 tagName.equalsIgnoreCase( "STYLE" ) ) { 601 if ( _xhtml ) { 602 state.doCData = true; 604 } else { 605 state.unescaped = true; 607 } 608 } 609 } catch ( IOException except ) { 610 throw new SAXException ( except ); 611 } 612 } 613 614 615 public void endElement( String tagName ) 616 throws SAXException 617 { 618 endElement( null, null, tagName ); 619 } 620 621 622 626 627 639 protected void startDocument( String rootTagName ) 640 throws IOException 641 { 642 StringBuffer buffer; 643 644 _printer.leaveDTD(); 647 if ( ! _started ) { 648 if ( _docTypePublicId == null && _docTypeSystemId == null ) { 652 if ( _xhtml ) { 653 _docTypePublicId = HTMLdtd.XHTMLPublicId; 654 _docTypeSystemId = HTMLdtd.XHTMLSystemId; 655 } else { 656 _docTypePublicId = HTMLdtd.HTMLPublicId; 657 _docTypeSystemId = HTMLdtd.HTMLSystemId; 658 } 659 } 660 661 if ( ! _format.getOmitDocumentType() ) { 662 if ( _docTypePublicId != null && ( ! _xhtml || _docTypeSystemId != null ) ) { 667 _printer.printText( "<!DOCTYPE HTML PUBLIC " ); 668 printDoctypeURL( _docTypePublicId ); 669 if ( _docTypeSystemId != null ) { 670 if ( _indenting ) { 671 _printer.breakLine(); 672 _printer.printText( " " ); 673 } else 674 _printer.printText( ' ' ); 675 printDoctypeURL( _docTypeSystemId ); 676 } 677 _printer.printText( '>' ); 678 _printer.breakLine(); 679 } else if ( _docTypeSystemId != null ) { 680 _printer.printText( "<!DOCTYPE HTML SYSTEM " ); 681 printDoctypeURL( _docTypeSystemId ); 682 _printer.printText( '>' ); 683 _printer.breakLine(); 684 } 685 } 686 } 687 688 _started = true; 689 serializePreRoot(); 691 } 692 693 694 699 protected void serializeElement( Element elem ) 700 throws IOException 701 { 702 Attr attr; 703 NamedNodeMap attrMap; 704 int i; 705 Node child; 706 ElementState state; 707 boolean preserveSpace; 708 String name; 709 String value; 710 String tagName; 711 712 tagName = elem.getTagName(); 713 state = getElementState(); 714 if ( isDocumentState() ) { 715 if ( ! _started ) 720 startDocument( tagName ); 721 } else { 722 if ( state.empty ) 726 _printer.printText( '>' ); 727 if ( _indenting && ! state.preserveSpace && 731 ( state.empty || state.afterElement ) ) 732 _printer.breakLine(); 733 } 734 preserveSpace = state.preserveSpace; 735 736 739 _printer.printText( '<' ); 741 if ( _xhtml ) 742 _printer.printText( tagName.toLowerCase() ); 743 else 744 _printer.printText( tagName ); 745 _printer.indent(); 746 747 attrMap = elem.getAttributes(); 753 if ( attrMap != null ) { 754 for ( i = 0 ; i < attrMap.getLength() ; ++i ) { 755 attr = (Attr ) attrMap.item( i ); 756 name = attr.getName().toLowerCase(); 757 value = attr.getValue(); 758 if ( attr.getSpecified() ) { 759 _printer.printSpace(); 760 if ( _xhtml ) { 761 if ( value == null ) { 763 _printer.printText( name ); 764 _printer.printText( "=\"\"" ); 765 } else { 766 _printer.printText( name ); 767 _printer.printText( "=\"" ); 768 printEscaped( value ); 769 _printer.printText( '"' ); 770 } 771 } else { 772 if ( value == null ) { 775 value = ""; 776 } 777 if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 ) 778 _printer.printText( name ); 779 else if ( HTMLdtd.isURI( tagName, name ) ) { 780 _printer.printText( name ); 781 _printer.printText( "=\"" ); 782 _printer.printText( escapeURI( value ) ); 783 _printer.printText( '"' ); 784 } else if ( HTMLdtd.isBoolean( tagName, name ) ) 785 _printer.printText( name ); 786 else { 787 _printer.printText( name ); 788 _printer.printText( "=\"" ); 789 printEscaped( value ); 790 _printer.printText( '"' ); 791 } 792 } 793 } 794 } 795 } 796 if ( HTMLdtd.isPreserveSpace( tagName ) ) 797 preserveSpace = true; 798 799 if ( elem.hasChildNodes() || ! HTMLdtd.isEmptyTag( tagName ) ) { 802 state = enterElementState( null, null, tagName, preserveSpace ); 805 806 if ( tagName.equalsIgnoreCase( "A" ) || tagName.equalsIgnoreCase( "TD" ) ) { 808 state.empty = false; 809 _printer.printText( '>' ); 810 } 811 812 if ( tagName.equalsIgnoreCase( "SCRIPT" ) || 816 tagName.equalsIgnoreCase( "STYLE" ) ) { 817 if ( _xhtml ) { 818 state.doCData = true; 820 } else { 821 state.unescaped = true; 823 } 824 } 825 child = elem.getFirstChild(); 826 while ( child != null ) { 827 serializeNode( child ); 828 child = child.getNextSibling(); 829 } 830 endElementIO( null, null, tagName ); 831 } else { 832 _printer.unindent(); 833 if ( _xhtml ) 836 _printer.printText( " />" ); 837 else 838 _printer.printText( '>' ); 839 state.afterElement = true; 841 state.empty = false; 842 if ( isDocumentState() ) 843 _printer.flush(); 844 } 845 } 846 847 848 849 protected void characters( String text ) 850 throws IOException 851 { 852 ElementState state; 853 854 state = content(); 856 super.characters( text ); 857 } 858 859 860 protected String getEntityRef( int ch ) 861 { 862 return HTMLdtd.fromChar( ch ); 863 } 864 865 866 protected String escapeURI( String uri ) 867 { 868 int index; 869 870 index = uri.indexOf( "\"" ); 873 if ( index >= 0 ) 874 return uri.substring( 0, index ); 875 else 876 return uri; 877 } 878 879 880 } 881 882 883 884 885 | Popular Tags |