HTMLSerializer


1   /*
2    * The Apache Software License, Version 1.1
3    *
4    *
5    * Copyright (c) 1999-2004 The Apache Software Foundation.  All rights
6    * reserved.
7    *
8    * Redistribution and use in source and binary forms, with or without
9    * modification, are permitted provided that the following conditions
10   * are met:
11   *
12   * 1. Redistributions of source code must retain the above copyright
13   *    notice, this list of conditions and the following disclaimer.
14   *
15   * 2. Redistributions in binary form must reproduce the above copyright
16   *    notice, this list of conditions and the following disclaimer in
17   *    the documentation and/or other materials provided with the
18   *    distribution.
19   *
20   * 3. The end-user documentation included with the redistribution,
21   *    if any, must include the following acknowledgment:
22   *       "This product includes software developed by the
23   *        Apache Software Foundation (http://www.apache.org/)."
24   *    Alternately, this acknowledgment may appear in the software itself,
25   *    if and wherever such third-party acknowledgments normally appear.
26   *
27   * 4. The names "Xerces" and "Apache Software Foundation" must
28   *    not be used to endorse or promote products derived from this
29   *    software without prior written permission. For written
30   *    permission, please contact apache@apache.org.
31   *
32   * 5. Products derived from this software may not be called "Apache",
33   *    nor may "Apache" appear in their name, without prior written
34   *    permission of the Apache Software Foundation.
35   *
36   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47   * SUCH DAMAGE.
48   * ====================================================================
49   *
50   * This software consists of voluntary contributions made by many
51   * individuals on behalf of the Apache Software Foundation and was
52   * originally based on software copyright (c) 1999, International
53   * Business Machines, Inc., http://www.apache.org.  For more
54   * information on the Apache Software Foundation, please see
55   * <http://www.apache.org/>.
56   */
57  
58  
59  // Sep 14, 2000:
60  //  Fixed serializer to report IO exception directly, instead at
61  //  the end of document processing.
62  //  Reported by Patrick Higgins <phiggins@transzap.com>
63  // Aug 21, 2000:
64  //  Fixed bug in startDocument not calling prepare.
65  //  Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
66  // Aug 21, 2000:
67  //  Added ability to omit DOCTYPE declaration.
68  // Sep 1, 2000:
69  //   If no output format is provided the serializer now defaults
70  //   to ISO-8859-1 encoding. Reported by Mikael Staldal
71  //   <d96-mst@d.kth.se>
72  
73  
74  package com.sun.org.apache.xml.internal.serialize;
75  
76  import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter;
77  
78  import java.io.IOException  ;
79  import java.io.OutputStream  ;
80  import java.io.Writer  ;
81  import java.util.Enumeration  ;
82  import java.util.Locale  ;
83  
84  import org.w3c.dom.Attr  ;
85  import org.w3c.dom.Element  ;
86  import org.w3c.dom.NamedNodeMap  ;
87  import org.w3c.dom.Node  ;
88  import org.xml.sax.AttributeList  ;
89  import org.xml.sax.Attributes  ;
90  import org.xml.sax.SAXException  ;
91  
92  
93  /**
94   * Implements an HTML/XHTML serializer supporting both DOM and SAX
95   * pretty serializing. HTML/XHTML mode is determined in the
96   * constructor.  For usage instructions see {@link Serializer}.
97   * <p>
98   * If an output stream is used, the encoding is taken from the
99   * output format (defaults to <tt>UTF-8</tt>). If a writer is
100  * used, make sure the writer uses the same encoding (if applies)
101  * as specified in the output format.
102  * <p>
103  * The serializer supports both DOM and SAX. DOM serializing is done
104  * by calling {@link #serialize} and SAX serializing is done by firing
105  * SAX events and using the serializer as a document handler.
106  * <p>
107  * If an I/O exception occurs while serializing, it is reported
108  * directly instead of being held until the end of the document:
109  * the SAX handler methods rethrow it wrapped in a {@link SAXException}
110  * and the DOM serializing methods throw the {@link IOException} itself.
111  * <p>
112  * For elements that are not specified as whitespace preserving,
113  * the serializer will potentially break long text lines at space
114  * boundaries, indent lines, and serialize elements on separate
115  * lines. Line terminators will be regarded as spaces, and
116  * spaces at beginning of line will be stripped.
117  * <p>
118  * XHTML is slightly different than HTML:
119  * <ul>
120  * <li>Element/attribute names are lower case and case matters
121  * <li>Attributes must specify value, even if empty string
122  * <li>Empty elements must have '/' in empty tag
123  * <li>Contents of SCRIPT and STYLE elements serialized as CDATA
124  * </ul>
125  *
126  * @deprecated
127  * @version $Revision: 1.26 $ $Date: 2004/02/16 05:24:55 $
128  * @author <a HREF="mailto:arkin@intalio.com">Assaf Arkin</a>
129  * @see Serializer
130  */
131 public class HTMLSerializer
132     extends BaseMarkupSerializer
133 {
134 
135 
136     /**
137      * True if serializing in XHTML format.
138      */
139     private boolean _xhtml;
140 
141 
142     public static final String   XHTMLNamespace = "http://www.w3.org/1999/xhtml";
143 
144     // for users to override XHTMLNamespace if need be.
145     private String   fUserXHTMLNamespace = null;
146 
147 
148     /**
149      * Constructs a new HTML/XHTML serializer depending on the value of
150      * <tt>xhtml</tt>. The serializer cannot be used without calling
151      * {@link #setOutputCharStream} or {@link #setOutputByteStream} first.
152      *
153      * @param xhtml True if XHTML serializing
154      */
155     protected HTMLSerializer( boolean xhtml, OutputFormat format )
156     {
157         super( format );
158         _xhtml = xhtml;
159     }
160 
161 
162     /**
163      * Constructs a new serializer. The serializer cannot be used without
164      * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
165      * first.
166      */
167     public HTMLSerializer()
168     {
169         this( false, new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
170     }
171 
172 
173     /**
174      * Constructs a new serializer. The serializer cannot be used without
175      * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
176      * first.
177      */
178     public HTMLSerializer( OutputFormat format )
179     {
180         this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
181     }
182 
183 
184 
185     /**
186      * Constructs a new serializer that writes to the specified writer
187      * using the specified output format. If <tt>format</tt> is null,
188      * will use a default output format.
189      *
190      * @param writer The writer to use
191      * @param format The output format to use, null for the default
192      */
193     public HTMLSerializer( Writer   writer, OutputFormat format )
194     {
195         this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
196         setOutputCharStream( writer );
197     }
198 
199 
200     /**
201      * Constructs a new serializer that writes to the specified output
202      * stream using the specified output format. If <tt>format</tt>
203      * is null, will use a default output format.
204      *
205      * @param output The output stream to use
206      * @param format The output format to use, null for the default
207      */
208     public HTMLSerializer( OutputStream   output, OutputFormat format )
209     {
210         this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
211         setOutputByteStream( output );
212     }
213 
214 
215     public void setOutputFormat( OutputFormat format )
216     {
217         super.setOutputFormat( format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
218     }
219 
220     // Set  value for alternate XHTML namespace.
221     public void setXHTMLNamespace(String   newNamespace) {
222         fUserXHTMLNamespace = newNamespace;
223     } // setXHTMLNamespace(String)
224 
225     //-----------------------------------------//
226     // SAX content handler serializing methods //
227     //-----------------------------------------//
228 
229 
230     public void startElement( String   namespaceURI, String   localName,
231                               String   rawName, Attributes   attrs )
232         throws SAXException  
233     {
234         int          i;
235         boolean      preserveSpace;
236         ElementState state;
237         String         name;
238         String         value;
239         String         htmlName;
240         boolean      addNSAttr = false;
241 
242         try {
243             if ( _printer == null )
244                 throw new IllegalStateException  ( 
245                     DOMMessageFormatter.formatMessage(
246                     DOMMessageFormatter.SERIALIZER_DOMAIN,
247                     "NoWriterSupplied", null));
248 
249             state = getElementState();
250             if ( isDocumentState() ) {
251                 // If this is the root element handle it differently.
252                 // If the first root element in the document, serialize
253                 // the document's DOCTYPE. Space preserving defaults
254                 // to that of the output format.
255                 if ( ! _started )
256                     startDocument( (localName == null || localName.length() == 0) 
257                         ? rawName : localName );
258             } else {
259                 // For any other element, if first in parent, then
260                 // close parent's opening tag and use the parnet's
261                 // space preserving.
262                 if ( state.empty )
263                     _printer.printText( '>' );
264                 // Indent this element on a new line if the first
265                 // content of the parent element or immediately
266                 // following an element.
267                 if ( _indenting && ! state.preserveSpace &&
268                      ( state.empty || state.afterElement ) )
269                     _printer.breakLine();
270             }
271             preserveSpace = state.preserveSpace;
272 
273             // Do not change the current element state yet.
274             // This only happens in endElement().
275             
276             // As per SAX2, the namespace URI is an empty string if the element has no
277             // namespace URI, or namespaces is turned off. The check against null protects
278             // against broken SAX implementations, so I've left it there. - mrglavas
279             boolean hasNamespaceURI = (namespaceURI != null && namespaceURI.length() != 0);
280 
281             // SAX2: rawName (QName) could be empty string if 
282             // namespace-prefixes property is false.
283             if ( rawName == null || rawName.length() == 0) {
284                 rawName = localName;
285                 if ( hasNamespaceURI ) {
286                     String   prefix;
287                     prefix = getPrefix( namespaceURI );
288                     if ( prefix != null && prefix.length() != 0 )
289                         rawName = prefix + ":" + localName;
290                 }
291                 addNSAttr = true;
292             }
293             if ( !hasNamespaceURI )
294                 htmlName = rawName;
295             else {
296                 if ( namespaceURI.equals( XHTMLNamespace ) ||
297                         (fUserXHTMLNamespace != null && fUserXHTMLNamespace.equals(namespaceURI)) )
298                     htmlName = localName;
299                 else
300                     htmlName = null;
301             }
302 
303             // XHTML: element names are lower case, DOM will be different
304             _printer.printText( '<' );
305             if ( _xhtml )
306                 _printer.printText( rawName.toLowerCase(Locale.ENGLISH) );
307             else
308                 _printer.printText( rawName );
309             _printer.indent();
310 
311             // For each attribute serialize it's name and value as one part,
312             // separated with a space so the element can be broken on
313             // multiple lines.
314             if ( attrs != null ) {
315                 for ( i = 0 ; i < attrs.getLength() ; ++i ) {
316                     _printer.printSpace();
317                     name = attrs.getQName( i ).toLowerCase(Locale.ENGLISH);
318                     value = attrs.getValue( i );
319                     if ( _xhtml || hasNamespaceURI ) {
320                         // XHTML: print empty string for null values.
321                         if ( value == null ) {
322                             _printer.printText( name );
323                             _printer.printText( "=\"\"" );
324                         } else {
325                             _printer.printText( name );
326                             _printer.printText( "=\"" );
327                             printEscaped( value );
328                             _printer.printText( '"' );
329                         }
330                     } else {
331                         // HTML: Empty values print as attribute name, no value.
332                         // HTML: URI attributes will print unescaped
333                         if ( value == null ) {
334                             value = "";
335                         }
336                         if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
337                             _printer.printText( name );
338                         else if ( HTMLdtd.isURI( rawName, name ) ) {
339                             _printer.printText( name );
340                             _printer.printText( "=\"" );
341                             _printer.printText( escapeURI( value ) );
342                             _printer.printText( '"' );
343                         } else if ( HTMLdtd.isBoolean( rawName, name ) )
344                             _printer.printText( name );
345                         else {
346                             _printer.printText( name );
347                             _printer.printText( "=\"" );
348                             printEscaped( value );
349                             _printer.printText( '"' );
350                         }
351                     }
352                 }
353             }
354             if ( htmlName != null && HTMLdtd.isPreserveSpace( htmlName ) )
355                 preserveSpace = true;
356 
357             if ( addNSAttr ) {
358                 Enumeration   keys;
359 
360                 keys = _prefixes.keys();
361                 while ( keys.hasMoreElements() ) {
362                     _printer.printSpace();
363                     value = (String  ) keys.nextElement();
364                     name = (String  ) _prefixes.get( value );
365                     if ( name.length() == 0 ) {
366                         _printer.printText( "xmlns=\"" );
367                         printEscaped( value );
368                         _printer.printText( '"' );
369                     } else {
370                         _printer.printText( "xmlns:" );
371                         _printer.printText( name );
372                         _printer.printText( "=\"" );
373                         printEscaped( value );
374                         _printer.printText( '"' );
375                     }
376                 }
377             }
378 
379             // Now it's time to enter a new element state
380             // with the tag name and space preserving.
381             // We still do not change the curent element state.
382             state = enterElementState( namespaceURI, localName, rawName, preserveSpace );
383 
384             // Prevents line breaks inside A/TD
385 
386             if ( htmlName != null && ( htmlName.equalsIgnoreCase( "A" ) ||
387                                        htmlName.equalsIgnoreCase( "TD" ) ) ) {
388                 state.empty = false;
389                 _printer.printText( '>' );
390             }
391 
392             // Handle SCRIPT and STYLE specifically by changing the
393             // state of the current element to CDATA (XHTML) or
394             // unescaped (HTML).
395             if ( htmlName != null && ( rawName.equalsIgnoreCase( "SCRIPT" ) ||
396                                        rawName.equalsIgnoreCase( "STYLE" ) ) ) {
397                 if ( _xhtml ) {
398                     // XHTML: Print contents as CDATA section
399                     state.doCData = true;
400                 } else {
401                     // HTML: Print contents unescaped
402                     state.unescaped = true;
403                 }
404             }
405         } catch ( IOException   except ) {
406             throw new SAXException  ( except );
407         }
408     }
409 
410 
411     public void endElement( String   namespaceURI, String   localName,
412                             String   rawName )
413         throws SAXException  
414     {
415         try {
416             endElementIO( namespaceURI, localName, rawName );
417         } catch ( IOException   except ) {
418             throw new SAXException  ( except );
419         }
420     }
421 
422 
423     public void endElementIO( String   namespaceURI, String   localName,
424                               String   rawName )
425         throws IOException  
426     {
427         ElementState state;
428         String         htmlName;
429 
430         // Works much like content() with additions for closing
431         // an element. Note the different checks for the closed
432         // element's state and the parent element's state.
433         _printer.unindent();
434         state = getElementState();
435 
436         if ( state.namespaceURI == null || state.namespaceURI.length() == 0 )
437             htmlName = state.rawName;
438         else {
439             if ( state.namespaceURI.equals( XHTMLNamespace ) ||
440                         (fUserXHTMLNamespace != null && fUserXHTMLNamespace.equals(state.namespaceURI)) )
441                 htmlName = state.localName;
442             else
443                 htmlName = null;
444         }
445 
446         if ( _xhtml) {
447             if ( state.empty ) {
448                 _printer.printText( " />" );
449             } else {
450                 // Must leave CData section first
451                 if ( state.inCData )
452                     _printer.printText( "]]>" );
453                 // XHTML: element names are lower case, DOM will be different
454                 _printer.printText( "</" );
455                 _printer.printText( state.rawName.toLowerCase(Locale.ENGLISH) );
456                 _printer.printText( '>' );
457             }
458         } else {
459             if ( state.empty )
460                 _printer.printText( '>' );
461             // This element is not empty and that last content was
462             // another element, so print a line break before that
463             // last element and this element's closing tag.
464             // [keith] Provided this is not an anchor.
465             // HTML: some elements do not print closing tag (e.g. LI)
466             if ( htmlName == null || ! HTMLdtd.isOnlyOpening( htmlName ) ) {
467                 if ( _indenting && ! state.preserveSpace && state.afterElement )
468                     _printer.breakLine();
469                 // Must leave CData section first (Illegal in HTML, but still)
470                 if ( state.inCData )
471                     _printer.printText( "]]>" );
472                 _printer.printText( "</" );
473                 _printer.printText( state.rawName );
474                 _printer.printText( '>' );
475             }
476         }
477         // Leave the element state and update that of the parent
478         // (if we're not root) to not empty and after element.
479         state = leaveElementState();
480         // Temporary hack to prevent line breaks inside A/TD
481         if ( htmlName == null || ( ! htmlName.equalsIgnoreCase( "A" ) &&
482                                    ! htmlName.equalsIgnoreCase( "TD" ) ) )
483 
484             state.afterElement = true;
485         state.empty = false;
486         if ( isDocumentState() )
487             _printer.flush();
488     }
489 
490 
491     //------------------------------------------//
492     // SAX document handler serializing methods //
493     //------------------------------------------//
494 
495 
496     public void characters( char[] chars, int start, int length )
497         throws SAXException  
498     {
499         ElementState state;
500 
501         try {
502             // HTML: no CDATA section
503             state = content();
504             state.doCData = false;
505             super.characters( chars, start, length );
506         } catch ( IOException   except ) {
507             throw new SAXException  ( except );
508         }
509     }
510 
511 
512     public void startElement( String   tagName, AttributeList   attrs )
513         throws SAXException  
514     {
515         int          i;
516         boolean      preserveSpace;
517         ElementState state;
518         String         name;
519         String         value;
520 
521         try {
522             if ( _printer == null )
523                 throw new IllegalStateException  ( 
524                     DOMMessageFormatter.formatMessage(
525                     DOMMessageFormatter.SERIALIZER_DOMAIN,
526                     "NoWriterSupplied", null));
527 
528 
529             state = getElementState();
530             if ( isDocumentState() ) {
531                 // If this is the root element handle it differently.
532                 // If the first root element in the document, serialize
533                 // the document's DOCTYPE. Space preserving defaults
534                 // to that of the output format.
535                 if ( ! _started )
536                     startDocument( tagName );
537             } else {
538                 // For any other element, if first in parent, then
539                 // close parent's opening tag and use the parnet's
540                 // space preserving.
541                 if ( state.empty )
542                     _printer.printText( '>' );
543                 // Indent this element on a new line if the first
544                 // content of the parent element or immediately
545                 // following an element.
546                 if ( _indenting && ! state.preserveSpace &&
547                      ( state.empty || state.afterElement ) )
548                     _printer.breakLine();
549             }
550             preserveSpace = state.preserveSpace;
551 
552             // Do not change the current element state yet.
553             // This only happens in endElement().
554 
555             // XHTML: element names are lower case, DOM will be different
556             _printer.printText( '<' );
557             if ( _xhtml )
558                 _printer.printText( tagName.toLowerCase(Locale.ENGLISH) );
559             else
560                 _printer.printText( tagName );
561             _printer.indent();
562 
563             // For each attribute serialize it's name and value as one part,
564             // separated with a space so the element can be broken on
565             // multiple lines.
566             if ( attrs != null ) {
567                 for ( i = 0 ; i < attrs.getLength() ; ++i ) {
568                     _printer.printSpace();
569                     name = attrs.getName( i ).toLowerCase(Locale.ENGLISH);
570                     value = attrs.getValue( i );
571                     if ( _xhtml ) {
572                         // XHTML: print empty string for null values.
573                         if ( value == null ) {
574                             _printer.printText( name );
575                             _printer.printText( "=\"\"" );
576                         } else {
577                             _printer.printText( name );
578                             _printer.printText( "=\"" );
579                             printEscaped( value );
580                             _printer.printText( '"' );
581                         }
582                     } else {
583                         // HTML: Empty values print as attribute name, no value.
584                         // HTML: URI attributes will print unescaped
585                         if ( value == null ) {
586                             value = "";
587                         }
588                         if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
589                             _printer.printText( name );
590                         else if ( HTMLdtd.isURI( tagName, name ) ) {
591                             _printer.printText( name );
592                             _printer.printText( "=\"" );
593                             _printer.printText( escapeURI( value ) );
594                             _printer.printText( '"' );
595                         } else if ( HTMLdtd.isBoolean( tagName, name ) )
596                             _printer.printText( name );
597                         else {
598                             _printer.printText( name );
599                             _printer.printText( "=\"" );
600                             printEscaped( value );
601                             _printer.printText( '"' );
602                         }
603                     }
604                 }
605             }
606             if ( HTMLdtd.isPreserveSpace( tagName ) )
607                 preserveSpace = true;
608 
609             // Now it's time to enter a new element state
610             // with the tag name and space preserving.
611             // We still do not change the curent element state.
612             state = enterElementState( null, null, tagName, preserveSpace );
613 
614             // Prevents line breaks inside A/TD
615             if ( tagName.equalsIgnoreCase( "A" ) || tagName.equalsIgnoreCase( "TD" ) ) {
616                 state.empty = false;
617                 _printer.printText( '>' );
618             }
619 
620             // Handle SCRIPT and STYLE specifically by changing the
621             // state of the current element to CDATA (XHTML) or
622             // unescaped (HTML).
623             if ( tagName.equalsIgnoreCase( "SCRIPT" ) ||
624                  tagName.equalsIgnoreCase( "STYLE" ) ) {
625                 if ( _xhtml ) {
626                     // XHTML: Print contents as CDATA section
627                     state.doCData = true;
628                 } else {
629                     // HTML: Print contents unescaped
630                     state.unescaped = true;
631                 }
632             }
633         } catch ( IOException   except ) {
634             throw new SAXException  ( except );
635         }
636     }
637 
638 
639     public void endElement( String   tagName )
640         throws SAXException  
641     {
642         endElement( null, null, tagName );
643     }
644 
645 
646     //------------------------------------------//
647     //     Generic node serializing methods     //
648     //------------------------------------------//
649 
650 
651     /**
652      * Called to serialize the document's DOCTYPE by the root element.
653      * The document type declaration must name the root element,
654      * but the root element is only known when that element is serialized,
655      * and not at the start of the document.
656      * <p>
657      * This method will check if it has not been called before ({@link #_started}),
658      * will serialize the document type declaration, and will serialize all
659      * pre-root comments and PIs that were accumulated in the document
660      * (see {@link #serializePreRoot}). Pre-root will be serialized even if
661      * this is not the first root element of the document.
662      */
663     protected void startDocument( String   rootTagName )
664         throws IOException  
665     {
666         StringBuffer   buffer;
667 
668         // Not supported in HTML/XHTML, but we still have to switch
669         // out of DTD mode.
670         _printer.leaveDTD();
671         if ( ! _started ) {
672             // If the public and system identifiers were not specified
673             // in the output format, use the appropriate ones for HTML
674             // or XHTML.
675             if ( _docTypePublicId == null && _docTypeSystemId == null ) {
676                 if ( _xhtml ) {
677                     _docTypePublicId = HTMLdtd.XHTMLPublicId;
678                     _docTypeSystemId = HTMLdtd.XHTMLSystemId;
679                 } else {
680                     _docTypePublicId = HTMLdtd.HTMLPublicId;
681                     _docTypeSystemId = HTMLdtd.HTMLSystemId;
682                 }
683             }
684 
685             if ( ! _format.getOmitDocumentType() ) {
686                 // XHTML: If public identifier and system identifier
687                 //  specified, print them, else print just system identifier
688                 // HTML: If public identifier specified, print it with
689                 //  system identifier, if specified.
690                 // XHTML requires that all element names are lower case, so the
691                 // root on the DOCTYPE must be 'html'. - mrglavas
692                 if ( _docTypePublicId != null && ( ! _xhtml || _docTypeSystemId != null )  ) {
693                     if (_xhtml) {
694                         _printer.printText( "<!DOCTYPE html PUBLIC " );
695                     }
696                     else {
697                         _printer.printText( "<!DOCTYPE HTML PUBLIC " );
698                     }
699                     printDoctypeURL( _docTypePublicId );
700                     if ( _docTypeSystemId != null ) {
701                         if ( _indenting ) {
702                             _printer.breakLine();
703                             _printer.printText( "                      " );
704                         } else
705                         _printer.printText( ' ' );
706                         printDoctypeURL( _docTypeSystemId );
707                     }
708                     _printer.printText( '>' );
709                     _printer.breakLine();
710                 } else if ( _docTypeSystemId != null ) {
711                     if (_xhtml) {
712                         _printer.printText( "<!DOCTYPE html SYSTEM " );
713                     }
714                     else {
715                         _printer.printText( "<!DOCTYPE HTML SYSTEM " );
716                     }
717                     printDoctypeURL( _docTypeSystemId );
718                     _printer.printText( '>' );
719                     _printer.breakLine();
720                 }
721             }
722         }
723 
724         _started = true;
725         // Always serialize these, even if not the first root element.
726         serializePreRoot();
727     }
728 
729 
730     /**
731      * Called to serialize a DOM element. Equivalent to calling {@link
732      * #startElement}, {@link #endElement} and serializing everything
733      * inbetween, but better optimized.
734      */
735     protected void serializeElement( Element   elem )
736         throws IOException  
737     {
738         Attr           attr;
739         NamedNodeMap   attrMap;
740         int          i;
741         Node           child;
742         ElementState state;
743         boolean      preserveSpace;
744         String         name;
745         String         value;
746         String         tagName;
747 
748         tagName = elem.getTagName();
749         state = getElementState();
750         if ( isDocumentState() ) {
751             // If this is the root element handle it differently.
752             // If the first root element in the document, serialize
753             // the document's DOCTYPE. Space preserving defaults
754             // to that of the output format.
755             if ( ! _started )
756                 startDocument( tagName );
757         } else {
758             // For any other element, if first in parent, then
759             // close parent's opening tag and use the parnet's
760             // space preserving.
761             if ( state.empty )
762                 _printer.printText( '>' );
763             // Indent this element on a new line if the first
764             // content of the parent element or immediately
765             // following an element.
766             if ( _indenting && ! state.preserveSpace &&
767                  ( state.empty || state.afterElement ) )
768                 _printer.breakLine();
769         }
770         preserveSpace = state.preserveSpace;
771 
772         // Do not change the current element state yet.
773         // This only happens in endElement().
774 
775         // XHTML: element names are lower case, DOM will be different
776         _printer.printText( '<' );
777         if ( _xhtml )
778             _printer.printText( tagName.toLowerCase(Locale.ENGLISH) );
779         else
780             _printer.printText( tagName );
781         _printer.indent();
782 
783         // Lookup the element's attribute, but only print specified
784         // attributes. (Unspecified attributes are derived from the DTD.
785         // For each attribute print it's name and value as one part,
786         // separated with a space so the element can be broken on
787         // multiple lines.
788         attrMap = elem.getAttributes();
789         if ( attrMap != null ) {
790             for ( i = 0 ; i < attrMap.getLength() ; ++i ) {
791                 attr = (Attr  ) attrMap.item( i );
792                 name = attr.getName().toLowerCase(Locale.ENGLISH);
793                 value = attr.getValue();
794                 if ( attr.getSpecified() ) {
795                     _printer.printSpace();
796                     if ( _xhtml ) {
797                         // XHTML: print empty string for null values.
798                         if ( value == null ) {
799                             _printer.printText( name );
800                             _printer.printText( "=\"\"" );
801                         } else {
802                             _printer.printText( name );
803                             _printer.printText( "=\"" );
804                             printEscaped( value );
805                             _printer.printText( '"' );
806                         }
807                     } else {
808                         // HTML: Empty values print as attribute name, no value.
809                         // HTML: URI attributes will print unescaped
810                         if ( value == null ) {
811                             value = "";
812                         }
813                         if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
814                             _printer.printText( name );
815                         else if ( HTMLdtd.isURI( tagName, name ) ) {
816                             _printer.printText( name );
817                             _printer.printText( "=\"" );
818                             _printer.printText( escapeURI( value ) );
819                             _printer.printText( '"' );
820                         } else if ( HTMLdtd.isBoolean( tagName, name ) )
821                             _printer.printText( name );
822                         else {
823                             _printer.printText( name );
824                             _printer.printText( "=\"" );
825                             printEscaped( value );
826                             _printer.printText( '"' );
827                         }
828                     }
829                 }
830             }
831         }
832         if ( HTMLdtd.isPreserveSpace( tagName ) )
833             preserveSpace = true;
834 
835         // If element has children, or if element is not an empty tag,
836         // serialize an opening tag.
837         if ( elem.hasChildNodes() || ! HTMLdtd.isEmptyTag( tagName ) ) {
838             // Enter an element state, and serialize the children
839             // one by one. Finally, end the element.
840             state = enterElementState( null, null, tagName, preserveSpace );
841 
842             // Prevents line breaks inside A/TD
843             if ( tagName.equalsIgnoreCase( "A" ) || tagName.equalsIgnoreCase( "TD" ) ) {
844                 state.empty = false;
845                 _printer.printText( '>' );
846             }
847 
848             // Handle SCRIPT and STYLE specifically by changing the
849             // state of the current element to CDATA (XHTML) or
850             // unescaped (HTML).
851             if ( tagName.equalsIgnoreCase( "SCRIPT" ) ||
852                  tagName.equalsIgnoreCase( "STYLE" ) ) {
853                 if ( _xhtml ) {
854                     // XHTML: Print contents as CDATA section
855                     state.doCData = true;
856                 } else {
857                     // HTML: Print contents unescaped
858                     state.unescaped = true;
859                 }
860             }
861             child = elem.getFirstChild();
862             while ( child != null ) {
863                 serializeNode( child );
864                 child = child.getNextSibling();
865             }
866             endElementIO( null, null, tagName );
867         } else {
868             _printer.unindent();
869             // XHTML: Close empty tag with ' />' so it's XML and HTML compatible.
870             // HTML: Empty tags are defined as such in DTD no in document.
871             if ( _xhtml )
872                 _printer.printText( " />" );
873             else
874                 _printer.printText( '>' );
875             // After element but parent element is no longer empty.
876             state.afterElement = true;
877             state.empty = false;
878             if ( isDocumentState() )
879                 _printer.flush();
880         }
881     }
882 
883 
884 
885     protected void characters( String   text )
886         throws IOException  
887     {
888         ElementState state;
889 
890         // HTML: no CDATA section
891         state = content();
892         super.characters( text );
893     }
894 
895 
896     protected String   getEntityRef( int ch )
897     {
898         return HTMLdtd.fromChar( ch );
899     }
900 
901 
902     protected String   escapeURI( String   uri )
903     {
904         int index;
905 
906         // XXX  Apparently Netscape doesn't like if we escape the URI
907         //      using %nn, so we leave it as is, just remove any quotes.
908         index = uri.indexOf( "\"" );
909         if ( index >= 0 )
910             return uri.substring( 0, index );
911         else
912             return uri;
913     }
914 
915 
916 }
917 
918 
919 
920 
921
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags