HTMLSerializer


1   /*
2    * Copyright 1999-2004 The Apache Software Foundation.
3    * 
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * 
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    * 
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  
18  // Sep 14, 2000:
19  //  Fixed serializer to report IO exception directly, instead at
20  //  the end of document processing.
21  //  Reported by Patrick Higgins <phiggins@transzap.com>
22  // Aug 21, 2000:
23  //  Fixed bug in startDocument not calling prepare.
24  //  Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
25  // Aug 21, 2000:
26  //  Added ability to omit DOCTYPE declaration.
27  // Sep 1, 2000:
28  //   If no output format is provided the serializer now defaults
29  //   to ISO-8859-1 encoding. Reported by Mikael Staldal
30  //   <d96-mst@d.kth.se>
31  
32  
33  package org.apache.xml.serialize;
34  
35  import org.apache.xerces.dom.DOMMessageFormatter;
36  
37  import java.io.IOException  ;
38  import java.io.OutputStream  ;
39  import java.io.Writer  ;
40  import java.util.Enumeration  ;
41  import java.util.Locale  ;
42  
43  import org.w3c.dom.Attr  ;
44  import org.w3c.dom.Element  ;
45  import org.w3c.dom.NamedNodeMap  ;
46  import org.w3c.dom.Node  ;
47  import org.xml.sax.AttributeList  ;
48  import org.xml.sax.Attributes  ;
49  import org.xml.sax.SAXException  ;
50  
51  
52  /**
53   * Implements an HTML/XHTML serializer supporting both DOM and SAX
54   * pretty serializing. HTML/XHTML mode is determined in the
55   * constructor.  For usage instructions see {@link Serializer}.
56   * <p>
57   * If an output stream is used, the encoding is taken from the output
58   * format (<tt>ISO-8859-1</tt> when no format is supplied). If a writer
59   * is used, make sure the writer uses the same encoding (if applicable)
60   * as specified in the output format.
61   * <p>
62   * The serializer supports both DOM and SAX. DOM serializing is done
63   * by calling {@link #serialize} and SAX serializing is done by firing
64   * SAX events and using the serializer as a document handler.
65   * <p>
66   * If an I/O exception occurs while serializing through the SAX
67   * handler methods defined in this class, it is reported directly:
68   * the {@link IOException} is wrapped in a {@link SAXException} and
69   * thrown from the handler method in which it occurred.
70   * <p>
71   * For elements that are not specified as whitespace preserving,
72   * the serializer will potentially break long text lines at space
73   * boundaries, indent lines, and serialize elements on separate
74   * lines. Line terminators will be regarded as spaces, and
75   * spaces at beginning of line will be stripped.
76   * <p>
77   * XHTML is slightly different than HTML:
78   * <ul>
79   * <li>Element/attribute names are lower case and case matters
80   * <li>Attributes must specify value, even if empty string
81   * <li>Empty elements must have '/' in empty tag
82   * <li>Contents of SCRIPT and STYLE elements serialized as CDATA
83   * </ul>
84   *
85   * @deprecated This class was deprecated in Xerces 2.6.2. It is
86   * recommended that new applications use JAXP's Transformation API 
87   * for XML (TrAX) for serializing HTML. See the Xerces documentation
88   * for more information.
89   * @version $Revision: 1.28 $ $Date: 2004/09/06 22:57:13 $
90   * @author <a HREF="mailto:arkin@intalio.com">Assaf Arkin</a>
91   * @see Serializer
92   */
93  public class HTMLSerializer
94      extends BaseMarkupSerializer
95  {
96  
97  
98      /**
99       * True if serializing in XHTML format.
100      */
101     private boolean _xhtml;
102 
103 
104     public static final String   XHTMLNamespace = "http://www.w3.org/1999/xhtml";
105 
106     // for users to override XHTMLNamespace if need be.
107     private String   fUserXHTMLNamespace = null;
108 
109 
/**
 * Creates a serializer that emits either HTML or XHTML, as selected by
 * <tt>xhtml</tt>. An output target must still be supplied through
 * {@link #setOutputCharStream} or {@link #setOutputByteStream} before
 * the serializer can be used.
 *
 * @param xhtml True to serialize as XHTML, false to serialize as HTML
 * @param format The output format, handed unchanged to the superclass
 */
protected HTMLSerializer( boolean xhtml, OutputFormat format )
{
    super( format );
    this._xhtml = xhtml;
}
122 
123 
/**
 * Creates an HTML serializer with the default output format
 * (HTML method, ISO-8859-1 encoding). An output target must still be
 * supplied through {@link #setOutputCharStream} or
 * {@link #setOutputByteStream} before the serializer can be used.
 */
public HTMLSerializer()
{
    // A null format makes the one-argument constructor build the default.
    this( (OutputFormat) null );
}
133 
134 
/**
 * Creates an HTML serializer using the given output format. An output
 * target must still be supplied through {@link #setOutputCharStream}
 * or {@link #setOutputByteStream} before the serializer can be used.
 *
 * @param format The output format to use, null for the default
 *   (HTML method, ISO-8859-1 encoding)
 */
public HTMLSerializer( OutputFormat format )
{
    this( false,
          format == null
              ? new OutputFormat( Method.HTML, "ISO-8859-1", false )
              : format );
}
144 
145 
146 
/**
 * Creates an HTML serializer that writes characters to
 * <tt>writer</tt> using the given output format.
 *
 * @param writer The writer to use
 * @param format The output format to use, null for the default
 *   (HTML method, ISO-8859-1 encoding)
 */
public HTMLSerializer( Writer writer, OutputFormat format )
{
    // The one-argument constructor substitutes the default for a null format.
    this( format );
    setOutputCharStream( writer );
}
160 
161 
/**
 * Creates an HTML serializer that writes bytes to <tt>output</tt>
 * using the given output format.
 *
 * @param output The output stream to use
 * @param format The output format to use, null for the default
 *   (HTML method, ISO-8859-1 encoding)
 */
public HTMLSerializer( OutputStream output, OutputFormat format )
{
    // The one-argument constructor substitutes the default for a null format.
    this( format );
    setOutputByteStream( output );
}
175 
176 
177     public void setOutputFormat( OutputFormat format )
178     {
179         super.setOutputFormat( format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
180     }
181 
182     // Set  value for alternate XHTML namespace.
183     public void setXHTMLNamespace(String   newNamespace) {
184         fUserXHTMLNamespace = newNamespace;
185     } // setXHTMLNamespace(String)
186 
187     //-----------------------------------------//
188     // SAX content handler serializing methods //
189     //-----------------------------------------//
190 
191 
192     public void startElement( String   namespaceURI, String   localName,
193                               String   rawName, Attributes   attrs )
194         throws SAXException  
195     {
196         int          i;
197         boolean      preserveSpace;
198         ElementState state;
199         String         name;
200         String         value;
201         String         htmlName;
202         boolean      addNSAttr = false;
203 
204         try {
205             if ( _printer == null )
206                 throw new IllegalStateException  ( 
207                     DOMMessageFormatter.formatMessage(
208                     DOMMessageFormatter.SERIALIZER_DOMAIN,
209                     "NoWriterSupplied", null));
210 
211             state = getElementState();
212             if ( isDocumentState() ) {
213                 // If this is the root element handle it differently.
214                 // If the first root element in the document, serialize
215                 // the document's DOCTYPE. Space preserving defaults
216                 // to that of the output format.
217                 if ( ! _started )
218                     startDocument( (localName == null || localName.length() == 0) 
219                         ? rawName : localName );
220             } else {
221                 // For any other element, if first in parent, then
222                 // close parent's opening tag and use the parnet's
223                 // space preserving.
224                 if ( state.empty )
225                     _printer.printText( '>' );
226                 // Indent this element on a new line if the first
227                 // content of the parent element or immediately
228                 // following an element.
229                 if ( _indenting && ! state.preserveSpace &&
230                      ( state.empty || state.afterElement ) )
231                     _printer.breakLine();
232             }
233             preserveSpace = state.preserveSpace;
234 
235             // Do not change the current element state yet.
236             // This only happens in endElement().
237             
238             // As per SAX2, the namespace URI is an empty string if the element has no
239             // namespace URI, or namespaces is turned off. The check against null protects
240             // against broken SAX implementations, so I've left it there. - mrglavas
241             boolean hasNamespaceURI = (namespaceURI != null && namespaceURI.length() != 0);
242 
243             // SAX2: rawName (QName) could be empty string if 
244             // namespace-prefixes property is false.
245             if ( rawName == null || rawName.length() == 0) {
246                 rawName = localName;
247                 if ( hasNamespaceURI ) {
248                     String   prefix;
249                     prefix = getPrefix( namespaceURI );
250                     if ( prefix != null && prefix.length() != 0 )
251                         rawName = prefix + ":" + localName;
252                 }
253                 addNSAttr = true;
254             }
255             if ( !hasNamespaceURI )
256                 htmlName = rawName;
257             else {
258                 if ( namespaceURI.equals( XHTMLNamespace ) ||
259                         (fUserXHTMLNamespace != null && fUserXHTMLNamespace.equals(namespaceURI)) )
260                     htmlName = localName;
261                 else
262                     htmlName = null;
263             }
264 
265             // XHTML: element names are lower case, DOM will be different
266             _printer.printText( '<' );
267             if ( _xhtml )
268                 _printer.printText( rawName.toLowerCase(Locale.ENGLISH) );
269             else
270                 _printer.printText( rawName );
271             _printer.indent();
272 
273             // For each attribute serialize it's name and value as one part,
274             // separated with a space so the element can be broken on
275             // multiple lines.
276             if ( attrs != null ) {
277                 for ( i = 0 ; i < attrs.getLength() ; ++i ) {
278                     _printer.printSpace();
279                     name = attrs.getQName( i ).toLowerCase(Locale.ENGLISH);
280                     value = attrs.getValue( i );
281                     if ( _xhtml || hasNamespaceURI ) {
282                         // XHTML: print empty string for null values.
283                         if ( value == null ) {
284                             _printer.printText( name );
285                             _printer.printText( "=\"\"" );
286                         } else {
287                             _printer.printText( name );
288                             _printer.printText( "=\"" );
289                             printEscaped( value );
290                             _printer.printText( '"' );
291                         }
292                     } else {
293                         // HTML: Empty values print as attribute name, no value.
294                         // HTML: URI attributes will print unescaped
295                         if ( value == null ) {
296                             value = "";
297                         }
298                         if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
299                             _printer.printText( name );
300                         else if ( HTMLdtd.isURI( rawName, name ) ) {
301                             _printer.printText( name );
302                             _printer.printText( "=\"" );
303                             _printer.printText( escapeURI( value ) );
304                             _printer.printText( '"' );
305                         } else if ( HTMLdtd.isBoolean( rawName, name ) )
306                             _printer.printText( name );
307                         else {
308                             _printer.printText( name );
309                             _printer.printText( "=\"" );
310                             printEscaped( value );
311                             _printer.printText( '"' );
312                         }
313                     }
314                 }
315             }
316             if ( htmlName != null && HTMLdtd.isPreserveSpace( htmlName ) )
317                 preserveSpace = true;
318 
319             if ( addNSAttr ) {
320                 Enumeration   keys;
321 
322                 keys = _prefixes.keys();
323                 while ( keys.hasMoreElements() ) {
324                     _printer.printSpace();
325                     value = (String  ) keys.nextElement();
326                     name = (String  ) _prefixes.get( value );
327                     if ( name.length() == 0 ) {
328                         _printer.printText( "xmlns=\"" );
329                         printEscaped( value );
330                         _printer.printText( '"' );
331                     } else {
332                         _printer.printText( "xmlns:" );
333                         _printer.printText( name );
334                         _printer.printText( "=\"" );
335                         printEscaped( value );
336                         _printer.printText( '"' );
337                     }
338                 }
339             }
340 
341             // Now it's time to enter a new element state
342             // with the tag name and space preserving.
343             // We still do not change the curent element state.
344             state = enterElementState( namespaceURI, localName, rawName, preserveSpace );
345 
346             // Prevents line breaks inside A/TD
347 
348             if ( htmlName != null && ( htmlName.equalsIgnoreCase( "A" ) ||
349                                        htmlName.equalsIgnoreCase( "TD" ) ) ) {
350                 state.empty = false;
351                 _printer.printText( '>' );
352             }
353 
354             // Handle SCRIPT and STYLE specifically by changing the
355             // state of the current element to CDATA (XHTML) or
356             // unescaped (HTML).
357             if ( htmlName != null && ( rawName.equalsIgnoreCase( "SCRIPT" ) ||
358                                        rawName.equalsIgnoreCase( "STYLE" ) ) ) {
359                 if ( _xhtml ) {
360                     // XHTML: Print contents as CDATA section
361                     state.doCData = true;
362                 } else {
363                     // HTML: Print contents unescaped
364                     state.unescaped = true;
365                 }
366             }
367         } catch ( IOException   except ) {
368             throw new SAXException  ( except );
369         }
370     }
371 
372 
373     public void endElement( String   namespaceURI, String   localName,
374                             String   rawName )
375         throws SAXException  
376     {
377         try {
378             endElementIO( namespaceURI, localName, rawName );
379         } catch ( IOException   except ) {
380             throw new SAXException  ( except );
381         }
382     }
383 
384 
385     public void endElementIO( String   namespaceURI, String   localName,
386                               String   rawName )
387         throws IOException  
388     {
389         ElementState state;
390         String         htmlName;
391 
392         // Works much like content() with additions for closing
393         // an element. Note the different checks for the closed
394         // element's state and the parent element's state.
395         _printer.unindent();
396         state = getElementState();
397 
398         if ( state.namespaceURI == null || state.namespaceURI.length() == 0 )
399             htmlName = state.rawName;
400         else {
401             if ( state.namespaceURI.equals( XHTMLNamespace ) ||
402                         (fUserXHTMLNamespace != null && fUserXHTMLNamespace.equals(state.namespaceURI)) )
403                 htmlName = state.localName;
404             else
405                 htmlName = null;
406         }
407 
408         if ( _xhtml) {
409             if ( state.empty ) {
410                 _printer.printText( " />" );
411             } else {
412                 // Must leave CData section first
413                 if ( state.inCData )
414                     _printer.printText( "]]>" );
415                 // XHTML: element names are lower case, DOM will be different
416                 _printer.printText( "</" );
417                 _printer.printText( state.rawName.toLowerCase(Locale.ENGLISH) );
418                 _printer.printText( '>' );
419             }
420         } else {
421             if ( state.empty )
422                 _printer.printText( '>' );
423             // This element is not empty and that last content was
424             // another element, so print a line break before that
425             // last element and this element's closing tag.
426             // [keith] Provided this is not an anchor.
427             // HTML: some elements do not print closing tag (e.g. LI)
428             if ( htmlName == null || ! HTMLdtd.isOnlyOpening( htmlName ) ) {
429                 if ( _indenting && ! state.preserveSpace && state.afterElement )
430                     _printer.breakLine();
431                 // Must leave CData section first (Illegal in HTML, but still)
432                 if ( state.inCData )
433                     _printer.printText( "]]>" );
434                 _printer.printText( "</" );
435                 _printer.printText( state.rawName );
436                 _printer.printText( '>' );
437             }
438         }
439         // Leave the element state and update that of the parent
440         // (if we're not root) to not empty and after element.
441         state = leaveElementState();
442         // Temporary hack to prevent line breaks inside A/TD
443         if ( htmlName == null || ( ! htmlName.equalsIgnoreCase( "A" ) &&
444                                    ! htmlName.equalsIgnoreCase( "TD" ) ) )
445 
446             state.afterElement = true;
447         state.empty = false;
448         if ( isDocumentState() )
449             _printer.flush();
450     }
451 
452 
453     //------------------------------------------//
454     // SAX document handler serializing methods //
455     //------------------------------------------//
456 
457 
458     public void characters( char[] chars, int start, int length )
459         throws SAXException  
460     {
461         ElementState state;
462 
463         try {
464             // HTML: no CDATA section
465             state = content();
466             state.doCData = false;
467             super.characters( chars, start, length );
468         } catch ( IOException   except ) {
469             throw new SAXException  ( except );
470         }
471     }
472 
473 
474     public void startElement( String   tagName, AttributeList   attrs )
475         throws SAXException  
476     {
477         int          i;
478         boolean      preserveSpace;
479         ElementState state;
480         String         name;
481         String         value;
482 
483         try {
484             if ( _printer == null )
485                 throw new IllegalStateException  ( 
486                     DOMMessageFormatter.formatMessage(
487                     DOMMessageFormatter.SERIALIZER_DOMAIN,
488                     "NoWriterSupplied", null));
489 
490 
491             state = getElementState();
492             if ( isDocumentState() ) {
493                 // If this is the root element handle it differently.
494                 // If the first root element in the document, serialize
495                 // the document's DOCTYPE. Space preserving defaults
496                 // to that of the output format.
497                 if ( ! _started )
498                     startDocument( tagName );
499             } else {
500                 // For any other element, if first in parent, then
501                 // close parent's opening tag and use the parnet's
502                 // space preserving.
503                 if ( state.empty )
504                     _printer.printText( '>' );
505                 // Indent this element on a new line if the first
506                 // content of the parent element or immediately
507                 // following an element.
508                 if ( _indenting && ! state.preserveSpace &&
509                      ( state.empty || state.afterElement ) )
510                     _printer.breakLine();
511             }
512             preserveSpace = state.preserveSpace;
513 
514             // Do not change the current element state yet.
515             // This only happens in endElement().
516 
517             // XHTML: element names are lower case, DOM will be different
518             _printer.printText( '<' );
519             if ( _xhtml )
520                 _printer.printText( tagName.toLowerCase(Locale.ENGLISH) );
521             else
522                 _printer.printText( tagName );
523             _printer.indent();
524 
525             // For each attribute serialize it's name and value as one part,
526             // separated with a space so the element can be broken on
527             // multiple lines.
528             if ( attrs != null ) {
529                 for ( i = 0 ; i < attrs.getLength() ; ++i ) {
530                     _printer.printSpace();
531                     name = attrs.getName( i ).toLowerCase(Locale.ENGLISH);
532                     value = attrs.getValue( i );
533                     if ( _xhtml ) {
534                         // XHTML: print empty string for null values.
535                         if ( value == null ) {
536                             _printer.printText( name );
537                             _printer.printText( "=\"\"" );
538                         } else {
539                             _printer.printText( name );
540                             _printer.printText( "=\"" );
541                             printEscaped( value );
542                             _printer.printText( '"' );
543                         }
544                     } else {
545                         // HTML: Empty values print as attribute name, no value.
546                         // HTML: URI attributes will print unescaped
547                         if ( value == null ) {
548                             value = "";
549                         }
550                         if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
551                             _printer.printText( name );
552                         else if ( HTMLdtd.isURI( tagName, name ) ) {
553                             _printer.printText( name );
554                             _printer.printText( "=\"" );
555                             _printer.printText( escapeURI( value ) );
556                             _printer.printText( '"' );
557                         } else if ( HTMLdtd.isBoolean( tagName, name ) )
558                             _printer.printText( name );
559                         else {
560                             _printer.printText( name );
561                             _printer.printText( "=\"" );
562                             printEscaped( value );
563                             _printer.printText( '"' );
564                         }
565                     }
566                 }
567             }
568             if ( HTMLdtd.isPreserveSpace( tagName ) )
569                 preserveSpace = true;
570 
571             // Now it's time to enter a new element state
572             // with the tag name and space preserving.
573             // We still do not change the curent element state.
574             state = enterElementState( null, null, tagName, preserveSpace );
575 
576             // Prevents line breaks inside A/TD
577             if ( tagName.equalsIgnoreCase( "A" ) || tagName.equalsIgnoreCase( "TD" ) ) {
578                 state.empty = false;
579                 _printer.printText( '>' );
580             }
581 
582             // Handle SCRIPT and STYLE specifically by changing the
583             // state of the current element to CDATA (XHTML) or
584             // unescaped (HTML).
585             if ( tagName.equalsIgnoreCase( "SCRIPT" ) ||
586                  tagName.equalsIgnoreCase( "STYLE" ) ) {
587                 if ( _xhtml ) {
588                     // XHTML: Print contents as CDATA section
589                     state.doCData = true;
590                 } else {
591                     // HTML: Print contents unescaped
592                     state.unescaped = true;
593                 }
594             }
595         } catch ( IOException   except ) {
596             throw new SAXException  ( except );
597         }
598     }
599 
600 
601     public void endElement( String   tagName )
602         throws SAXException  
603     {
604         endElement( null, null, tagName );
605     }
606 
607 
608     //------------------------------------------//
609     // Generic node serializing methods         //
610     //------------------------------------------//
611 
612 
613     /**
614      * Called to serialize the document's DOCTYPE by the root element.
615      * The document type declaration must name the root element,
616      * but the root element is only known when that element is serialized,
617      * and not at the start of the document.
618      * <p>
619      * This method will check if it has not been called before ({@link #_started}),
620      * will serialize the document type declaration, and will serialize all
621      * pre-root comments and PIs that were accumulated in the document
622      * (see {@link #serializePreRoot}). Pre-root will be serialized even if
623      * this is not the first root element of the document.
624      */
625     protected void startDocument( String   rootTagName )
626         throws IOException  
627     {
628         StringBuffer   buffer;
629 
630         // Not supported in HTML/XHTML, but we still have to switch
631         // out of DTD mode.
632         _printer.leaveDTD();
633         if ( ! _started ) {
634             // If the public and system identifiers were not specified
635             // in the output format, use the appropriate ones for HTML
636             // or XHTML.
637             if ( _docTypePublicId == null && _docTypeSystemId == null ) {
638                 if ( _xhtml ) {
639                     _docTypePublicId = HTMLdtd.XHTMLPublicId;
640                     _docTypeSystemId = HTMLdtd.XHTMLSystemId;
641                 } else {
642                     _docTypePublicId = HTMLdtd.HTMLPublicId;
643                     _docTypeSystemId = HTMLdtd.HTMLSystemId;
644                 }
645             }
646 
647             if ( ! _format.getOmitDocumentType() ) {
648                 // XHTML: If public identifier and system identifier
649                 //  specified, print them, else print just system identifier
650                 // HTML: If public identifier specified, print it with
651                 //  system identifier, if specified.
652                 // XHTML requires that all element names are lower case, so the
653                 // root on the DOCTYPE must be 'html'. - mrglavas
654                 if ( _docTypePublicId != null && ( ! _xhtml || _docTypeSystemId != null )  ) {
655                     if (_xhtml) {
656                         _printer.printText( "<!DOCTYPE html PUBLIC " );
657                     }
658                     else {
659                         _printer.printText( "<!DOCTYPE HTML PUBLIC " );
660                     }
661                     printDoctypeURL( _docTypePublicId );
662                     if ( _docTypeSystemId != null ) {
663                         if ( _indenting ) {
664                             _printer.breakLine();
665                             _printer.printText( "                      " );
666                         } else
667                         _printer.printText( ' ' );
668                         printDoctypeURL( _docTypeSystemId );
669                     }
670                     _printer.printText( '>' );
671                     _printer.breakLine();
672                 } else if ( _docTypeSystemId != null ) {
673                     if (_xhtml) {
674                         _printer.printText( "<!DOCTYPE html SYSTEM " );
675                     }
676                     else {
677                         _printer.printText( "<!DOCTYPE HTML SYSTEM " );
678                     }
679                     printDoctypeURL( _docTypeSystemId );
680                     _printer.printText( '>' );
681                     _printer.breakLine();
682                 }
683             }
684         }
685 
686         _started = true;
687         // Always serialize these, even if not te first root element.
688         serializePreRoot();
689     }
690 
691 
692     /**
693      * Called to serialize a DOM element. Equivalent to calling {@link
694      * #startElement}, {@link #endElement} and serializing everything
695      * inbetween, but better optimized.
696      */
697     protected void serializeElement( Element   elem )
698         throws IOException  
699     {
700         Attr           attr;
701         NamedNodeMap   attrMap;
702         int          i;
703         Node           child;
704         ElementState state;
705         boolean      preserveSpace;
706         String         name;
707         String         value;
708         String         tagName;
709 
710         tagName = elem.getTagName();
711         state = getElementState();
712         if ( isDocumentState() ) {
713             // If this is the root element handle it differently.
714             // If the first root element in the document, serialize
715             // the document's DOCTYPE. Space preserving defaults
716             // to that of the output format.
717             if ( ! _started )
718                 startDocument( tagName );
719         } else {
720             // For any other element, if first in parent, then
721             // close parent's opening tag and use the parnet's
722             // space preserving.
723             if ( state.empty )
724                 _printer.printText( '>' );
725             // Indent this element on a new line if the first
726             // content of the parent element or immediately
727             // following an element.
728             if ( _indenting && ! state.preserveSpace &&
729                  ( state.empty || state.afterElement ) )
730                 _printer.breakLine();
731         }
732         preserveSpace = state.preserveSpace;
733 
734         // Do not change the current element state yet.
735         // This only happens in endElement().
736 
737         // XHTML: element names are lower case, DOM will be different
738         _printer.printText( '<' );
739         if ( _xhtml )
740             _printer.printText( tagName.toLowerCase(Locale.ENGLISH) );
741         else
742             _printer.printText( tagName );
743         _printer.indent();
744 
745         // Lookup the element's attribute, but only print specified
746         // attributes. (Unspecified attributes are derived from the DTD.
747         // For each attribute print it's name and value as one part,
748         // separated with a space so the element can be broken on
749         // multiple lines.
750         attrMap = elem.getAttributes();
751         if ( attrMap != null ) {
752             for ( i = 0 ; i < attrMap.getLength() ; ++i ) {
753                 attr = (Attr  ) attrMap.item( i );
754                 name = attr.getName().toLowerCase(Locale.ENGLISH);
755                 value = attr.getValue();
756                 if ( attr.getSpecified() ) {
757                     _printer.printSpace();
758                     if ( _xhtml ) {
759                         // XHTML: print empty string for null values.
760                         if ( value == null ) {
761                             _printer.printText( name );
762                             _printer.printText( "=\"\"" );
763                         } else {
764                             _printer.printText( name );
765                             _printer.printText( "=\"" );
766                             printEscaped( value );
767                             _printer.printText( '"' );
768                         }
769                     } else {
770                         // HTML: Empty values print as attribute name, no value.
771                         // HTML: URI attributes will print unescaped
772                         if ( value == null ) {
773                             value = "";
774                         }
775                         if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
776                             _printer.printText( name );
777                         else if ( HTMLdtd.isURI( tagName, name ) ) {
778                             _printer.printText( name );
779                             _printer.printText( "=\"" );
780                             _printer.printText( escapeURI( value ) );
781                             _printer.printText( '"' );
782                         } else if ( HTMLdtd.isBoolean( tagName, name ) )
783                             _printer.printText( name );
784                         else {
785                             _printer.printText( name );
786                             _printer.printText( "=\"" );
787                             printEscaped( value );
788                             _printer.printText( '"' );
789                         }
790                     }
791                 }
792             }
793         }
794         if ( HTMLdtd.isPreserveSpace( tagName ) )
795             preserveSpace = true;
796 
797         // If element has children, or if element is not an empty tag,
798         // serialize an opening tag.
799         if ( elem.hasChildNodes() || ! HTMLdtd.isEmptyTag( tagName ) ) {
800             // Enter an element state, and serialize the children
801             // one by one. Finally, end the element.
802             state = enterElementState( null, null, tagName, preserveSpace );
803 
804             // Prevents line breaks inside A/TD
805             if ( tagName.equalsIgnoreCase( "A" ) || tagName.equalsIgnoreCase( "TD" ) ) {
806                 state.empty = false;
807                 _printer.printText( '>' );
808             }
809 
810             // Handle SCRIPT and STYLE specifically by changing the
811             // state of the current element to CDATA (XHTML) or
812             // unescaped (HTML).
813             if ( tagName.equalsIgnoreCase( "SCRIPT" ) ||
814                  tagName.equalsIgnoreCase( "STYLE" ) ) {
815                 if ( _xhtml ) {
816                     // XHTML: Print contents as CDATA section
817                     state.doCData = true;
818                 } else {
819                     // HTML: Print contents unescaped
820                     state.unescaped = true;
821                 }
822             }
823             child = elem.getFirstChild();
824             while ( child != null ) {
825                 serializeNode( child );
826                 child = child.getNextSibling();
827             }
828             endElementIO( null, null, tagName );
829         } else {
830             _printer.unindent();
831             // XHTML: Close empty tag with ' />' so it's XML and HTML compatible.
832             // HTML: Empty tags are defined as such in DTD no in document.
833             if ( _xhtml )
834                 _printer.printText( " />" );
835             else
836                 _printer.printText( '>' );
837             // After element but parent element is no longer empty.
838             state.afterElement = true;
839             state.empty = false;
840             if ( isDocumentState() )
841                 _printer.flush();
842         }
843     }
844 
845 
846 
847     protected void characters( String   text )
848         throws IOException  
849     {
850         ElementState state;
851 
852         // HTML: no CDATA section
853         state = content();
854         super.characters( text );
855     }
856 
857 
858     protected String   getEntityRef( int ch )
859     {
860         return HTMLdtd.fromChar( ch );
861     }
862 
863 
864     protected String   escapeURI( String   uri )
865     {
866         int index;
867 
868         // XXX  Apparently Netscape doesn't like if we escape the URI
869         //      using %nn, so we leave it as is, just remove any quotes.
870         index = uri.indexOf( "\"" );
871         if ( index >= 0 )
872             return uri.substring( 0, index );
873         else
874             return uri;
875     }
876 
877 
878 }
879 
880 
881 
882 
883
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags