HTMLSerializer


1   /*
2    * The Apache Software License, Version 1.1
3    *
4    *
5    * Copyright (c) 1999 The Apache Software Foundation.  All rights
6    * reserved.
7    *
8    * Redistribution and use in source and binary forms, with or without
9    * modification, are permitted provided that the following conditions
10   * are met:
11   *
12   * 1. Redistributions of source code must retain the above copyright
13   *    notice, this list of conditions and the following disclaimer.
14   *
15   * 2. Redistributions in binary form must reproduce the above copyright
16   *    notice, this list of conditions and the following disclaimer in
17   *    the documentation and/or other materials provided with the
18   *    distribution.
19   *
20   * 3. The end-user documentation included with the redistribution,
21   *    if any, must include the following acknowledgment:
22   *       "This product includes software developed by the
23   *        Apache Software Foundation (http://www.apache.org/)."
24   *    Alternately, this acknowledgment may appear in the software itself,
25   *    if and wherever such third-party acknowledgments normally appear.
26   *
27   * 4. The names "Xerces" and "Apache Software Foundation" must
28   *    not be used to endorse or promote products derived from this
29   *    software without prior written permission. For written
30   *    permission, please contact apache@apache.org.
31   *
32   * 5. Products derived from this software may not be called "Apache",
33   *    nor may "Apache" appear in their name, without prior written
34   *    permission of the Apache Software Foundation.
35   *
36   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47   * SUCH DAMAGE.
48   * ====================================================================
49   *
50   * This software consists of voluntary contributions made by many
51   * individuals on behalf of the Apache Software Foundation and was
52   * originally based on software copyright (c) 1999, International
53   * Business Machines, Inc., http://www.apache.org.  For more
54   * information on the Apache Software Foundation, please see
55   * <http://www.apache.org/>.
56   */
57  
58  
59  // Sep 14, 2000:
    //  Fixed serializer to report IO exception directly, instead of at
    //  the end of document processing.
62  //  Reported by Patrick Higgins <phiggins@transzap.com>
63  // Aug 21, 2000:
64  //  Fixed bug in startDocument not calling prepare.
65  //  Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
66  // Aug 21, 2000:
67  //  Added ability to omit DOCTYPE declaration.
68  // Sep 1, 2000:
69  //   If no output format is provided the serializer now defaults
70  //   to ISO-8859-1 encoding. Reported by Mikael Staldal
71  //   <d96-mst@d.kth.se>
72  
73  
74  package org.enhydra.apache.xml.serialize;
75  
76  
    import java.io.IOException;
    import java.io.OutputStream;
    import java.io.Writer;
    import java.util.Enumeration;
    import java.util.Locale;

    import org.w3c.dom.Attr;
    import org.w3c.dom.Element;
    import org.w3c.dom.NamedNodeMap;
    import org.w3c.dom.Node;
    import org.xml.sax.AttributeList;
    import org.xml.sax.Attributes;
    import org.xml.sax.SAXException;
89  
90  
91  /**
92   * Implements an HTML/XHTML serializer supporting both DOM and SAX
93   * pretty serializing. HTML/XHTML mode is determined in the
94   * constructor.  For usage instructions see {@link Serializer}.
95   * <p>
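     * If an output stream is used, the encoding is taken from the
     * output format (documented as defaulting to <tt>UTF-8</tt>). If no
     * output format is supplied at all, the serializer falls back to an
     * HTML output format using <tt>ISO-8859-1</tt>. If a writer is
     * used, make sure the writer uses the same encoding (if applicable)
     * as specified in the output format.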
100  * <p>
101  * The serializer supports both DOM and SAX. DOM serializing is done
102  * by calling {@link #serialize} and SAX serializing is done by firing
103  * SAX events and using the serializer as a document handler.
104  * <p>
     * If an I/O exception occurs while handling a SAX event, the
     * serializer reports it directly from that event method, wrapped
     * in a {@link org.xml.sax.SAXException}, rather than deferring it
     * until {@link org.xml.sax.DocumentHandler#endDocument}.
109  * <p>
110  * For elements that are not specified as whitespace preserving,
111  * the serializer will potentially break long text lines at space
112  * boundaries, indent lines, and serialize elements on separate
113  * lines. Line terminators will be regarded as spaces, and
114  * spaces at beginning of line will be stripped.
115  * <p>
116  * XHTML is slightly different than HTML:
117  * <ul>
118  * <li>Element/attribute names are lower case and case matters
119  * <li>Attributes must specify value, even if empty string
120  * <li>Empty elements must have '/' in empty tag
121  * <li>Contents of SCRIPT and STYLE elements serialized as CDATA
122  * </ul>
123  *
124  *
125  * @version $Revision: 1.2 $ $Date: 2005/01/26 08:28:45 $
126  * @author <a HREF="mailto:arkin@intalio.com">Assaf Arkin</a>
127  * @see Serializer
128  */
129 public class HTMLSerializer
130     extends BaseMarkupSerializer
131 {
132 
133 
        /**
         * True if serializing in XHTML format.
         * <p>
         * NOTE(review): this flag is static but is assigned by the instance
         * constructor, so the most recently constructed serializer decides
         * the mode observed by every instance. Confirm that this sharing is
         * intended before relying on mixed HTML/XHTML serializers.
         */
        private static boolean _xhtml;


        /**
         * Namespace URI that an element's namespace is compared against to
         * decide whether its local name is looked up as an HTML element name.
         * Elements in any other non-null namespace are not treated as HTML.
         */
        public static String   XHTMLNamespace = "";
141 
142 
143 
144 
145     /**
146      * Constructs a new HTML/XHTML serializer depending on the value of
147      * <tt>xhtml</tt>. The serializer cannot be used without calling
148      * {@link #init} first.
149      *
150      * @param xhtml True if XHTML serializing
151      */
152     protected HTMLSerializer( boolean xhtml, OutputFormat format )
153     {
154         super( format );
155         _xhtml = xhtml;
156     }
157 
158 
159     /**
160      * Constructs a new serializer. The serializer cannot be used without
161      * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
162      * first.
163      */
164     public HTMLSerializer()
165     {
166         this( false, new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
167     }
168 
169 
170     /**
171      * Constructs a new serializer. The serializer cannot be used without
172      * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
173      * first.
174      */
175     public HTMLSerializer( OutputFormat format )
176     {
177         this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
178     }
179 
180 
181 
182     /**
183      * Constructs a new serializer that writes to the specified writer
184      * using the specified output format. If <tt>format</tt> is null,
185      * will use a default output format.
186      *
187      * @param writer The writer to use
188      * @param format The output format to use, null for the default
189      */
190     public HTMLSerializer( Writer   writer, OutputFormat format )
191     {
192         this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
193         setOutputCharStream( writer );
194     }
195 
196 
197     /**
198      * Constructs a new serializer that writes to the specified output
199      * stream using the specified output format. If <tt>format</tt>
200      * is null, will use a default output format.
201      *
202      * @param output The output stream to use
203      * @param format The output format to use, null for the default
204      */
205     public HTMLSerializer( OutputStream   output, OutputFormat format )
206     {
207         this( false, format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
208         setOutputByteStream( output );
209     }
210 
211 
212     public void setOutputFormat( OutputFormat format )
213     {
214         super.setOutputFormat( format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
215     }
216 
217 
218     //-----------------------------------------//
219     // SAX content handler serializing methods //
220     //-----------------------------------------//
221 
222 
223     public void startElement( String   namespaceURI, String   localName,
224                               String   rawName, Attributes   attrs )
225         throws SAXException  
226     {
227         int          i;
228         boolean      preserveSpace;
229         ElementState state;
230         String         name;
231         String         value;
232         String         htmlName;
233         boolean      addNSAttr = false;
234 
235         try {
236             if ( _printer == null )
237                 throw new IllegalStateException  ( "SER002 No writer supplied for serializer" );
238 
239             state = getElementState();
240             if ( isDocumentState() ) {
241                 // If this is the root element handle it differently.
242                 // If the first root element in the document, serialize
243                 // the document's DOCTYPE. Space preserving defaults
244                 // to that of the output format.
245                 if ( ! _started )
246                     startDocument( localName == null ? rawName : localName );
247             } else {
248                 // For any other element, if first in parent, then
249                 // close parent's opening tag and use the parnet's
250                 // space preserving.
251                 if ( state.empty )
252                     _printer.printText( '>' );
253                 // Indent this element on a new line if the first
254                 // content of the parent element or immediately
255                 // following an element.
256                 if ( _indenting && ! state.preserveSpace &&
257                      ( state.empty || state.afterElement ) )
258                     _printer.breakLine();
259             }
260             preserveSpace = state.preserveSpace;
261 
262             // Do not change the current element state yet.
263             // This only happens in endElement().
264 
265             if ( rawName == null ) {
266                 rawName = localName;
267                 if ( namespaceURI != null ) {
268                     String   prefix;
269                     prefix = getPrefix( namespaceURI );
270                     if ( prefix.length() > 0 )
271                         rawName = prefix + ":" + localName;
272                 }
273                 addNSAttr = true;
274             }
275             if ( namespaceURI == null )
276                 htmlName = rawName;
277             else {
278                 if ( namespaceURI.equals( XHTMLNamespace ) )
279                     htmlName = localName;
280                 else
281                     htmlName = null;
282             }
283 
284             // XHTML: element names are lower case, DOM will be different
285             _printer.printText( '<' );
286             if ( _xhtml )
287                 _printer.printText( rawName.toLowerCase() );
288             else
289                 _printer.printText( rawName );
290             _printer.indent();
291 
292             // For each attribute serialize it's name and value as one part,
293             // separated with a space so the element can be broken on
294             // multiple lines.
295             if ( attrs != null ) {
296                 for ( i = 0 ; i < attrs.getLength() ; ++i ) {
297                     _printer.printSpace();
298                     name = attrs.getQName( i ).toLowerCase();;
299                     value = attrs.getValue( i );
300                     if ( _xhtml || namespaceURI != null ) {
301                         // XHTML: print empty string for null values.
302                         if ( value == null ) {
303                             _printer.printText( name );
304                             _printer.printText( "=\"\"" );
305                         } else {
306                             _printer.printText( name );
307                             _printer.printText( "=\"" );
308                             printEscaped( value );
309                             _printer.printText( '"' );
310                         }
311                     } else {
312                         // HTML: Empty values print as attribute name, no value.
313                         // HTML: URI attributes will print unescaped
314                         if ( value == null ) {
315                             value = "";
316                         }
317                         if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
318                             _printer.printText( name );
319                         else if ( HTMLdtd.isURI( rawName, name ) ) {
320                             _printer.printText( name );
321                             _printer.printText( "=\"" );
322                             _printer.printText( escapeURI( value ) );
323                             _printer.printText( '"' );
324                         } else if ( HTMLdtd.isBoolean( rawName, name ) )
325                             _printer.printText( name );
326                         else {
327                             _printer.printText( name );
328                             _printer.printText( "=\"" );
329                             printEscaped( value );
330                             _printer.printText( '"' );
331                         }
332                     }
333                 }
334             }
335             if ( htmlName != null && HTMLdtd.isPreserveSpace( htmlName ) )
336                 preserveSpace = true;
337 
338             if ( addNSAttr ) {
339                 Enumeration   enumer;
340 
341                 enumer = _prefixes.keys();
342                 while ( enumer.hasMoreElements() ) {
343                     _printer.printSpace();
344                     value = (String  ) enumer.nextElement();
345                     name = (String  ) _prefixes.get( value );
346                     if ( name.length() == 0 ) {
347                         _printer.printText( "xmlns=\"" );
348                         printEscaped( value );
349                         _printer.printText( '"' );
350                     } else {
351                         _printer.printText( "xmlns:" );
352                         _printer.printText( name );
353                         _printer.printText( "=\"" );
354                         printEscaped( value );
355                         _printer.printText( '"' );
356                     }
357                 }
358             }
359 
360             // Now it's time to enter a new element state
361             // with the tag name and space preserving.
362             // We still do not change the curent element state.
363             state = enterElementState( namespaceURI, localName, rawName, preserveSpace );
364 
365             // Prevents line breaks inside A/TD
366 
367             if ( htmlName != null && ( htmlName.equalsIgnoreCase( "A" ) ||
368                                        htmlName.equalsIgnoreCase( "TD" ) ) ) {
369                 state.empty = false;
370                 _printer.printText( '>' );
371             }
372 
373             // Handle SCRIPT and STYLE specifically by changing the
374             // state of the current element to CDATA (XHTML) or
375             // unescaped (HTML).
376             if ( htmlName != null && ( rawName.equalsIgnoreCase( "SCRIPT" ) ||
377                                        rawName.equalsIgnoreCase( "STYLE" ) ) ) {
378                 if ( _xhtml ) {
379                     // XHTML: Print contents as CDATA section
380                     state.doCData = true;
381                 } else {
382                     // HTML: Print contents unescaped
383                     state.unescaped = true;
384                 }
385             }
386         } catch ( IOException   except ) {
387             throw new SAXException  ( except );
388         }
389     }
390 
391 
392     public void endElement( String   namespaceURI, String   localName,
393                             String   rawName )
394         throws SAXException  
395     {
396         try {
397             endElementIO( namespaceURI, localName, rawName );
398         } catch ( IOException   except ) {
399             throw new SAXException  ( except );
400         }
401     }
402 
403 
404     public void endElementIO( String   namespaceURI, String   localName,
405                               String   rawName )
406         throws IOException  
407     {
408         ElementState state;
409         String         htmlName;
410 
411         // Works much like content() with additions for closing
412         // an element. Note the different checks for the closed
413         // element's state and the parent element's state.
414         _printer.unindent();
415         state = getElementState();
416 
417         if ( state.namespaceURI == null )
418             htmlName = state.rawName;
419         else {
420             if ( state.namespaceURI.equals( XHTMLNamespace ) )
421                 htmlName = state.localName;
422             else
423                 htmlName = null;
424         }
425 
426         if ( _xhtml) {
427             if ( state.empty ) {
428                 _printer.printText( " />" );
429             } else {
430                 // Must leave CData section first
431                 if ( state.inCData )
432                     _printer.printText( "]]>" );
433                 // XHTML: element names are lower case, DOM will be different
434                 _printer.printText( "</" );
435                 _printer.printText( state.rawName.toLowerCase() );
436                 _printer.printText( '>' );
437             }
438         } else {
439             if ( state.empty )
440                 _printer.printText( '>' );
441             // This element is not empty and that last content was
442             // another element, so print a line break before that
443             // last element and this element's closing tag.
444             // [keith] Provided this is not an anchor.
445             // HTML: some elements do not print closing tag (e.g. LI)
446             if ( htmlName == null || ! HTMLdtd.isOnlyOpening( htmlName ) ) {
447                 if ( _indenting && ! state.preserveSpace && state.afterElement )
448                     _printer.breakLine();
449                 // Must leave CData section first (Illegal in HTML, but still)
450                 if ( state.inCData )
451                     _printer.printText( "]]>" );
452                 _printer.printText( "</" );
453                 _printer.printText( state.rawName );
454                 _printer.printText( '>' );
455             }
456         }
457         // Leave the element state and update that of the parent
458         // (if we're not root) to not empty and after element.
459         state = leaveElementState();
460         // Temporary hack to prevent line breaks inside A/TD
461         if ( htmlName == null || ( ! htmlName.equalsIgnoreCase( "A" ) &&
462                                    ! htmlName.equalsIgnoreCase( "TD" ) ) )
463 
464             state.afterElement = true;
465         state.empty = false;
466         if ( isDocumentState() )
467             _printer.flush();
468     }
469 
470 
471     //------------------------------------------//
472     // SAX document handler serializing methods //
473     //------------------------------------------//
474 
475 
476     public void characters( char[] chars, int start, int length )
477         throws SAXException  
478     {
479         ElementState state;
480 
481         try {
482             // HTML: no CDATA section
483             state = content();
484             state.doCData = false;
485             super.characters( chars, start, length );
486         } catch ( IOException   except ) {
487             throw new SAXException  ( except );
488         }
489     }
490 
491 
492     public void startElement( String   tagName, AttributeList   attrs )
493         throws SAXException  
494     {
495         int          i;
496         boolean      preserveSpace;
497         ElementState state;
498         String         name;
499         String         value;
500 
501         try {
502             if ( _printer == null )
503                 throw new IllegalStateException  ( "SER002 No writer supplied for serializer" );
504 
505             state = getElementState();
506             if ( isDocumentState() ) {
507                 // If this is the root element handle it differently.
508                 // If the first root element in the document, serialize
509                 // the document's DOCTYPE. Space preserving defaults
510                 // to that of the output format.
511                 if ( ! _started )
512                     startDocument( tagName );
513             } else {
514                 // For any other element, if first in parent, then
515                 // close parent's opening tag and use the parnet's
516                 // space preserving.
517                 if ( state.empty )
518                     _printer.printText( '>' );
519                 // Indent this element on a new line if the first
520                 // content of the parent element or immediately
521                 // following an element.
522                 if ( _indenting && ! state.preserveSpace &&
523                      ( state.empty || state.afterElement ) )
524                     _printer.breakLine();
525             }
526             preserveSpace = state.preserveSpace;
527 
528             // Do not change the current element state yet.
529             // This only happens in endElement().
530 
531             // XHTML: element names are lower case, DOM will be different
532             _printer.printText( '<' );
533             if ( _xhtml )
534                 _printer.printText( tagName.toLowerCase() );
535             else
536                 _printer.printText( tagName );
537             _printer.indent();
538 
539             // For each attribute serialize it's name and value as one part,
540             // separated with a space so the element can be broken on
541             // multiple lines.
542             if ( attrs != null ) {
543                 for ( i = 0 ; i < attrs.getLength() ; ++i ) {
544                     _printer.printSpace();
545                     name = attrs.getName( i ).toLowerCase();;
546                     value = attrs.getValue( i );
547                     if ( _xhtml ) {
548                         // XHTML: print empty string for null values.
549                         if ( value == null ) {
550                             _printer.printText( name );
551                             _printer.printText( "=\"\"" );
552                         } else {
553                             _printer.printText( name );
554                             _printer.printText( "=\"" );
555                             printEscaped( value );
556                             _printer.printText( '"' );
557                         }
558                     } else {
559                         // HTML: Empty values print as attribute name, no value.
560                         // HTML: URI attributes will print unescaped
561                         if ( value == null ) {
562                             value = "";
563                         }
564                         if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
565                             _printer.printText( name );
566                         else if ( HTMLdtd.isURI( tagName, name ) ) {
567                             _printer.printText( name );
568                             _printer.printText( "=\"" );
569                             _printer.printText( escapeURI( value ) );
570                             _printer.printText( '"' );
571                         } else if ( HTMLdtd.isBoolean( tagName, name ) )
572                             _printer.printText( name );
573                         else {
574                             _printer.printText( name );
575                             _printer.printText( "=\"" );
576                             printEscaped( value );
577                             _printer.printText( '"' );
578                         }
579                     }
580                 }
581             }
582             if ( HTMLdtd.isPreserveSpace( tagName ) )
583                 preserveSpace = true;
584 
585             // Now it's time to enter a new element state
586             // with the tag name and space preserving.
587             // We still do not change the curent element state.
588             state = enterElementState( null, null, tagName, preserveSpace );
589 
590             // Prevents line breaks inside A/TD
591             if ( tagName.equalsIgnoreCase( "A" ) || tagName.equalsIgnoreCase( "TD" ) ) {
592                 state.empty = false;
593                 _printer.printText( '>' );
594             }
595 
596             // Handle SCRIPT and STYLE specifically by changing the
597             // state of the current element to CDATA (XHTML) or
598             // unescaped (HTML).
599             if ( tagName.equalsIgnoreCase( "SCRIPT" ) ||
600                  tagName.equalsIgnoreCase( "STYLE" ) ) {
601                 if ( _xhtml ) {
602                     // XHTML: Print contents as CDATA section
603                     state.doCData = true;
604                 } else {
605                     // HTML: Print contents unescaped
606                     state.unescaped = true;
607                 }
608             }
609         } catch ( IOException   except ) {
610             throw new SAXException  ( except );
611         }
612     }
613 
614 
615     public void endElement( String   tagName )
616         throws SAXException  
617     {
618         endElement( null, null, tagName );
619     }
620 
621 
        //------------------------------------------//
        // Generic node serializing methods         //
        //------------------------------------------//
625 
626 
627     /**
628      * Called to serialize the document's DOCTYPE by the root element.
629      * The document type declaration must name the root element,
630      * but the root element is only known when that element is serialized,
631      * and not at the start of the document.
632      * <p>
633      * This method will check if it has not been called before ({@link #_started}),
634      * will serialize the document type declaration, and will serialize all
635      * pre-root comments and PIs that were accumulated in the document
636      * (see {@link #serializePreRoot}). Pre-root will be serialized even if
637      * this is not the first root element of the document.
638      */
639     protected void startDocument( String   rootTagName )
640         throws IOException  
641     {
642         StringBuffer   buffer;
643 
644         // Not supported in HTML/XHTML, but we still have to switch
645         // out of DTD mode.
646         _printer.leaveDTD();
647         if ( ! _started ) {
648             // If the public and system identifiers were not specified
649             // in the output format, use the appropriate ones for HTML
650             // or XHTML.
651             if ( _docTypePublicId == null && _docTypeSystemId == null ) {
652                 if ( _xhtml ) {
653                     _docTypePublicId = HTMLdtd.XHTMLPublicId;
654                     _docTypeSystemId = HTMLdtd.XHTMLSystemId;
655                 } else {
656                     _docTypePublicId = HTMLdtd.HTMLPublicId;
657                     _docTypeSystemId = HTMLdtd.HTMLSystemId;
658                 }
659             }
660 
661             if ( ! _format.getOmitDocumentType() ) {
662                 // XHTML: If public idnentifier and system identifier
663                 //  specified, print them, else print just system identifier
664                 // HTML: If public identifier specified, print it with
665                 //  system identifier, if specified.
666                 if ( _docTypePublicId != null && ( ! _xhtml || _docTypeSystemId != null )  ) {
667                     _printer.printText( "<!DOCTYPE HTML PUBLIC " );
668                     printDoctypeURL( _docTypePublicId );
669                     if ( _docTypeSystemId != null ) {
670                         if ( _indenting ) {
671                             _printer.breakLine();
672                             _printer.printText( "                      " );
673                         } else
674                         _printer.printText( ' ' );
675                         printDoctypeURL( _docTypeSystemId );
676                     }
677                     _printer.printText( '>' );
678                     _printer.breakLine();
679                 } else if ( _docTypeSystemId != null ) {
680                     _printer.printText( "<!DOCTYPE HTML SYSTEM " );
681                     printDoctypeURL( _docTypeSystemId );
682                     _printer.printText( '>' );
683                     _printer.breakLine();
684                 }
685             }
686         }
687 
688         _started = true;
689         // Always serialize these, even if not te first root element.
690         serializePreRoot();
691     }
692 
693 
694     /**
695      * Called to serialize a DOM element. Equivalent to calling {@link
696      * #startElement}, {@link #endElement} and serializing everything
697      * inbetween, but better optimized.
698      */
699     protected void serializeElement( Element   elem )
700         throws IOException  
701     {
702         Attr           attr;
703         NamedNodeMap   attrMap;
704         int          i;
705         Node           child;
706         ElementState state;
707         boolean      preserveSpace;
708         String         name;
709         String         value;
710         String         tagName;
711 
712         tagName = elem.getTagName();
713         state = getElementState();
714         if ( isDocumentState() ) {
715             // If this is the root element handle it differently.
716             // If the first root element in the document, serialize
717             // the document's DOCTYPE. Space preserving defaults
718             // to that of the output format.
719             if ( ! _started )
720                 startDocument( tagName );
721         } else {
722             // For any other element, if first in parent, then
723             // close parent's opening tag and use the parnet's
724             // space preserving.
725             if ( state.empty )
726                 _printer.printText( '>' );
727             // Indent this element on a new line if the first
728             // content of the parent element or immediately
729             // following an element.
730             if ( _indenting && ! state.preserveSpace &&
731                  ( state.empty || state.afterElement ) )
732                 _printer.breakLine();
733         }
734         preserveSpace = state.preserveSpace;
735 
736         // Do not change the current element state yet.
737         // This only happens in endElement().
738 
739         // XHTML: element names are lower case, DOM will be different
740         _printer.printText( '<' );
741         if ( _xhtml )
742             _printer.printText( tagName.toLowerCase() );
743         else
744             _printer.printText( tagName );
745         _printer.indent();
746 
747         // Lookup the element's attribute, but only print specified
748         // attributes. (Unspecified attributes are derived from the DTD.
749         // For each attribute print it's name and value as one part,
750         // separated with a space so the element can be broken on
751         // multiple lines.
752         attrMap = elem.getAttributes();
753         if ( attrMap != null ) {
754             for ( i = 0 ; i < attrMap.getLength() ; ++i ) {
755                 attr = (Attr  ) attrMap.item( i );
756                 name = attr.getName().toLowerCase();
757                 value = attr.getValue();
758                 if ( attr.getSpecified() ) {
759                     _printer.printSpace();
760                     if ( _xhtml ) {
761                         // XHTML: print empty string for null values.
762                         if ( value == null ) {
763                             _printer.printText( name );
764                             _printer.printText( "=\"\"" );
765                         } else {
766                             _printer.printText( name );
767                             _printer.printText( "=\"" );
768                             printEscaped( value );
769                             _printer.printText( '"' );
770                         }
771                     } else {
772                         // HTML: Empty values print as attribute name, no value.
773                         // HTML: URI attributes will print unescaped
774                         if ( value == null ) {
775                             value = "";
776                         }
777                         if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
778                             _printer.printText( name );
779                         else if ( HTMLdtd.isURI( tagName, name ) ) {
780                             _printer.printText( name );
781                             _printer.printText( "=\"" );
782                             _printer.printText( escapeURI( value ) );
783                             _printer.printText( '"' );
784                         } else if ( HTMLdtd.isBoolean( tagName, name ) )
785                             _printer.printText( name );
786                         else {
787                             _printer.printText( name );
788                             _printer.printText( "=\"" );
789                             printEscaped( value );
790                             _printer.printText( '"' );
791                         }
792                     }
793                 }
794             }
795         }
796         if ( HTMLdtd.isPreserveSpace( tagName ) )
797             preserveSpace = true;
798 
799         // If element has children, or if element is not an empty tag,
800         // serialize an opening tag.
801         if ( elem.hasChildNodes() || ! HTMLdtd.isEmptyTag( tagName ) ) {
802             // Enter an element state, and serialize the children
803             // one by one. Finally, end the element.
804             state = enterElementState( null, null, tagName, preserveSpace );
805 
806             // Prevents line breaks inside A/TD
807             if ( tagName.equalsIgnoreCase( "A" ) || tagName.equalsIgnoreCase( "TD" ) ) {
808                 state.empty = false;
809                 _printer.printText( '>' );
810             }
811 
812             // Handle SCRIPT and STYLE specifically by changing the
813             // state of the current element to CDATA (XHTML) or
814             // unescaped (HTML).
815             if ( tagName.equalsIgnoreCase( "SCRIPT" ) ||
816                  tagName.equalsIgnoreCase( "STYLE" ) ) {
817                 if ( _xhtml ) {
818                     // XHTML: Print contents as CDATA section
819                     state.doCData = true;
820                 } else {
821                     // HTML: Print contents unescaped
822                     state.unescaped = true;
823                 }
824             }
825             child = elem.getFirstChild();
826             while ( child != null ) {
827                 serializeNode( child );
828                 child = child.getNextSibling();
829             }
830             endElementIO( null, null, tagName );
831         } else {
832             _printer.unindent();
833             // XHTML: Close empty tag with ' />' so it's XML and HTML compatible.
834             // HTML: Empty tags are defined as such in DTD no in document.
835             if ( _xhtml )
836                 _printer.printText( " />" );
837             else
838                 _printer.printText( '>' );
839             // After element but parent element is no longer empty.
840             state.afterElement = true;
841             state.empty = false;
842             if ( isDocumentState() )
843                 _printer.flush();
844         }
845     }
846 
847 
848 
849     protected void characters( String   text )
850         throws IOException  
851     {
852         ElementState state;
853 
854         // HTML: no CDATA section
855         state = content();
856         super.characters( text );
857     }
858 
859 
860     protected String   getEntityRef( int ch )
861     {
862         return HTMLdtd.fromChar( ch );
863     }
864 
865 
866     protected String   escapeURI( String   uri )
867     {
868         int index;
869 
870         // XXX  Apparently Netscape doesn't like if we escape the URI
871         //      using %nn, so we leave it as is, just remove any quotes.
872         index = uri.indexOf( "\"" );
873         if ( index >= 0 )
874             return uri.substring( 0, index );
875         else
876             return uri;
877     }
878 
879 
880 }
881 
882 
883 
884 
885
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags