KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sun > org > apache > xml > internal > serialize > HTMLSerializer


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 1999-2004 The Apache Software Foundation. All rights
6  * reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

57
58
59 // Sep 14, 2000:
60
// Fixed serializer to report IO exception directly, instead at
61
// the end of document processing.
62
// Reported by Patrick Higgins <phiggins@transzap.com>
63
// Aug 21, 2000:
64
// Fixed bug in startDocument not calling prepare.
65
// Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
66
// Aug 21, 2000:
67
// Added ability to omit DOCTYPE declaration.
68
// Sep 1, 2000:
69
// If no output format is provided the serializer now defaults
70
// to ISO-8859-1 encoding. Reported by Mikael Staldal
71
// <d96-mst@d.kth.se>
72

73
74 package com.sun.org.apache.xml.internal.serialize;
75
76 import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter;
77
78 import java.io.IOException JavaDoc;
79 import java.io.OutputStream JavaDoc;
80 import java.io.Writer JavaDoc;
81 import java.util.Enumeration JavaDoc;
82 import java.util.Locale JavaDoc;
83
84 import org.w3c.dom.Attr JavaDoc;
85 import org.w3c.dom.Element JavaDoc;
86 import org.w3c.dom.NamedNodeMap JavaDoc;
87 import org.w3c.dom.Node JavaDoc;
88 import org.xml.sax.AttributeList JavaDoc;
89 import org.xml.sax.Attributes JavaDoc;
90 import org.xml.sax.SAXException JavaDoc;
91
92
93 /**
94  * Implements an HTML/XHTML serializer supporting both DOM and SAX
95  * pretty serializing. HTML/XHTML mode is determined in the
96  * constructor. For usage instructions see {@link Serializer}.
97  * <p>
98  * If an output stream is used, the encoding is taken from the
99  * output format (defaults to <tt>UTF-8</tt>; when no output format is
100  * supplied, the serializer uses <tt>ISO-8859-1</tt>). If a writer is used,
101  * make sure it uses the same encoding (if applicable) as the output format.
102  * <p>
103  * The serializer supports both DOM and SAX. DOM serializing is done
104  * by calling {@link #serialize} and SAX serializing is done by firing
105  * SAX events and using the serializer as a document handler.
106  * <p>
107  * If an I/O exception occurs while serializing, it is reported
108  * directly rather than at the end of document processing: the SAX
109  * handler methods rethrow it wrapped in a {@link
110  * org.xml.sax.SAXException}.
111  * <p>
112  * For elements that are not specified as whitespace preserving,
113  * the serializer will potentially break long text lines at space
114  * boundaries, indent lines, and serialize elements on separate
115  * lines. Line terminators will be regarded as spaces, and
116  * spaces at beginning of line will be stripped.
117  * <p>
118  * XHTML is slightly different than HTML:
119  * <ul>
120  * <li>Element/attribute names are lower case and case matters
121  * <li>Attributes must specify value, even if empty string
122  * <li>Empty elements must have '/' in empty tag
123  * <li>Contents of SCRIPT and STYLE elements serialized as CDATA
124  * </ul>
125  *
126  * @deprecated
127  * @version $Revision: 1.26 $ $Date: 2004/02/16 05:24:55 $
128  * @author <a HREF="mailto:arkin@intalio.com">Assaf Arkin</a>
129  * @see Serializer
130  */

131 public class HTMLSerializer
132     extends BaseMarkupSerializer
133 {
134
135
136     /**
137      * True if serializing in XHTML format.
138      */

139     private boolean _xhtml;
140
141
142     public static final String JavaDoc XHTMLNamespace = "http://www.w3.org/1999/xhtml";
143
144     // for users to override XHTMLNamespace if need be.
145
private String JavaDoc fUserXHTMLNamespace = null;
146
147
/**
 * Creates a serializer in either HTML or XHTML mode, selected by
 * <tt>xhtml</tt>. The serializer cannot be used without calling
 * {@link #setOutputCharStream} or {@link #setOutputByteStream} first.
 *
 * @param xhtml True to serialize as XHTML, false to serialize as HTML
 * @param format The output format passed on to the base serializer
 */
protected HTMLSerializer( boolean xhtml, OutputFormat format )
{
    super( format );
    this._xhtml = xhtml;
}
160
161
/**
 * Creates an HTML (not XHTML) serializer with the default output
 * format: HTML method, <tt>ISO-8859-1</tt> encoding. No output target
 * is attached; call {@link #setOutputCharStream} or
 * {@link #setOutputByteStream} before serializing.
 */
public HTMLSerializer()
{
    this( false, new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
}
171
172
/**
 * Creates an HTML (not XHTML) serializer using the given output format.
 * No output target is attached; call {@link #setOutputCharStream} or
 * {@link #setOutputByteStream} before serializing.
 *
 * @param format The output format to use, null for the default
 *   (HTML method, <tt>ISO-8859-1</tt> encoding)
 */
public HTMLSerializer( OutputFormat format )
{
    this( false, format == null ? new OutputFormat( Method.HTML, "ISO-8859-1", false ) : format );
}
182
183
184
/**
 * Creates an HTML (not XHTML) serializer that writes to the given
 * writer using the given output format.
 *
 * @param writer The writer to use
 * @param format The output format to use, null for the default
 *   (HTML method, <tt>ISO-8859-1</tt> encoding)
 */
public HTMLSerializer( Writer writer, OutputFormat format )
{
    this( false, format == null ? new OutputFormat( Method.HTML, "ISO-8859-1", false ) : format );
    this.setOutputCharStream( writer );
}
198
199
/**
 * Creates an HTML (not XHTML) serializer that writes to the given
 * output stream using the given output format.
 *
 * @param output The output stream to use
 * @param format The output format to use, null for the default
 *   (HTML method, <tt>ISO-8859-1</tt> encoding)
 */
public HTMLSerializer( OutputStream output, OutputFormat format )
{
    this( false, format == null ? new OutputFormat( Method.HTML, "ISO-8859-1", false ) : format );
    this.setOutputByteStream( output );
}
213
214
215     public void setOutputFormat( OutputFormat format )
216     {
217         super.setOutputFormat( format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
218     }
219
220     // Set value for alternate XHTML namespace.
221
public void setXHTMLNamespace(String JavaDoc newNamespace) {
222         fUserXHTMLNamespace = newNamespace;
223     } // setXHTMLNamespace(String)
224

225     //-----------------------------------------//
226
// SAX content handler serializing methods //
227
//-----------------------------------------//
228

229
230     public void startElement( String JavaDoc namespaceURI, String JavaDoc localName,
231                               String JavaDoc rawName, Attributes JavaDoc attrs )
232         throws SAXException JavaDoc
233     {
234         int i;
235         boolean preserveSpace;
236         ElementState state;
237         String JavaDoc name;
238         String JavaDoc value;
239         String JavaDoc htmlName;
240         boolean addNSAttr = false;
241
242         try {
243             if ( _printer == null )
244                 throw new IllegalStateException JavaDoc(
245                     DOMMessageFormatter.formatMessage(
246                     DOMMessageFormatter.SERIALIZER_DOMAIN,
247                     "NoWriterSupplied", null));
248
249             state = getElementState();
250             if ( isDocumentState() ) {
251                 // If this is the root element handle it differently.
252
// If the first root element in the document, serialize
253
// the document's DOCTYPE. Space preserving defaults
254
// to that of the output format.
255
if ( ! _started )
256                     startDocument( (localName == null || localName.length() == 0)
257                         ? rawName : localName );
258             } else {
259                 // For any other element, if first in parent, then
260
// close parent's opening tag and use the parnet's
261
// space preserving.
262
if ( state.empty )
263                     _printer.printText( '>' );
264                 // Indent this element on a new line if the first
265
// content of the parent element or immediately
266
// following an element.
267
if ( _indenting && ! state.preserveSpace &&
268                      ( state.empty || state.afterElement ) )
269                     _printer.breakLine();
270             }
271             preserveSpace = state.preserveSpace;
272
273             // Do not change the current element state yet.
274
// This only happens in endElement().
275

276             // As per SAX2, the namespace URI is an empty string if the element has no
277
// namespace URI, or namespaces is turned off. The check against null protects
278
// against broken SAX implementations, so I've left it there. - mrglavas
279
boolean hasNamespaceURI = (namespaceURI != null && namespaceURI.length() != 0);
280
281             // SAX2: rawName (QName) could be empty string if
282
// namespace-prefixes property is false.
283
if ( rawName == null || rawName.length() == 0) {
284                 rawName = localName;
285                 if ( hasNamespaceURI ) {
286                     String JavaDoc prefix;
287                     prefix = getPrefix( namespaceURI );
288                     if ( prefix != null && prefix.length() != 0 )
289                         rawName = prefix + ":" + localName;
290                 }
291                 addNSAttr = true;
292             }
293             if ( !hasNamespaceURI )
294                 htmlName = rawName;
295             else {
296                 if ( namespaceURI.equals( XHTMLNamespace ) ||
297                         (fUserXHTMLNamespace != null && fUserXHTMLNamespace.equals(namespaceURI)) )
298                     htmlName = localName;
299                 else
300                     htmlName = null;
301             }
302
303             // XHTML: element names are lower case, DOM will be different
304
_printer.printText( '<' );
305             if ( _xhtml )
306                 _printer.printText( rawName.toLowerCase(Locale.ENGLISH) );
307             else
308                 _printer.printText( rawName );
309             _printer.indent();
310
311             // For each attribute serialize it's name and value as one part,
312
// separated with a space so the element can be broken on
313
// multiple lines.
314
if ( attrs != null ) {
315                 for ( i = 0 ; i < attrs.getLength() ; ++i ) {
316                     _printer.printSpace();
317                     name = attrs.getQName( i ).toLowerCase(Locale.ENGLISH);
318                     value = attrs.getValue( i );
319                     if ( _xhtml || hasNamespaceURI ) {
320                         // XHTML: print empty string for null values.
321
if ( value == null ) {
322                             _printer.printText( name );
323                             _printer.printText( "=\"\"" );
324                         } else {
325                             _printer.printText( name );
326                             _printer.printText( "=\"" );
327                             printEscaped( value );
328                             _printer.printText( '"' );
329                         }
330                     } else {
331                         // HTML: Empty values print as attribute name, no value.
332
// HTML: URI attributes will print unescaped
333
if ( value == null ) {
334                             value = "";
335                         }
336                         if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
337                             _printer.printText( name );
338                         else if ( HTMLdtd.isURI( rawName, name ) ) {
339                             _printer.printText( name );
340                             _printer.printText( "=\"" );
341                             _printer.printText( escapeURI( value ) );
342                             _printer.printText( '"' );
343                         } else if ( HTMLdtd.isBoolean( rawName, name ) )
344                             _printer.printText( name );
345                         else {
346                             _printer.printText( name );
347                             _printer.printText( "=\"" );
348                             printEscaped( value );
349                             _printer.printText( '"' );
350                         }
351                     }
352                 }
353             }
354             if ( htmlName != null && HTMLdtd.isPreserveSpace( htmlName ) )
355                 preserveSpace = true;
356
357             if ( addNSAttr ) {
358                 Enumeration JavaDoc keys;
359
360                 keys = _prefixes.keys();
361                 while ( keys.hasMoreElements() ) {
362                     _printer.printSpace();
363                     value = (String JavaDoc) keys.nextElement();
364                     name = (String JavaDoc) _prefixes.get( value );
365                     if ( name.length() == 0 ) {
366                         _printer.printText( "xmlns=\"" );
367                         printEscaped( value );
368                         _printer.printText( '"' );
369                     } else {
370                         _printer.printText( "xmlns:" );
371                         _printer.printText( name );
372                         _printer.printText( "=\"" );
373                         printEscaped( value );
374                         _printer.printText( '"' );
375                     }
376                 }
377             }
378
379             // Now it's time to enter a new element state
380
// with the tag name and space preserving.
381
// We still do not change the curent element state.
382
state = enterElementState( namespaceURI, localName, rawName, preserveSpace );
383
384             // Prevents line breaks inside A/TD
385

386             if ( htmlName != null && ( htmlName.equalsIgnoreCase( "A" ) ||
387                                        htmlName.equalsIgnoreCase( "TD" ) ) ) {
388                 state.empty = false;
389                 _printer.printText( '>' );
390             }
391
392             // Handle SCRIPT and STYLE specifically by changing the
393
// state of the current element to CDATA (XHTML) or
394
// unescaped (HTML).
395
if ( htmlName != null && ( rawName.equalsIgnoreCase( "SCRIPT" ) ||
396                                        rawName.equalsIgnoreCase( "STYLE" ) ) ) {
397                 if ( _xhtml ) {
398                     // XHTML: Print contents as CDATA section
399
state.doCData = true;
400                 } else {
401                     // HTML: Print contents unescaped
402
state.unescaped = true;
403                 }
404             }
405         } catch ( IOException JavaDoc except ) {
406             throw new SAXException JavaDoc( except );
407         }
408     }
409
410
411     public void endElement( String JavaDoc namespaceURI, String JavaDoc localName,
412                             String JavaDoc rawName )
413         throws SAXException JavaDoc
414     {
415         try {
416             endElementIO( namespaceURI, localName, rawName );
417         } catch ( IOException JavaDoc except ) {
418             throw new SAXException JavaDoc( except );
419         }
420     }
421
422
423     public void endElementIO( String JavaDoc namespaceURI, String JavaDoc localName,
424                               String JavaDoc rawName )
425         throws IOException JavaDoc
426     {
427         ElementState state;
428         String JavaDoc htmlName;
429
430         // Works much like content() with additions for closing
431
// an element. Note the different checks for the closed
432
// element's state and the parent element's state.
433
_printer.unindent();
434         state = getElementState();
435
436         if ( state.namespaceURI == null || state.namespaceURI.length() == 0 )
437             htmlName = state.rawName;
438         else {
439             if ( state.namespaceURI.equals( XHTMLNamespace ) ||
440                         (fUserXHTMLNamespace != null && fUserXHTMLNamespace.equals(state.namespaceURI)) )
441                 htmlName = state.localName;
442             else
443                 htmlName = null;
444         }
445
446         if ( _xhtml) {
447             if ( state.empty ) {
448                 _printer.printText( " />" );
449             } else {
450                 // Must leave CData section first
451
if ( state.inCData )
452                     _printer.printText( "]]>" );
453                 // XHTML: element names are lower case, DOM will be different
454
_printer.printText( "</" );
455                 _printer.printText( state.rawName.toLowerCase(Locale.ENGLISH) );
456                 _printer.printText( '>' );
457             }
458         } else {
459             if ( state.empty )
460                 _printer.printText( '>' );
461             // This element is not empty and that last content was
462
// another element, so print a line break before that
463
// last element and this element's closing tag.
464
// [keith] Provided this is not an anchor.
465
// HTML: some elements do not print closing tag (e.g. LI)
466
if ( htmlName == null || ! HTMLdtd.isOnlyOpening( htmlName ) ) {
467                 if ( _indenting && ! state.preserveSpace && state.afterElement )
468                     _printer.breakLine();
469                 // Must leave CData section first (Illegal in HTML, but still)
470
if ( state.inCData )
471                     _printer.printText( "]]>" );
472                 _printer.printText( "</" );
473                 _printer.printText( state.rawName );
474                 _printer.printText( '>' );
475             }
476         }
477         // Leave the element state and update that of the parent
478
// (if we're not root) to not empty and after element.
479
state = leaveElementState();
480         // Temporary hack to prevent line breaks inside A/TD
481
if ( htmlName == null || ( ! htmlName.equalsIgnoreCase( "A" ) &&
482                                    ! htmlName.equalsIgnoreCase( "TD" ) ) )
483
484             state.afterElement = true;
485         state.empty = false;
486         if ( isDocumentState() )
487             _printer.flush();
488     }
489
490
491     //------------------------------------------//
492
// SAX document handler serializing methods //
493
//------------------------------------------//
494

495
496     public void characters( char[] chars, int start, int length )
497         throws SAXException JavaDoc
498     {
499         ElementState state;
500
501         try {
502             // HTML: no CDATA section
503
state = content();
504             state.doCData = false;
505             super.characters( chars, start, length );
506         } catch ( IOException JavaDoc except ) {
507             throw new SAXException JavaDoc( except );
508         }
509     }
510
511
512     public void startElement( String JavaDoc tagName, AttributeList JavaDoc attrs )
513         throws SAXException JavaDoc
514     {
515         int i;
516         boolean preserveSpace;
517         ElementState state;
518         String JavaDoc name;
519         String JavaDoc value;
520
521         try {
522             if ( _printer == null )
523                 throw new IllegalStateException JavaDoc(
524                     DOMMessageFormatter.formatMessage(
525                     DOMMessageFormatter.SERIALIZER_DOMAIN,
526                     "NoWriterSupplied", null));
527
528
529             state = getElementState();
530             if ( isDocumentState() ) {
531                 // If this is the root element handle it differently.
532
// If the first root element in the document, serialize
533
// the document's DOCTYPE. Space preserving defaults
534
// to that of the output format.
535
if ( ! _started )
536                     startDocument( tagName );
537             } else {
538                 // For any other element, if first in parent, then
539
// close parent's opening tag and use the parnet's
540
// space preserving.
541
if ( state.empty )
542                     _printer.printText( '>' );
543                 // Indent this element on a new line if the first
544
// content of the parent element or immediately
545
// following an element.
546
if ( _indenting && ! state.preserveSpace &&
547                      ( state.empty || state.afterElement ) )
548                     _printer.breakLine();
549             }
550             preserveSpace = state.preserveSpace;
551
552             // Do not change the current element state yet.
553
// This only happens in endElement().
554

555             // XHTML: element names are lower case, DOM will be different
556
_printer.printText( '<' );
557             if ( _xhtml )
558                 _printer.printText( tagName.toLowerCase(Locale.ENGLISH) );
559             else
560                 _printer.printText( tagName );
561             _printer.indent();
562
563             // For each attribute serialize it's name and value as one part,
564
// separated with a space so the element can be broken on
565
// multiple lines.
566
if ( attrs != null ) {
567                 for ( i = 0 ; i < attrs.getLength() ; ++i ) {
568                     _printer.printSpace();
569                     name = attrs.getName( i ).toLowerCase(Locale.ENGLISH);
570                     value = attrs.getValue( i );
571                     if ( _xhtml ) {
572                         // XHTML: print empty string for null values.
573
if ( value == null ) {
574                             _printer.printText( name );
575                             _printer.printText( "=\"\"" );
576                         } else {
577                             _printer.printText( name );
578                             _printer.printText( "=\"" );
579                             printEscaped( value );
580                             _printer.printText( '"' );
581                         }
582                     } else {
583                         // HTML: Empty values print as attribute name, no value.
584
// HTML: URI attributes will print unescaped
585
if ( value == null ) {
586                             value = "";
587                         }
588                         if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
589                             _printer.printText( name );
590                         else if ( HTMLdtd.isURI( tagName, name ) ) {
591                             _printer.printText( name );
592                             _printer.printText( "=\"" );
593                             _printer.printText( escapeURI( value ) );
594                             _printer.printText( '"' );
595                         } else if ( HTMLdtd.isBoolean( tagName, name ) )
596                             _printer.printText( name );
597                         else {
598                             _printer.printText( name );
599                             _printer.printText( "=\"" );
600                             printEscaped( value );
601                             _printer.printText( '"' );
602                         }
603                     }
604                 }
605             }
606             if ( HTMLdtd.isPreserveSpace( tagName ) )
607                 preserveSpace = true;
608
609             // Now it's time to enter a new element state
610
// with the tag name and space preserving.
611
// We still do not change the curent element state.
612
state = enterElementState( null, null, tagName, preserveSpace );
613
614             // Prevents line breaks inside A/TD
615
if ( tagName.equalsIgnoreCase( "A" ) || tagName.equalsIgnoreCase( "TD" ) ) {
616                 state.empty = false;
617                 _printer.printText( '>' );
618             }
619
620             // Handle SCRIPT and STYLE specifically by changing the
621
// state of the current element to CDATA (XHTML) or
622
// unescaped (HTML).
623
if ( tagName.equalsIgnoreCase( "SCRIPT" ) ||
624                  tagName.equalsIgnoreCase( "STYLE" ) ) {
625                 if ( _xhtml ) {
626                     // XHTML: Print contents as CDATA section
627
state.doCData = true;
628                 } else {
629                     // HTML: Print contents unescaped
630
state.unescaped = true;
631                 }
632             }
633         } catch ( IOException JavaDoc except ) {
634             throw new SAXException JavaDoc( except );
635         }
636     }
637
638
639     public void endElement( String JavaDoc tagName )
640         throws SAXException JavaDoc
641     {
642         endElement( null, null, tagName );
643     }
644
645
646     //------------------------------------------//
647
// Generic node serializing methods         //
648
//------------------------------------------//
649

650
651     /**
652      * Called to serialize the document's DOCTYPE by the root element.
653      * The document type declaration must name the root element,
654      * but the root element is only known when that element is serialized,
655      * and not at the start of the document.
656      * <p>
657      * This method will check if it has not been called before ({@link #_started}),
658      * will serialize the document type declaration, and will serialize all
659      * pre-root comments and PIs that were accumulated in the document
660      * (see {@link #serializePreRoot}). Pre-root will be serialized even if
661      * this is not the first root element of the document.
662      */

663     protected void startDocument( String JavaDoc rootTagName )
664         throws IOException JavaDoc
665     {
666         StringBuffer JavaDoc buffer;
667
668         // Not supported in HTML/XHTML, but we still have to switch
669
// out of DTD mode.
670
_printer.leaveDTD();
671         if ( ! _started ) {
672             // If the public and system identifiers were not specified
673
// in the output format, use the appropriate ones for HTML
674
// or XHTML.
675
if ( _docTypePublicId == null && _docTypeSystemId == null ) {
676                 if ( _xhtml ) {
677                     _docTypePublicId = HTMLdtd.XHTMLPublicId;
678                     _docTypeSystemId = HTMLdtd.XHTMLSystemId;
679                 } else {
680                     _docTypePublicId = HTMLdtd.HTMLPublicId;
681                     _docTypeSystemId = HTMLdtd.HTMLSystemId;
682                 }
683             }
684
685             if ( ! _format.getOmitDocumentType() ) {
686                 // XHTML: If public identifier and system identifier
687
// specified, print them, else print just system identifier
688
// HTML: If public identifier specified, print it with
689
// system identifier, if specified.
690
// XHTML requires that all element names are lower case, so the
691
// root on the DOCTYPE must be 'html'. - mrglavas
692
if ( _docTypePublicId != null && ( ! _xhtml || _docTypeSystemId != null ) ) {
693                     if (_xhtml) {
694                         _printer.printText( "<!DOCTYPE html PUBLIC " );
695                     }
696                     else {
697                         _printer.printText( "<!DOCTYPE HTML PUBLIC " );
698                     }
699                     printDoctypeURL( _docTypePublicId );
700                     if ( _docTypeSystemId != null ) {
701                         if ( _indenting ) {
702                             _printer.breakLine();
703                             _printer.printText( " " );
704                         } else
705                         _printer.printText( ' ' );
706                         printDoctypeURL( _docTypeSystemId );
707                     }
708                     _printer.printText( '>' );
709                     _printer.breakLine();
710                 } else if ( _docTypeSystemId != null ) {
711                     if (_xhtml) {
712                         _printer.printText( "<!DOCTYPE html SYSTEM " );
713                     }
714                     else {
715                         _printer.printText( "<!DOCTYPE HTML SYSTEM " );
716                     }
717                     printDoctypeURL( _docTypeSystemId );
718                     _printer.printText( '>' );
719                     _printer.breakLine();
720                 }
721             }
722         }
723
724         _started = true;
725         // Always serialize these, even if not te first root element.
726
serializePreRoot();
727     }
728
729
730     /**
731      * Called to serialize a DOM element. Equivalent to calling {@link
732      * #startElement}, {@link #endElement} and serializing everything
733      * inbetween, but better optimized.
734      */

735     protected void serializeElement( Element JavaDoc elem )
736         throws IOException JavaDoc
737     {
738         Attr JavaDoc attr;
739         NamedNodeMap JavaDoc attrMap;
740         int i;
741         Node JavaDoc child;
742         ElementState state;
743         boolean preserveSpace;
744         String JavaDoc name;
745         String JavaDoc value;
746         String JavaDoc tagName;
747
748         tagName = elem.getTagName();
749         state = getElementState();
750         if ( isDocumentState() ) {
751             // If this is the root element handle it differently.
752
// If the first root element in the document, serialize
753
// the document's DOCTYPE. Space preserving defaults
754
// to that of the output format.
755
if ( ! _started )
756                 startDocument( tagName );
757         } else {
758             // For any other element, if first in parent, then
759
// close parent's opening tag and use the parnet's
760
// space preserving.
761
if ( state.empty )
762                 _printer.printText( '>' );
763             // Indent this element on a new line if the first
764
// content of the parent element or immediately
765
// following an element.
766
if ( _indenting && ! state.preserveSpace &&
767                  ( state.empty || state.afterElement ) )
768                 _printer.breakLine();
769         }
770         preserveSpace = state.preserveSpace;
771
772         // Do not change the current element state yet.
773
// This only happens in endElement().
774

775         // XHTML: element names are lower case, DOM will be different
776
_printer.printText( '<' );
777         if ( _xhtml )
778             _printer.printText( tagName.toLowerCase(Locale.ENGLISH) );
779         else
780             _printer.printText( tagName );
781         _printer.indent();
782
783         // Lookup the element's attribute, but only print specified
784
// attributes. (Unspecified attributes are derived from the DTD.
785
// For each attribute print it's name and value as one part,
786
// separated with a space so the element can be broken on
787
// multiple lines.
788
attrMap = elem.getAttributes();
789         if ( attrMap != null ) {
790             for ( i = 0 ; i < attrMap.getLength() ; ++i ) {
791                 attr = (Attr JavaDoc) attrMap.item( i );
792                 name = attr.getName().toLowerCase(Locale.ENGLISH);
793                 value = attr.getValue();
794                 if ( attr.getSpecified() ) {
795                     _printer.printSpace();
796                     if ( _xhtml ) {
797                         // XHTML: print empty string for null values.
798
if ( value == null ) {
799                             _printer.printText( name );
800                             _printer.printText( "=\"\"" );
801                         } else {
802                             _printer.printText( name );
803                             _printer.printText( "=\"" );
804                             printEscaped( value );
805                             _printer.printText( '"' );
806                         }
807                     } else {
808                         // HTML: Empty values print as attribute name, no value.
809
// HTML: URI attributes will print unescaped
810
if ( value == null ) {
811                             value = "";
812                         }
813                         if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
814                             _printer.printText( name );
815                         else if ( HTMLdtd.isURI( tagName, name ) ) {
816                             _printer.printText( name );
817                             _printer.printText( "=\"" );
818                             _printer.printText( escapeURI( value ) );
819                             _printer.printText( '"' );
820                         } else if ( HTMLdtd.isBoolean( tagName, name ) )
821                             _printer.printText( name );
822                         else {
823                             _printer.printText( name );
824                             _printer.printText( "=\"" );
825                             printEscaped( value );
826                             _printer.printText( '"' );
827                         }
828                     }
829                 }
830             }
831         }
832         if ( HTMLdtd.isPreserveSpace( tagName ) )
833             preserveSpace = true;
834
835         // If element has children, or if element is not an empty tag,
836
// serialize an opening tag.
837
if ( elem.hasChildNodes() || ! HTMLdtd.isEmptyTag( tagName ) ) {
838             // Enter an element state, and serialize the children
839
// one by one. Finally, end the element.
840
state = enterElementState( null, null, tagName, preserveSpace );
841
842             // Prevents line breaks inside A/TD
843
if ( tagName.equalsIgnoreCase( "A" ) || tagName.equalsIgnoreCase( "TD" ) ) {
844                 state.empty = false;
845                 _printer.printText( '>' );
846             }
847
848             // Handle SCRIPT and STYLE specifically by changing the
849
// state of the current element to CDATA (XHTML) or
850
// unescaped (HTML).
851
if ( tagName.equalsIgnoreCase( "SCRIPT" ) ||
852                  tagName.equalsIgnoreCase( "STYLE" ) ) {
853                 if ( _xhtml ) {
854                     // XHTML: Print contents as CDATA section
855
state.doCData = true;
856                 } else {
857                     // HTML: Print contents unescaped
858
state.unescaped = true;
859                 }
860             }
861             child = elem.getFirstChild();
862             while ( child != null ) {
863                 serializeNode( child );
864                 child = child.getNextSibling();
865             }
866             endElementIO( null, null, tagName );
867         } else {
868             _printer.unindent();
869             // XHTML: Close empty tag with ' />' so it's XML and HTML compatible.
870
// HTML: Empty tags are defined as such in DTD no in document.
871
if ( _xhtml )
872                 _printer.printText( " />" );
873             else
874                 _printer.printText( '>' );
875             // After element but parent element is no longer empty.
876
state.afterElement = true;
877             state.empty = false;
878             if ( isDocumentState() )
879                 _printer.flush();
880         }
881     }
882
883
884
885     protected void characters( String JavaDoc text )
886         throws IOException JavaDoc
887     {
888         ElementState state;
889
890         // HTML: no CDATA section
891
state = content();
892         super.characters( text );
893     }
894
895
896     protected String JavaDoc getEntityRef( int ch )
897     {
898         return HTMLdtd.fromChar( ch );
899     }
900
901
902     protected String JavaDoc escapeURI( String JavaDoc uri )
903     {
904         int index;
905
906         // XXX Apparently Netscape doesn't like if we escape the URI
907
// using %nn, so we leave it as is, just remove any quotes.
908
index = uri.indexOf( "\"" );
909         if ( index >= 0 )
910             return uri.substring( 0, index );
911         else
912             return uri;
913     }
914
915
916 }
917
918
919
920
921
Popular Tags