KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > xml > serialize > HTMLSerializer


1 /*
2  * Copyright 1999-2004 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16
17
18 // Sep 14, 2000:
19
// Fixed serializer to report IO exception directly, instead of at
20
// the end of document processing.
21
// Reported by Patrick Higgins <phiggins@transzap.com>
22
// Aug 21, 2000:
23
// Fixed bug in startDocument not calling prepare.
24
// Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
25
// Aug 21, 2000:
26
// Added ability to omit DOCTYPE declaration.
27
// Sep 1, 2000:
28
// If no output format is provided the serializer now defaults
29
// to ISO-8859-1 encoding. Reported by Mikael Staldal
30
// <d96-mst@d.kth.se>
31

32
33 package org.apache.xml.serialize;
34
35 import org.apache.xerces.dom.DOMMessageFormatter;
36
37 import java.io.IOException JavaDoc;
38 import java.io.OutputStream JavaDoc;
39 import java.io.Writer JavaDoc;
40 import java.util.Enumeration JavaDoc;
41 import java.util.Locale JavaDoc;
42
43 import org.w3c.dom.Attr JavaDoc;
44 import org.w3c.dom.Element JavaDoc;
45 import org.w3c.dom.NamedNodeMap JavaDoc;
46 import org.w3c.dom.Node JavaDoc;
47 import org.xml.sax.AttributeList JavaDoc;
48 import org.xml.sax.Attributes JavaDoc;
49 import org.xml.sax.SAXException JavaDoc;
50
51
52 /**
53  * Implements an HTML/XHTML serializer supporting both DOM and SAX
54  * pretty serializing. HTML/XHTML mode is determined in the
55  * constructor. For usage instructions see {@link Serializer}.
56  * <p>
57  * If an output stream is used, the encoding is taken from the
58  * output format (defaults to <tt>UTF-8</tt>). If a writer is
59  * used, make sure the writer uses the same encoding (if applies)
60  * as specified in the output format.
61  * <p>
62  * The serializer supports both DOM and SAX. DOM serializing is done
63  * by calling {@link #serialize} and SAX serializing is done by firing
64  * SAX events and using the serializer as a document handler.
65  * <p>
66  * If an I/O exception occurs while serializing, the serializer
67  * will not throw an exception directly, but only throw it
68  * at the end of serializing (either DOM or SAX's {@link
69  * org.xml.sax.DocumentHandler#endDocument}).
70  * <p>
71  * For elements that are not specified as whitespace preserving,
72  * the serializer will potentially break long text lines at space
73  * boundaries, indent lines, and serialize elements on separate
74  * lines. Line terminators will be regarded as spaces, and
75  * spaces at beginning of line will be stripped.
76  * <p>
77  * XHTML is slightly different than HTML:
78  * <ul>
79  * <li>Element/attribute names are lower case and case matters
80  * <li>Attributes must specify value, even if empty string
81  * <li>Empty elements must have '/' in empty tag
82  * <li>Contents of SCRIPT and STYLE elements serialized as CDATA
83  * </ul>
84  *
85  * @deprecated This class was deprecated in Xerces 2.6.2. It is
86  * recommended that new applications use JAXP's Transformation API
87  * for XML (TrAX) for serializing HTML. See the Xerces documentation
88  * for more information.
89  * @version $Revision: 1.28 $ $Date: 2004/09/06 22:57:13 $
90  * @author <a HREF="mailto:arkin@intalio.com">Assaf Arkin</a>
91  * @see Serializer
92  */

93 public class HTMLSerializer
94     extends BaseMarkupSerializer
95 {
96
97
98     /**
99      * True if serializing in XHTML format.
100      */

101     private boolean _xhtml;
102
103
104     public static final String JavaDoc XHTMLNamespace = "http://www.w3.org/1999/xhtml";
105
106     // for users to override XHTMLNamespace if need be.
107
private String JavaDoc fUserXHTMLNamespace = null;
108
109
110     /**
111      * Constructs a new HTML/XHTML serializer depending on the value of
112      * <tt>xhtml</tt>. The serializer cannot be used without calling
113      * {@link #setOutputCharStream} or {@link #setOutputByteStream} first.
114      *
115      * @param xhtml True if XHTML serializing
116      */

protected HTMLSerializer(boolean xhtml, OutputFormat format) {
    // Hand the output format to the base class, then record the mode flag.
    super(format);
    this._xhtml = xhtml;
}
122
123
124     /**
125      * Constructs a new serializer. The serializer cannot be used without
126      * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
127      * first.
128      */

public HTMLSerializer() {
    // HTML (not XHTML) mode with the default HTML / ISO-8859-1 output format.
    this(false, new OutputFormat(Method.HTML, "ISO-8859-1", false));
}
133
134
135     /**
136      * Constructs a new serializer. The serializer cannot be used without
137      * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
138      * first.
139      */

public HTMLSerializer(OutputFormat format) {
    // A null format is replaced by the default HTML / ISO-8859-1 format.
    this(false,
         format == null
             ? new OutputFormat(Method.HTML, "ISO-8859-1", false)
             : format);
}
144
145
146
147     /**
148      * Constructs a new serializer that writes to the specified writer
149      * using the specified output format. If <tt>format</tt> is null,
150      * will use a default output format.
151      *
152      * @param writer The writer to use
153      * @param format The output format to use, null for the default
154      */

public HTMLSerializer(Writer writer, OutputFormat format) {
    // A null format is replaced by the default HTML / ISO-8859-1 format.
    this(false,
         format == null
             ? new OutputFormat(Method.HTML, "ISO-8859-1", false)
             : format);
    // Direct all output to the supplied character stream.
    setOutputCharStream(writer);
}
160
161
162     /**
163      * Constructs a new serializer that writes to the specified output
164      * stream using the specified output format. If <tt>format</tt>
165      * is null, will use a default output format.
166      *
167      * @param output The output stream to use
168      * @param format The output format to use, null for the default
169      */

public HTMLSerializer(OutputStream output, OutputFormat format) {
    // A null format is replaced by the default HTML / ISO-8859-1 format.
    this(false,
         format == null
             ? new OutputFormat(Method.HTML, "ISO-8859-1", false)
             : format);
    // Direct all output to the supplied byte stream.
    setOutputByteStream(output);
}
175
176
177     public void setOutputFormat( OutputFormat format )
178     {
179         super.setOutputFormat( format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
180     }
181
182     // Set value for alternate XHTML namespace.
183
public void setXHTMLNamespace(String JavaDoc newNamespace) {
184         fUserXHTMLNamespace = newNamespace;
185     } // setXHTMLNamespace(String)
186

187     //-----------------------------------------//
188
// SAX content handler serializing methods //
189
//-----------------------------------------//
190

191
192     public void startElement( String JavaDoc namespaceURI, String JavaDoc localName,
193                               String JavaDoc rawName, Attributes JavaDoc attrs )
194         throws SAXException JavaDoc
195     {
196         int i;
197         boolean preserveSpace;
198         ElementState state;
199         String JavaDoc name;
200         String JavaDoc value;
201         String JavaDoc htmlName;
202         boolean addNSAttr = false;
203
204         try {
205             if ( _printer == null )
206                 throw new IllegalStateException JavaDoc(
207                     DOMMessageFormatter.formatMessage(
208                     DOMMessageFormatter.SERIALIZER_DOMAIN,
209                     "NoWriterSupplied", null));
210
211             state = getElementState();
212             if ( isDocumentState() ) {
213                 // If this is the root element handle it differently.
214
// If the first root element in the document, serialize
215
// the document's DOCTYPE. Space preserving defaults
216
// to that of the output format.
217
if ( ! _started )
218                     startDocument( (localName == null || localName.length() == 0)
219                         ? rawName : localName );
220             } else {
221                 // For any other element, if first in parent, then
222
// close parent's opening tag and use the parnet's
223
// space preserving.
224
if ( state.empty )
225                     _printer.printText( '>' );
226                 // Indent this element on a new line if the first
227
// content of the parent element or immediately
228
// following an element.
229
if ( _indenting && ! state.preserveSpace &&
230                      ( state.empty || state.afterElement ) )
231                     _printer.breakLine();
232             }
233             preserveSpace = state.preserveSpace;
234
235             // Do not change the current element state yet.
236
// This only happens in endElement().
237

238             // As per SAX2, the namespace URI is an empty string if the element has no
239
// namespace URI, or namespaces is turned off. The check against null protects
240
// against broken SAX implementations, so I've left it there. - mrglavas
241
boolean hasNamespaceURI = (namespaceURI != null && namespaceURI.length() != 0);
242
243             // SAX2: rawName (QName) could be empty string if
244
// namespace-prefixes property is false.
245
if ( rawName == null || rawName.length() == 0) {
246                 rawName = localName;
247                 if ( hasNamespaceURI ) {
248                     String JavaDoc prefix;
249                     prefix = getPrefix( namespaceURI );
250                     if ( prefix != null && prefix.length() != 0 )
251                         rawName = prefix + ":" + localName;
252                 }
253                 addNSAttr = true;
254             }
255             if ( !hasNamespaceURI )
256                 htmlName = rawName;
257             else {
258                 if ( namespaceURI.equals( XHTMLNamespace ) ||
259                         (fUserXHTMLNamespace != null && fUserXHTMLNamespace.equals(namespaceURI)) )
260                     htmlName = localName;
261                 else
262                     htmlName = null;
263             }
264
265             // XHTML: element names are lower case, DOM will be different
266
_printer.printText( '<' );
267             if ( _xhtml )
268                 _printer.printText( rawName.toLowerCase(Locale.ENGLISH) );
269             else
270                 _printer.printText( rawName );
271             _printer.indent();
272
273             // For each attribute serialize it's name and value as one part,
274
// separated with a space so the element can be broken on
275
// multiple lines.
276
if ( attrs != null ) {
277                 for ( i = 0 ; i < attrs.getLength() ; ++i ) {
278                     _printer.printSpace();
279                     name = attrs.getQName( i ).toLowerCase(Locale.ENGLISH);
280                     value = attrs.getValue( i );
281                     if ( _xhtml || hasNamespaceURI ) {
282                         // XHTML: print empty string for null values.
283
if ( value == null ) {
284                             _printer.printText( name );
285                             _printer.printText( "=\"\"" );
286                         } else {
287                             _printer.printText( name );
288                             _printer.printText( "=\"" );
289                             printEscaped( value );
290                             _printer.printText( '"' );
291                         }
292                     } else {
293                         // HTML: Empty values print as attribute name, no value.
294
// HTML: URI attributes will print unescaped
295
if ( value == null ) {
296                             value = "";
297                         }
298                         if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
299                             _printer.printText( name );
300                         else if ( HTMLdtd.isURI( rawName, name ) ) {
301                             _printer.printText( name );
302                             _printer.printText( "=\"" );
303                             _printer.printText( escapeURI( value ) );
304                             _printer.printText( '"' );
305                         } else if ( HTMLdtd.isBoolean( rawName, name ) )
306                             _printer.printText( name );
307                         else {
308                             _printer.printText( name );
309                             _printer.printText( "=\"" );
310                             printEscaped( value );
311                             _printer.printText( '"' );
312                         }
313                     }
314                 }
315             }
316             if ( htmlName != null && HTMLdtd.isPreserveSpace( htmlName ) )
317                 preserveSpace = true;
318
319             if ( addNSAttr ) {
320                 Enumeration JavaDoc keys;
321
322                 keys = _prefixes.keys();
323                 while ( keys.hasMoreElements() ) {
324                     _printer.printSpace();
325                     value = (String JavaDoc) keys.nextElement();
326                     name = (String JavaDoc) _prefixes.get( value );
327                     if ( name.length() == 0 ) {
328                         _printer.printText( "xmlns=\"" );
329                         printEscaped( value );
330                         _printer.printText( '"' );
331                     } else {
332                         _printer.printText( "xmlns:" );
333                         _printer.printText( name );
334                         _printer.printText( "=\"" );
335                         printEscaped( value );
336                         _printer.printText( '"' );
337                     }
338                 }
339             }
340
341             // Now it's time to enter a new element state
342
// with the tag name and space preserving.
343
// We still do not change the curent element state.
344
state = enterElementState( namespaceURI, localName, rawName, preserveSpace );
345
346             // Prevents line breaks inside A/TD
347

348             if ( htmlName != null && ( htmlName.equalsIgnoreCase( "A" ) ||
349                                        htmlName.equalsIgnoreCase( "TD" ) ) ) {
350                 state.empty = false;
351                 _printer.printText( '>' );
352             }
353
354             // Handle SCRIPT and STYLE specifically by changing the
355
// state of the current element to CDATA (XHTML) or
356
// unescaped (HTML).
357
if ( htmlName != null && ( rawName.equalsIgnoreCase( "SCRIPT" ) ||
358                                        rawName.equalsIgnoreCase( "STYLE" ) ) ) {
359                 if ( _xhtml ) {
360                     // XHTML: Print contents as CDATA section
361
state.doCData = true;
362                 } else {
363                     // HTML: Print contents unescaped
364
state.unescaped = true;
365                 }
366             }
367         } catch ( IOException JavaDoc except ) {
368             throw new SAXException JavaDoc( except );
369         }
370     }
371
372
373     public void endElement( String JavaDoc namespaceURI, String JavaDoc localName,
374                             String JavaDoc rawName )
375         throws SAXException JavaDoc
376     {
377         try {
378             endElementIO( namespaceURI, localName, rawName );
379         } catch ( IOException JavaDoc except ) {
380             throw new SAXException JavaDoc( except );
381         }
382     }
383
384
385     public void endElementIO( String JavaDoc namespaceURI, String JavaDoc localName,
386                               String JavaDoc rawName )
387         throws IOException JavaDoc
388     {
389         ElementState state;
390         String JavaDoc htmlName;
391
392         // Works much like content() with additions for closing
393
// an element. Note the different checks for the closed
394
// element's state and the parent element's state.
395
_printer.unindent();
396         state = getElementState();
397
398         if ( state.namespaceURI == null || state.namespaceURI.length() == 0 )
399             htmlName = state.rawName;
400         else {
401             if ( state.namespaceURI.equals( XHTMLNamespace ) ||
402                         (fUserXHTMLNamespace != null && fUserXHTMLNamespace.equals(state.namespaceURI)) )
403                 htmlName = state.localName;
404             else
405                 htmlName = null;
406         }
407
408         if ( _xhtml) {
409             if ( state.empty ) {
410                 _printer.printText( " />" );
411             } else {
412                 // Must leave CData section first
413
if ( state.inCData )
414                     _printer.printText( "]]>" );
415                 // XHTML: element names are lower case, DOM will be different
416
_printer.printText( "</" );
417                 _printer.printText( state.rawName.toLowerCase(Locale.ENGLISH) );
418                 _printer.printText( '>' );
419             }
420         } else {
421             if ( state.empty )
422                 _printer.printText( '>' );
423             // This element is not empty and that last content was
424
// another element, so print a line break before that
425
// last element and this element's closing tag.
426
// [keith] Provided this is not an anchor.
427
// HTML: some elements do not print closing tag (e.g. LI)
428
if ( htmlName == null || ! HTMLdtd.isOnlyOpening( htmlName ) ) {
429                 if ( _indenting && ! state.preserveSpace && state.afterElement )
430                     _printer.breakLine();
431                 // Must leave CData section first (Illegal in HTML, but still)
432
if ( state.inCData )
433                     _printer.printText( "]]>" );
434                 _printer.printText( "</" );
435                 _printer.printText( state.rawName );
436                 _printer.printText( '>' );
437             }
438         }
439         // Leave the element state and update that of the parent
440
// (if we're not root) to not empty and after element.
441
state = leaveElementState();
442         // Temporary hack to prevent line breaks inside A/TD
443
if ( htmlName == null || ( ! htmlName.equalsIgnoreCase( "A" ) &&
444                                    ! htmlName.equalsIgnoreCase( "TD" ) ) )
445
446             state.afterElement = true;
447         state.empty = false;
448         if ( isDocumentState() )
449             _printer.flush();
450     }
451
452
453     //------------------------------------------//
454
// SAX document handler serializing methods //
455
//------------------------------------------//
456

457
458     public void characters( char[] chars, int start, int length )
459         throws SAXException JavaDoc
460     {
461         ElementState state;
462
463         try {
464             // HTML: no CDATA section
465
state = content();
466             state.doCData = false;
467             super.characters( chars, start, length );
468         } catch ( IOException JavaDoc except ) {
469             throw new SAXException JavaDoc( except );
470         }
471     }
472
473
474     public void startElement( String JavaDoc tagName, AttributeList JavaDoc attrs )
475         throws SAXException JavaDoc
476     {
477         int i;
478         boolean preserveSpace;
479         ElementState state;
480         String JavaDoc name;
481         String JavaDoc value;
482
483         try {
484             if ( _printer == null )
485                 throw new IllegalStateException JavaDoc(
486                     DOMMessageFormatter.formatMessage(
487                     DOMMessageFormatter.SERIALIZER_DOMAIN,
488                     "NoWriterSupplied", null));
489
490
491             state = getElementState();
492             if ( isDocumentState() ) {
493                 // If this is the root element handle it differently.
494
// If the first root element in the document, serialize
495
// the document's DOCTYPE. Space preserving defaults
496
// to that of the output format.
497
if ( ! _started )
498                     startDocument( tagName );
499             } else {
500                 // For any other element, if first in parent, then
501
// close parent's opening tag and use the parnet's
502
// space preserving.
503
if ( state.empty )
504                     _printer.printText( '>' );
505                 // Indent this element on a new line if the first
506
// content of the parent element or immediately
507
// following an element.
508
if ( _indenting && ! state.preserveSpace &&
509                      ( state.empty || state.afterElement ) )
510                     _printer.breakLine();
511             }
512             preserveSpace = state.preserveSpace;
513
514             // Do not change the current element state yet.
515
// This only happens in endElement().
516

517             // XHTML: element names are lower case, DOM will be different
518
_printer.printText( '<' );
519             if ( _xhtml )
520                 _printer.printText( tagName.toLowerCase(Locale.ENGLISH) );
521             else
522                 _printer.printText( tagName );
523             _printer.indent();
524
525             // For each attribute serialize it's name and value as one part,
526
// separated with a space so the element can be broken on
527
// multiple lines.
528
if ( attrs != null ) {
529                 for ( i = 0 ; i < attrs.getLength() ; ++i ) {
530                     _printer.printSpace();
531                     name = attrs.getName( i ).toLowerCase(Locale.ENGLISH);
532                     value = attrs.getValue( i );
533                     if ( _xhtml ) {
534                         // XHTML: print empty string for null values.
535
if ( value == null ) {
536                             _printer.printText( name );
537                             _printer.printText( "=\"\"" );
538                         } else {
539                             _printer.printText( name );
540                             _printer.printText( "=\"" );
541                             printEscaped( value );
542                             _printer.printText( '"' );
543                         }
544                     } else {
545                         // HTML: Empty values print as attribute name, no value.
546
// HTML: URI attributes will print unescaped
547
if ( value == null ) {
548                             value = "";
549                         }
550                         if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
551                             _printer.printText( name );
552                         else if ( HTMLdtd.isURI( tagName, name ) ) {
553                             _printer.printText( name );
554                             _printer.printText( "=\"" );
555                             _printer.printText( escapeURI( value ) );
556                             _printer.printText( '"' );
557                         } else if ( HTMLdtd.isBoolean( tagName, name ) )
558                             _printer.printText( name );
559                         else {
560                             _printer.printText( name );
561                             _printer.printText( "=\"" );
562                             printEscaped( value );
563                             _printer.printText( '"' );
564                         }
565                     }
566                 }
567             }
568             if ( HTMLdtd.isPreserveSpace( tagName ) )
569                 preserveSpace = true;
570
571             // Now it's time to enter a new element state
572
// with the tag name and space preserving.
573
// We still do not change the curent element state.
574
state = enterElementState( null, null, tagName, preserveSpace );
575
576             // Prevents line breaks inside A/TD
577
if ( tagName.equalsIgnoreCase( "A" ) || tagName.equalsIgnoreCase( "TD" ) ) {
578                 state.empty = false;
579                 _printer.printText( '>' );
580             }
581
582             // Handle SCRIPT and STYLE specifically by changing the
583
// state of the current element to CDATA (XHTML) or
584
// unescaped (HTML).
585
if ( tagName.equalsIgnoreCase( "SCRIPT" ) ||
586                  tagName.equalsIgnoreCase( "STYLE" ) ) {
587                 if ( _xhtml ) {
588                     // XHTML: Print contents as CDATA section
589
state.doCData = true;
590                 } else {
591                     // HTML: Print contents unescaped
592
state.unescaped = true;
593                 }
594             }
595         } catch ( IOException JavaDoc except ) {
596             throw new SAXException JavaDoc( except );
597         }
598     }
599
600
601     public void endElement( String JavaDoc tagName )
602         throws SAXException JavaDoc
603     {
604         endElement( null, null, tagName );
605     }
606
607
608     //------------------------------------------//
609
// Generic node serializing methods         //
610
//------------------------------------------//
611

612
613     /**
614      * Called to serialize the document's DOCTYPE by the root element.
615      * The document type declaration must name the root element,
616      * but the root element is only known when that element is serialized,
617      * and not at the start of the document.
618      * <p>
619      * This method will check if it has not been called before ({@link #_started}),
620      * will serialize the document type declaration, and will serialize all
621      * pre-root comments and PIs that were accumulated in the document
622      * (see {@link #serializePreRoot}). Pre-root will be serialized even if
623      * this is not the first root element of the document.
624      */

625     protected void startDocument( String JavaDoc rootTagName )
626         throws IOException JavaDoc
627     {
628         StringBuffer JavaDoc buffer;
629
630         // Not supported in HTML/XHTML, but we still have to switch
631
// out of DTD mode.
632
_printer.leaveDTD();
633         if ( ! _started ) {
634             // If the public and system identifiers were not specified
635
// in the output format, use the appropriate ones for HTML
636
// or XHTML.
637
if ( _docTypePublicId == null && _docTypeSystemId == null ) {
638                 if ( _xhtml ) {
639                     _docTypePublicId = HTMLdtd.XHTMLPublicId;
640                     _docTypeSystemId = HTMLdtd.XHTMLSystemId;
641                 } else {
642                     _docTypePublicId = HTMLdtd.HTMLPublicId;
643                     _docTypeSystemId = HTMLdtd.HTMLSystemId;
644                 }
645             }
646
647             if ( ! _format.getOmitDocumentType() ) {
648                 // XHTML: If public identifier and system identifier
649
// specified, print them, else print just system identifier
650
// HTML: If public identifier specified, print it with
651
// system identifier, if specified.
652
// XHTML requires that all element names are lower case, so the
653
// root on the DOCTYPE must be 'html'. - mrglavas
654
if ( _docTypePublicId != null && ( ! _xhtml || _docTypeSystemId != null ) ) {
655                     if (_xhtml) {
656                         _printer.printText( "<!DOCTYPE html PUBLIC " );
657                     }
658                     else {
659                         _printer.printText( "<!DOCTYPE HTML PUBLIC " );
660                     }
661                     printDoctypeURL( _docTypePublicId );
662                     if ( _docTypeSystemId != null ) {
663                         if ( _indenting ) {
664                             _printer.breakLine();
665                             _printer.printText( " " );
666                         } else
667                         _printer.printText( ' ' );
668                         printDoctypeURL( _docTypeSystemId );
669                     }
670                     _printer.printText( '>' );
671                     _printer.breakLine();
672                 } else if ( _docTypeSystemId != null ) {
673                     if (_xhtml) {
674                         _printer.printText( "<!DOCTYPE html SYSTEM " );
675                     }
676                     else {
677                         _printer.printText( "<!DOCTYPE HTML SYSTEM " );
678                     }
679                     printDoctypeURL( _docTypeSystemId );
680                     _printer.printText( '>' );
681                     _printer.breakLine();
682                 }
683             }
684         }
685
686         _started = true;
687         // Always serialize these, even if not te first root element.
688
serializePreRoot();
689     }
690
691
692     /**
693      * Called to serialize a DOM element. Equivalent to calling {@link
694      * #startElement}, {@link #endElement} and serializing everything
695      * inbetween, but better optimized.
696      */

697     protected void serializeElement( Element JavaDoc elem )
698         throws IOException JavaDoc
699     {
700         Attr JavaDoc attr;
701         NamedNodeMap JavaDoc attrMap;
702         int i;
703         Node JavaDoc child;
704         ElementState state;
705         boolean preserveSpace;
706         String JavaDoc name;
707         String JavaDoc value;
708         String JavaDoc tagName;
709
710         tagName = elem.getTagName();
711         state = getElementState();
712         if ( isDocumentState() ) {
713             // If this is the root element handle it differently.
714
// If the first root element in the document, serialize
715
// the document's DOCTYPE. Space preserving defaults
716
// to that of the output format.
717
if ( ! _started )
718                 startDocument( tagName );
719         } else {
720             // For any other element, if first in parent, then
721
// close parent's opening tag and use the parnet's
722
// space preserving.
723
if ( state.empty )
724                 _printer.printText( '>' );
725             // Indent this element on a new line if the first
726
// content of the parent element or immediately
727
// following an element.
728
if ( _indenting && ! state.preserveSpace &&
729                  ( state.empty || state.afterElement ) )
730                 _printer.breakLine();
731         }
732         preserveSpace = state.preserveSpace;
733
734         // Do not change the current element state yet.
735
// This only happens in endElement().
736

737         // XHTML: element names are lower case, DOM will be different
738
_printer.printText( '<' );
739         if ( _xhtml )
740             _printer.printText( tagName.toLowerCase(Locale.ENGLISH) );
741         else
742             _printer.printText( tagName );
743         _printer.indent();
744
745         // Lookup the element's attribute, but only print specified
746
// attributes. (Unspecified attributes are derived from the DTD.
747
// For each attribute print it's name and value as one part,
748
// separated with a space so the element can be broken on
749
// multiple lines.
750
attrMap = elem.getAttributes();
751         if ( attrMap != null ) {
752             for ( i = 0 ; i < attrMap.getLength() ; ++i ) {
753                 attr = (Attr JavaDoc) attrMap.item( i );
754                 name = attr.getName().toLowerCase(Locale.ENGLISH);
755                 value = attr.getValue();
756                 if ( attr.getSpecified() ) {
757                     _printer.printSpace();
758                     if ( _xhtml ) {
759                         // XHTML: print empty string for null values.
760
if ( value == null ) {
761                             _printer.printText( name );
762                             _printer.printText( "=\"\"" );
763                         } else {
764                             _printer.printText( name );
765                             _printer.printText( "=\"" );
766                             printEscaped( value );
767                             _printer.printText( '"' );
768                         }
769                     } else {
770                         // HTML: Empty values print as attribute name, no value.
771
// HTML: URI attributes will print unescaped
772
if ( value == null ) {
773                             value = "";
774                         }
775                         if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
776                             _printer.printText( name );
777                         else if ( HTMLdtd.isURI( tagName, name ) ) {
778                             _printer.printText( name );
779                             _printer.printText( "=\"" );
780                             _printer.printText( escapeURI( value ) );
781                             _printer.printText( '"' );
782                         } else if ( HTMLdtd.isBoolean( tagName, name ) )
783                             _printer.printText( name );
784                         else {
785                             _printer.printText( name );
786                             _printer.printText( "=\"" );
787                             printEscaped( value );
788                             _printer.printText( '"' );
789                         }
790                     }
791                 }
792             }
793         }
794         if ( HTMLdtd.isPreserveSpace( tagName ) )
795             preserveSpace = true;
796
797         // If element has children, or if element is not an empty tag,
798
// serialize an opening tag.
799
if ( elem.hasChildNodes() || ! HTMLdtd.isEmptyTag( tagName ) ) {
800             // Enter an element state, and serialize the children
801
// one by one. Finally, end the element.
802
state = enterElementState( null, null, tagName, preserveSpace );
803
804             // Prevents line breaks inside A/TD
805
if ( tagName.equalsIgnoreCase( "A" ) || tagName.equalsIgnoreCase( "TD" ) ) {
806                 state.empty = false;
807                 _printer.printText( '>' );
808             }
809
810             // Handle SCRIPT and STYLE specifically by changing the
811
// state of the current element to CDATA (XHTML) or
812
// unescaped (HTML).
813
if ( tagName.equalsIgnoreCase( "SCRIPT" ) ||
814                  tagName.equalsIgnoreCase( "STYLE" ) ) {
815                 if ( _xhtml ) {
816                     // XHTML: Print contents as CDATA section
817
state.doCData = true;
818                 } else {
819                     // HTML: Print contents unescaped
820
state.unescaped = true;
821                 }
822             }
823             child = elem.getFirstChild();
824             while ( child != null ) {
825                 serializeNode( child );
826                 child = child.getNextSibling();
827             }
828             endElementIO( null, null, tagName );
829         } else {
830             _printer.unindent();
831             // XHTML: Close empty tag with ' />' so it's XML and HTML compatible.
832
// HTML: Empty tags are defined as such in the DTD, not in the document.
833
if ( _xhtml )
834                 _printer.printText( " />" );
835             else
836                 _printer.printText( '>' );
837             // After element but parent element is no longer empty.
838
state.afterElement = true;
839             state.empty = false;
840             if ( isDocumentState() )
841                 _printer.flush();
842         }
843     }
844
845
846
847     protected void characters( String JavaDoc text )
848         throws IOException JavaDoc
849     {
850         ElementState state;
851
852         // HTML: no CDATA section
853
state = content();
854         super.characters( text );
855     }
856
857
858     protected String JavaDoc getEntityRef( int ch )
859     {
860         return HTMLdtd.fromChar( ch );
861     }
862
863
864     protected String JavaDoc escapeURI( String JavaDoc uri )
865     {
866         int index;
867
868         // XXX Apparently Netscape doesn't like if we escape the URI
869
// using %nn, so we leave it as is, just remove any quotes.
870
index = uri.indexOf( "\"" );
871         if ( index >= 0 )
872             return uri.substring( 0, index );
873         else
874             return uri;
875     }
876
877
878 }
879
880
881
882
883
Popular Tags