KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sun > org > apache > xml > internal > serialize > OutputFormat


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 1999-2002 The Apache Software Foundation. All rights
6  * reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

57
58
59 // Aug 21, 2000:
60
// Added ability to omit DOCTYPE declaration.
61
// Reported by Lars Martin <lars@smb-tec.com>
62
// Aug 25, 2000:
63
// Added ability to omit comments.
64
// Contributed by Anupam Bagchi <abagchi@jtcsv.com>
65

66
67 package com.sun.org.apache.xml.internal.serialize;
68
69
70 import java.io.UnsupportedEncodingException JavaDoc;
71
72 import org.w3c.dom.Document JavaDoc;
73 import org.w3c.dom.DocumentType JavaDoc;
74 import org.w3c.dom.Node JavaDoc;
75 import org.w3c.dom.html.HTMLDocument;
76
77
78 /**
79  * Specifies an output format to control the serializer. Based on the
80  * XSLT specification for output format, plus additional parameters.
81  * Used to select the suitable serializer and determine how the
82  * document should be formatted on output.
83  * <p>
84  * The two interesting constructors are:
85  * <ul>
86  * <li>{@link #OutputFormat(String,String,boolean)} creates a format
87  * for the specified method (XML, HTML, Text, etc), encoding and indentation
88  * <li>{@link #OutputFormat(Document,String,boolean)} creates a format
89  * compatible with the document type (XML, HTML, Text, etc), encoding and
90  * indentation
91  * </ul>
92  *
93  *
94  * @version $Revision: 1.20 $ $Date: 2003/12/10 17:14:17 $
95  * @author <a HREF="mailto:arkin@intalio.com">Assaf Arkin</a>
96  * <a HREF="mailto:visco@intalio.com">Keith Visco</a>
97  * @see Serializer
98  * @see Method
99  * @see LineSeparator
100  */

101 public class OutputFormat
102 {
103
104
105     public static class DTD
106     {
107
108         /**
109          * Public identifier for HTML 4.01 (Strict) document type.
110          */

111         public static final String JavaDoc HTMLPublicId = "-//W3C//DTD HTML 4.01//EN";
112
113         /**
114          * System identifier for HTML 4.01 (Strict) document type.
115          */

116         public static final String JavaDoc HTMLSystemId =
117             "http://www.w3.org/TR/html4/strict.dtd";
118
119         /**
120          * Public identifier for XHTML 1.0 (Strict) document type.
121          */

122         public static final String JavaDoc XHTMLPublicId =
123             "-//W3C//DTD XHTML 1.0 Strict//EN";
124
125         /**
126          * System identifier for XHTML 1.0 (Strict) document type.
127          */

128         public static final String JavaDoc XHTMLSystemId =
129             "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
130
131     }
132
133
134     public static class Defaults
135     {
136
137         /**
138          * If indentation is turned on, the default identation
139          * level is 4.
140          *
141          * @see #setIndenting(boolean)
142          */

143         public static final int Indent = 4;
144
145         /**
146          * The default encoding for Web documents it UTF-8.
147          *
148          * @see #getEncoding()
149          */

150         public static final String JavaDoc Encoding = "UTF-8";
151
152         /**
153          * The default line width at which to break long lines
154          * when identing. This is set to 72.
155          */

156         public static final int LineWidth = 72;
157
158     }
159
160
161     /**
162      * Holds the output method specified for this document,
163      * or null if no method was specified.
164      */

165     private String JavaDoc _method;
166
167
168     /**
169      * Specifies the version of the output method.
170      */

171     private String JavaDoc _version;
172
173
174     /**
175      * The indentation level, or zero if no indentation
176      * was requested.
177      */

178     private int _indent = 0;
179
180
181     /**
182      * The encoding to use, if an input stream is used.
183      * The default is always UTF-8.
184      */

185     private String JavaDoc _encoding = Defaults.Encoding;
186
187     /**
188      * The EncodingInfo instance for _encoding.
189      */

190     private EncodingInfo _encodingInfo = null;
191
192     // whether java names for encodings are permitted
193
private boolean _allowJavaNames = false;
194
195     /**
196      * The specified media type or null.
197      */

198     private String JavaDoc _mediaType;
199
200
201     /**
202      * The specified document type system identifier, or null.
203      */

204     private String JavaDoc _doctypeSystem;
205
206
207     /**
208      * The specified document type public identifier, or null.
209      */

210     private String JavaDoc _doctypePublic;
211
212
213     /**
214      * Ture if the XML declaration should be ommited;
215      */

216     private boolean _omitXmlDeclaration = false;
217
218
219     /**
220      * Ture if the DOCTYPE declaration should be ommited;
221      */

222     private boolean _omitDoctype = false;
223
224
225     /**
226      * Ture if comments should be ommited;
227      */

228     private boolean _omitComments = false;
229
230
231     /**
232      * Ture if the comments should be ommited;
233      */

234     private boolean _stripComments = false;
235
236
237     /**
238      * True if the document type should be marked as standalone.
239      */

240     private boolean _standalone = false;
241
242
243     /**
244      * List of element tag names whose text node children must
245      * be output as CDATA.
246      */

247     private String JavaDoc[] _cdataElements;
248
249
250     /**
251      * List of element tag names whose text node children must
252      * be output unescaped.
253      */

254     private String JavaDoc[] _nonEscapingElements;
255
256
257     /**
258      * The selected line separator.
259      */

260     private String JavaDoc _lineSeparator = LineSeparator.Web;
261
262
263     /**
264      * The line width at which to wrap long lines when indenting.
265      */

266     private int _lineWidth = Defaults.LineWidth;
267
268
269     /**
270      * True if spaces should be preserved in elements that do not
271      * specify otherwise, or specify the default behavior.
272      */

273     private boolean _preserve = false;
274     /** If true, an empty string valued attribute is output as "". If false and
275      * and we are using the HTMLSerializer, then only the attribute name is
276      * serialized. Defaults to false for backwards compatibility.
277      */

278     private boolean _preserveEmptyAttributes = false;
279
280     /**
281      * Constructs a new output format with the default values.
282      */

283     public OutputFormat()
284     {
285     }
286
287
/**
 * Creates an output format for the given method and encoding.
 * When <tt>indenting</tt> is true the default indentation level
 * and default line width are selected; when false both are set
 * to zero.
 *
 * @param method The output method to use
 * @param encoding The encoding to use
 * @param indenting True for pretty printing
 * @see #setMethod
 * @see #setEncoding
 * @see #setIndenting
 */
public OutputFormat(String method, String encoding, boolean indenting) {
    this.setMethod(method);
    this.setEncoding(encoding);
    this.setIndenting(indenting);
}
307
308
309     /**
310      * Constructs a new output format with the proper method,
311      * document type identifiers and media type for the specified
312      * document.
313      *
314      * @param doc The document to output
315      * @see #whichMethod
316      */

317     public OutputFormat( Document JavaDoc doc )
318     {
319         setMethod( whichMethod( doc ) );
320         setDoctype( whichDoctypePublic( doc ), whichDoctypeSystem( doc ) );
321         setMediaType( whichMediaType( getMethod() ) );
322     }
323
324
/**
 * Creates an output format whose method, document type identifiers
 * and media type are derived from the given document, then applies
 * the given encoding and indentation setting. When
 * <tt>indenting</tt> is true the default indentation level and
 * default line width are selected.
 *
 * @param doc The document to output
 * @param encoding The encoding to use
 * @param indenting True for pretty printing
 * @see #setEncoding
 * @see #setIndenting
 * @see #whichMethod
 */
public OutputFormat(Document doc, String encoding, boolean indenting) {
    this(doc);
    this.setEncoding(encoding);
    this.setIndenting(indenting);
}
345
346
347     /**
348      * Returns the method specified for this output format.
349      * Typically the method will be <tt>xml</tt>, <tt>html</tt>
350      * or <tt>text</tt>, but it might be other values.
351      * If no method was specified, null will be returned
352      * and the most suitable method will be determined for
353      * the document by calling {@link #whichMethod}.
354      *
355      * @return The specified output method, or null
356      */

357     public String JavaDoc getMethod()
358     {
359         return _method;
360     }
361
362
363     /**
364      * Sets the method for this output format.
365      *
366      * @see #getMethod
367      * @param method The output method, or null
368      */

369     public void setMethod( String JavaDoc method )
370     {
371         _method = method;
372     }
373
374
375     /**
376      * Returns the version for this output method.
377      * If no version was specified, will return null
378      * and the default version number will be used.
379      * If the serializerr does not support that particular
380      * version, it should default to a supported version.
381      *
382      * @return The specified method version, or null
383      */

384     public String JavaDoc getVersion()
385     {
386         return _version;
387     }
388
389
390     /**
391      * Sets the version for this output method.
392      * For XML the value would be "1.0", for HTML
393      * it would be "4.0".
394      *
395      * @see #getVersion
396      * @param version The output method version, or null
397      */

398     public void setVersion( String JavaDoc version )
399     {
400         _version = version;
401     }
402
403
404     /**
405      * Returns the indentation specified. If no indentation
406      * was specified, zero is returned and the document
407      * should not be indented.
408      *
409      * @return The indentation or zero
410      * @see #setIndenting
411      */

412     public int getIndent()
413     {
414         return _indent;
415     }
416
417
418     /**
419      * Returns true if indentation was specified.
420      */

421     public boolean getIndenting()
422     {
423         return ( _indent > 0 );
424     }
425
426
427     /**
428      * Sets the indentation. The document will not be
429      * indented if the indentation is set to zero.
430      * Calling {@link #setIndenting} will reset this
431      * value to zero (off) or the default (on).
432      *
433      * @param indent The indentation, or zero
434      */

435     public void setIndent( int indent )
436     {
437         if ( indent < 0 )
438             _indent = 0;
439         else
440             _indent = indent;
441     }
442
443
444     /**
445      * Sets the indentation on and off. When set on, the default
446      * indentation level and default line wrapping is used
447      * (see {@link Defaults#Indent} and {@link Defaults#LineWidth}).
448      * To specify a different indentation level or line wrapping,
449      * use {@link #setIndent} and {@link #setLineWidth}.
450      *
451      * @param on True if indentation should be on
452      */

453     public void setIndenting( boolean on )
454     {
455         if ( on ) {
456             _indent = Defaults.Indent;
457             _lineWidth = Defaults.LineWidth;
458         } else {
459             _indent = 0;
460             _lineWidth = 0;
461         }
462     }
463
464
465     /**
466      * Returns the specified encoding. If no encoding was
467      * specified, the default is always "UTF-8".
468      *
469      * @return The encoding
470      */

471     public String JavaDoc getEncoding()
472     {
473         return _encoding;
474     }
475
476
477     /**
478      * Sets the encoding for this output method. If no
479      * encoding was specified, the default is always "UTF-8".
480      * Make sure the encoding is compatible with the one
481      * used by the {@link java.io.Writer}.
482      *
483      * @see #getEncoding
484      * @param encoding The encoding, or null
485      */

486     public void setEncoding( String JavaDoc encoding )
487     {
488         _encoding = encoding;
489         _encodingInfo = null;
490     }
491
492     /**
493      * Sets the encoding for this output method with an <code>EncodingInfo</code>
494      * instance.
495      */

496     public void setEncoding(EncodingInfo encInfo) {
497         _encoding = encInfo.getIANAName();
498         _encodingInfo = encInfo;
499     }
500
501     /**
502      * Returns an <code>EncodingInfo<code> instance for the encoding.
503      *
504      * @see #setEncoding
505      */

506     public EncodingInfo getEncodingInfo() throws UnsupportedEncodingException JavaDoc {
507         if (_encodingInfo == null)
508             _encodingInfo = Encodings.getEncodingInfo(_encoding, _allowJavaNames);
509         return _encodingInfo;
510     }
511
512     /**
513      * Sets whether java encoding names are permitted
514      */

515     public void setAllowJavaNames (boolean allow) {
516         _allowJavaNames = allow;
517     }
518
519     /**
520      * Returns whether java encoding names are permitted
521      */

522     public boolean setAllowJavaNames () {
523         return _allowJavaNames;
524     }
525
526     /**
527      * Returns the specified media type, or null.
528      * To determine the media type based on the
529      * document type, use {@link #whichMediaType}.
530      *
531      * @return The specified media type, or null
532      */

533     public String JavaDoc getMediaType()
534     {
535         return _mediaType;
536     }
537
538
539     /**
540      * Sets the media type.
541      *
542      * @see #getMediaType
543      * @param mediaType The specified media type
544      */

545     public void setMediaType( String JavaDoc mediaType )
546     {
547         _mediaType = mediaType;
548     }
549
550
551     /**
552      * Sets the document type public and system identifiers.
553      * Required only if the DOM Document or SAX events do not
554      * specify the document type, and one must be present in
555      * the serialized document. Any document type specified
556      * by the DOM Document or SAX events will override these
557      * values.
558      *
559      * @param publicId The public identifier, or null
560      * @param systemId The system identifier, or null
561      */

562     public void setDoctype( String JavaDoc publicId, String JavaDoc systemId )
563     {
564         _doctypePublic = publicId;
565         _doctypeSystem = systemId;
566     }
567
568
569     /**
570      * Returns the specified document type public identifier,
571      * or null.
572      */

573     public String JavaDoc getDoctypePublic()
574     {
575         return _doctypePublic;
576     }
577
578
579     /**
580      * Returns the specified document type system identifier,
581      * or null.
582      */

583     public String JavaDoc getDoctypeSystem()
584     {
585         return _doctypeSystem;
586     }
587
588
589     /**
590      * Returns true if comments should be ommited.
591      * The default is false.
592      */

593     public boolean getOmitComments()
594     {
595         return _omitComments;
596     }
597
598
599     /**
600      * Sets comment omitting on and off.
601      *
602      * @param omit True if comments should be ommited
603      */

604     public void setOmitComments( boolean omit )
605     {
606         _omitComments = omit;
607     }
608
609
610     /**
611      * Returns true if the DOCTYPE declaration should
612      * be ommited. The default is false.
613      */

614     public boolean getOmitDocumentType()
615     {
616         return _omitDoctype;
617     }
618
619
620     /**
621      * Sets DOCTYPE declaration omitting on and off.
622      *
623      * @param omit True if DOCTYPE declaration should be ommited
624      */

625     public void setOmitDocumentType( boolean omit )
626     {
627         _omitDoctype = omit;
628     }
629
630
631     /**
632      * Returns true if the XML document declaration should
633      * be ommited. The default is false.
634      */

635     public boolean getOmitXMLDeclaration()
636     {
637         return _omitXmlDeclaration;
638     }
639
640
641     /**
642      * Sets XML declaration omitting on and off.
643      *
644      * @param omit True if XML declaration should be ommited
645      */

646     public void setOmitXMLDeclaration( boolean omit )
647     {
648         _omitXmlDeclaration = omit;
649     }
650
651
652     /**
653      * Returns true if the document type is standalone.
654      * The default is false.
655      */

656     public boolean getStandalone()
657     {
658         return _standalone;
659     }
660
661
662     /**
663      * Sets document DTD standalone. The public and system
664      * identifiers must be null for the document to be
665      * serialized as standalone.
666      *
667      * @param standalone True if document DTD is standalone
668      */

669     public void setStandalone( boolean standalone )
670     {
671         _standalone = standalone;
672     }
673
674
675     /**
676      * Returns a list of all the elements whose text node children
677      * should be output as CDATA, or null if no such elements were
678      * specified.
679      */

680     public String JavaDoc[] getCDataElements()
681     {
682         return _cdataElements;
683     }
684
685
686     /**
687      * Returns true if the text node children of the given elements
688      * should be output as CDATA.
689      *
690      * @param tagName The element's tag name
691      * @return True if should serialize as CDATA
692      */

693     public boolean isCDataElement( String JavaDoc tagName )
694     {
695         int i;
696
697         if ( _cdataElements == null )
698             return false;
699         for ( i = 0 ; i < _cdataElements.length ; ++i )
700             if ( _cdataElements[ i ].equals( tagName ) )
701                 return true;
702         return false;
703     }
704
705
706     /**
707      * Sets the list of elements for which text node children
708      * should be output as CDATA.
709      *
710      * @param cdataElements List of CDATA element tag names
711      */

712     public void setCDataElements( String JavaDoc[] cdataElements )
713     {
714         _cdataElements = cdataElements;
715     }
716
717
718     /**
719      * Returns a list of all the elements whose text node children
720      * should be output unescaped (no character references), or null
721      * if no such elements were specified.
722      */

723     public String JavaDoc[] getNonEscapingElements()
724     {
725         return _nonEscapingElements;
726     }
727
728
729     /**
730      * Returns true if the text node children of the given elements
731      * should be output unescaped.
732      *
733      * @param tagName The element's tag name
734      * @return True if should serialize unescaped
735      */

736     public boolean isNonEscapingElement( String JavaDoc tagName )
737     {
738         int i;
739
740         if ( _nonEscapingElements == null ) {
741             return false;
742         }
743         for ( i = 0 ; i < _nonEscapingElements.length ; ++i )
744             if ( _nonEscapingElements[ i ].equals( tagName ) )
745                 return true;
746         return false;
747     }
748
749
750     /**
751      * Sets the list of elements for which text node children
752      * should be output unescaped (no character references).
753      *
754      * @param nonEscapingElements List of unescaped element tag names
755      */

756     public void setNonEscapingElements( String JavaDoc[] nonEscapingElements )
757     {
758         _nonEscapingElements = nonEscapingElements;
759     }
760
761
762
763     /**
764      * Returns a specific line separator to use. The default is the
765      * Web line separator (<tt>\n</tt>). A string is returned to
766      * support double codes (CR + LF).
767      *
768      * @return The specified line separator
769      */

770     public String JavaDoc getLineSeparator()
771     {
772         return _lineSeparator;
773     }
774
775
776     /**
777      * Sets the line separator. The default is the Web line separator
778      * (<tt>\n</tt>). The machine's line separator can be obtained
779      * from the system property <tt>line.separator</tt>, but is only
780      * useful if the document is edited on machines of the same type.
781      * For general documents, use the Web line separator.
782      *
783      * @param lineSeparator The specified line separator
784      */

785     public void setLineSeparator( String JavaDoc lineSeparator )
786     {
787         if ( lineSeparator == null )
788             _lineSeparator = LineSeparator.Web;
789         else
790             _lineSeparator = lineSeparator;
791     }
792
793
794     /**
795      * Returns true if the default behavior for this format is to
796      * preserve spaces. All elements that do not specify otherwise
797      * or specify the default behavior will be formatted based on
798      * this rule. All elements that specify space preserving will
799      * always preserve space.
800      */

801     public boolean getPreserveSpace()
802     {
803         return _preserve;
804     }
805
806
807     /**
808      * Sets space preserving as the default behavior. The default is
809      * space stripping and all elements that do not specify otherwise
810      * or use the default value will not preserve spaces.
811      *
812      * @param preserve True if spaces should be preserved
813      */

814     public void setPreserveSpace( boolean preserve )
815     {
816         _preserve = preserve;
817     }
818
819
820     /**
821      * Return the selected line width for breaking up long lines.
822      * When indenting, and only when indenting, long lines will be
823      * broken at space boundaries based on this line width.
824      * No line wrapping occurs if this value is zero.
825      */

826     public int getLineWidth()
827     {
828         return _lineWidth;
829     }
830
831
832     /**
833      * Sets the line width. If zero then no line wrapping will
834      * occur. Calling {@link #setIndenting} will reset this
835      * value to zero (off) or the default (on).
836      *
837      * @param lineWidth The line width to use, zero for default
838      * @see #getLineWidth
839      * @see #setIndenting
840      */

841     public void setLineWidth( int lineWidth )
842     {
843         if ( lineWidth <= 0 )
844             _lineWidth = 0;
845         else
846             _lineWidth = lineWidth;
847     }
848     /**
849      * Returns the preserveEmptyAttribute flag. If flag is false, then'
850      * attributes with empty string values are output as the attribute
851      * name only (in HTML mode).
852      * @return preserve the preserve flag
853      */
public boolean getPreserveEmptyAttributes () { return _preserveEmptyAttributes; } /**
854      * Sets the preserveEmptyAttribute flag. If flag is false, then'
855      * attributes with empty string values are output as the attribute
856      * name only (in HTML mode).
857      * @param preserve the preserve flag
858      */
public void setPreserveEmptyAttributes (boolean preserve) { _preserveEmptyAttributes = preserve; }
859
860     /**
861      * Returns the last printable character based on the selected
862      * encoding. Control characters and non-printable characters
863      * are always printed as character references.
864      */

865     public char getLastPrintable()
866     {
867         if ( getEncoding() != null &&
868              ( getEncoding().equalsIgnoreCase( "ASCII" ) ) )
869             return 0xFF;
870         else
871             return 0xFFFF;
872     }
873
874
875     /**
876      * Determine the output method for the specified document.
877      * If the document is an instance of {@link org.w3c.dom.html.HTMLDocument}
878      * then the method is said to be <tt>html</tt>. If the root
879      * element is 'html' and all text nodes preceding the root
880      * element are all whitespace, then the method is said to be
881      * <tt>html</tt>. Otherwise the method is <tt>xml</tt>.
882      *
883      * @param doc The document to check
884      * @return The suitable method
885      */

886     public static String JavaDoc whichMethod( Document JavaDoc doc )
887     {
888         Node JavaDoc node;
889         String JavaDoc value;
890         int i;
891
892         // If document is derived from HTMLDocument then the default
893
// method is html.
894
if ( doc instanceof HTMLDocument )
895             return Method.HTML;
896
897         // Lookup the root element and the text nodes preceding it.
898
// If root element is html and all text nodes contain whitespace
899
// only, the method is html.
900

901         // FIXME (SM) should we care about namespaces here?
902

903         node = doc.getFirstChild();
904         while (node != null) {
905             // If the root element is html, the method is html.
906
if ( node.getNodeType() == Node.ELEMENT_NODE ) {
907                 if ( node.getNodeName().equalsIgnoreCase( "html" ) ) {
908                     return Method.HTML;
909                 } else if ( node.getNodeName().equalsIgnoreCase( "root" ) ) {
910                     return Method.FOP;
911                 } else {
912                     return Method.XML;
913                 }
914             } else if ( node.getNodeType() == Node.TEXT_NODE ) {
915                 // If a text node preceding the root element contains
916
// only whitespace, this might be html, otherwise it's
917
// definitely xml.
918
value = node.getNodeValue();
919                 for ( i = 0 ; i < value.length() ; ++i )
920                     if ( value.charAt( i ) != 0x20 && value.charAt( i ) != 0x0A &&
921                          value.charAt( i ) != 0x09 && value.charAt( i ) != 0x0D )
922                         return Method.XML;
923             }
924             node = node.getNextSibling();
925         }
926         // Anything else, the method is xml.
927
return Method.XML;
928     }
929
930
931     /**
932      * Returns the document type public identifier
933      * specified for this document, or null.
934      */

935     public static String JavaDoc whichDoctypePublic( Document JavaDoc doc )
936     {
937         DocumentType JavaDoc doctype;
938
939            /* DOM Level 2 was introduced into the code base*/
940            doctype = doc.getDoctype();
941            if ( doctype != null ) {
942            // Note on catch: DOM Level 1 does not specify this method
943
// and the code will throw a NoSuchMethodError
944
try {
945            return doctype.getPublicId();
946            } catch ( Error JavaDoc except ) { }
947            }
948         
949         if ( doc instanceof HTMLDocument )
950             return DTD.XHTMLPublicId;
951         return null;
952     }
953
954
955     /**
956      * Returns the document type system identifier
957      * specified for this document, or null.
958      */

959     public static String JavaDoc whichDoctypeSystem( Document JavaDoc doc )
960     {
961         DocumentType JavaDoc doctype;
962
963         /* DOM Level 2 was introduced into the code base*/
964            doctype = doc.getDoctype();
965            if ( doctype != null ) {
966            // Note on catch: DOM Level 1 does not specify this method
967
// and the code will throw a NoSuchMethodError
968
try {
969            return doctype.getSystemId();
970            } catch ( Error JavaDoc except ) { }
971            }
972         
973         if ( doc instanceof HTMLDocument )
974             return DTD.XHTMLSystemId;
975         return null;
976     }
977
978
979     /**
980      * Returns the suitable media format for a document
981      * output with the specified method.
982      */

983     public static String JavaDoc whichMediaType( String JavaDoc method )
984     {
985         if ( method.equalsIgnoreCase( Method.XML ) )
986             return "text/xml";
987         if ( method.equalsIgnoreCase( Method.HTML ) )
988             return "text/html";
989         if ( method.equalsIgnoreCase( Method.XHTML ) )
990             return "text/html";
991         if ( method.equalsIgnoreCase( Method.TEXT ) )
992             return "text/plain";
993         if ( method.equalsIgnoreCase( Method.FOP ) )
994             return "application/pdf";
995         return null;
996     }
997
998
999 }
1000
1001
Popular Tags