KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > apache > xml > serialize > OutputFormat


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 1999 The Apache Software Foundation. All rights
6  * reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

57
58
// Aug 21, 2000:
//   Added ability to omit DOCTYPE declaration.
//   Reported by Lars Martin <lars@smb-tec.com>
// Aug 25, 2000:
//   Added ability to omit comments.
//   Contributed by Anupam Bagchi <abagchi@jtcsv.com>
65

66
67 package org.enhydra.apache.xml.serialize;
68
69
import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Node;
import org.w3c.dom.html.HTMLDocument;
74
75
76 /**
77  * Specifies an output format to control the serializer. Based on the
78  * XSLT specification for output format, plus additional parameters.
79  * Used to select the suitable serializer and determine how the
80  * document should be formatted on output.
81  * <p>
82  * The two interesting constructors are:
83  * <ul>
84  * <li>{@link #OutputFormat(String,String,boolean)} creates a format
85  * for the specified method (XML, HTML, Text, etc), encoding and indentation
86  * <li>{@link #OutputFormat(Document,String,boolean)} creates a format
87  * compatible with the document type (XML, HTML, Text, etc), encoding and
88  * indentation
89  * </ul>
90  *
91  *
92  * @version $Revision: 1.2 $ $Date: 2005/01/26 08:28:45 $
93  * @author <a HREF="mailto:arkin@intalio.com">Assaf Arkin</a>
94  * <a HREF="mailto:visco@intalio.com">Keith Visco</a>
95  * @see Serializer
96  * @see Method
97  * @see LineSeparator
98  */

99 public class OutputFormat
100 {
101
102
103     public static class DTD
104     {
105
106         /**
107          * Public identifier for HTML document type.
108          */

109         public static final String JavaDoc HTMLPublicId = "-//W3C//DTD HTML 4.0//EN";
110
111         /**
112          * System identifier for HTML document type.
113          */

114         public static final String JavaDoc HTMLSystemId =
115             "http://www.w3.org/TR/WD-html-in-xml/DTD/xhtml1-strict.dtd";
116
117         /**
118          * Public identifier for XHTML document type.
119          */

120         public static final String JavaDoc XHTMLPublicId =
121             "-//W3C//DTD XHTML 1.0 Strict//EN";
122
123         /**
124          * System identifier for XHTML document type.
125          */

126         public static final String JavaDoc XHTMLSystemId =
127             "http://www.w3.org/TR/WD-html-in-xml/DTD/xhtml1-strict.dtd";
128
129     }
130
131
132     public static class Defaults
133     {
134
135         /**
136          * If indentation is turned on, the default identation
137          * level is 4.
138          *
139          * @see #setIndenting(boolean)
140          */

141         public static final int Indent = 4;
142
143         /**
144          * The default encoding for Web documents it UTF-8.
145          *
146          * @see #getEncoding()
147          */

148         public static final String JavaDoc Encoding = "UTF-8";
149
150         /**
151          * The default line width at which to break long lines
152          * when identing. This is set to 72.
153          */

154         public static final int LineWidth = 72;
155
156     }
157
158
159     /**
160      * Holds the output method specified for this document,
161      * or null if no method was specified.
162      */

163     private String JavaDoc _method;
164
165
166     /**
167      * Specifies the version of the output method.
168      */

169     private String JavaDoc _version;
170
171
172     /**
173      * The indentation level, or zero if no indentation
174      * was requested.
175      */

176     private int _indent = 0;
177
178
179     /**
180      * The encoding to use, if an input stream is used.
181      * The default is always UTF-8.
182      */

183     private String JavaDoc _encoding = Defaults.Encoding;
184
185     /**
186      * The EncodingInfo instance for _encoding.
187      */

188     private EncodingInfo _encodingInfo = null;
189
190     /**
191      * The specified media type or null.
192      */

193     private String JavaDoc _mediaType;
194
195
196     /**
197      * The specified document type system identifier, or null.
198      */

199     private String JavaDoc _doctypeSystem;
200
201
202     /**
203      * The specified document type public identifier, or null.
204      */

205     private String JavaDoc _doctypePublic;
206
207
208     /**
209      * Ture if the XML declaration should be ommited;
210      */

211     private boolean _omitXmlDeclaration = false;
212
213
214     /**
215      * Ture if the DOCTYPE declaration should be ommited;
216      */

217     private boolean _omitDoctype = false;
218
219
220     /**
221      * Ture if comments should be ommited;
222      */

223     private boolean _omitComments = false;
224
225
226     /**
227      * Ture if the comments should be ommited;
228      */

229     private boolean _stripComments = false;
230
231
232     /**
233      * True if the document type should be marked as standalone.
234      */

235     private boolean _standalone = false;
236
237
238     /**
239      * List of element tag names whose text node children must
240      * be output as CDATA.
241      */

242     private String JavaDoc[] _cdataElements;
243
244
245     /**
246      * List of element tag names whose text node children must
247      * be output unescaped.
248      */

249     private String JavaDoc[] _nonEscapingElements;
250
251
252     /**
253      * The selected line separator.
254      */

255     private String JavaDoc _lineSeparator = LineSeparator.Web;
256
257
258     /**
259      * The line width at which to wrap long lines when indenting.
260      */

261     private int _lineWidth = Defaults.LineWidth;
262
263
264     /**
265      * True if spaces should be preserved in elements that do not
266      * specify otherwise, or specify the default behavior.
267      */

268     private boolean _preserve = false;
269     /** If true, an empty string valued attribute is output as "". If false and
270      * and we are using the HTMLSerializer, then only the attribute name is
271      * serialized. Defaults to false for backwards compatibility.
272      */

273     private boolean _preserveEmptyAttributes = false;
274
275     /**
276      * Constructs a new output format with the default values.
277      */

278     public OutputFormat()
279     {
280     }
281
282
283     /**
284      * Constructs a new output format with the default values for
285      * the specified method and encoding. If <tt>indent</tt>
286      * is true, the document will be pretty printed with the default
287      * indentation level and default line wrapping.
288      *
289      * @param method The specified output method
290      * @param encoding The specified encoding
291      * @param indenting True for pretty printing
292      * @see #setEncoding
293      * @see #setIndenting
294      * @see #setMethod
295      */

296     public OutputFormat( String JavaDoc method, String JavaDoc encoding, boolean indenting )
297     {
298         setMethod( method );
299         setEncoding( encoding );
300         setIndenting( indenting );
301     }
302
303
304     /**
305      * Constructs a new output format with the proper method,
306      * document type identifiers and media type for the specified
307      * document.
308      *
309      * @param doc The document to output
310      * @see #whichMethod
311      */

312     public OutputFormat( Document JavaDoc doc )
313     {
314         setMethod( whichMethod( doc ) );
315         setDoctype( whichDoctypePublic( doc ), whichDoctypeSystem( doc ) );
316         setMediaType( whichMediaType( getMethod() ) );
317     }
318
319
320     /**
321      * Constructs a new output format with the proper method,
322      * document type identifiers and media type for the specified
323      * document, and with the specified encoding. If <tt>indent</tt>
324      * is true, the document will be pretty printed with the default
325      * indentation level and default line wrapping.
326      *
327      * @param doc The document to output
328      * @param encoding The specified encoding
329      * @param indenting True for pretty printing
330      * @see #setEncoding
331      * @see #setIndenting
332      * @see #whichMethod
333      */

334     public OutputFormat( Document JavaDoc doc, String JavaDoc encoding, boolean indenting )
335     {
336         this( doc );
337         setEncoding( encoding );
338         setIndenting( indenting );
339     }
340
341
342     /**
343      * Returns the method specified for this output format.
344      * Typically the method will be <tt>xml</tt>, <tt>html</tt>
345      * or <tt>text</tt>, but it might be other values.
346      * If no method was specified, null will be returned
347      * and the most suitable method will be determined for
348      * the document by calling {@link #whichMethod}.
349      *
350      * @return The specified output method, or null
351      */

352     public String JavaDoc getMethod()
353     {
354         return _method;
355     }
356
357
358     /**
359      * Sets the method for this output format.
360      *
361      * @see #getMethod
362      * @param method The output method, or null
363      */

364     public void setMethod( String JavaDoc method )
365     {
366         _method = method;
367     }
368
369
370     /**
371      * Returns the version for this output method.
372      * If no version was specified, will return null
373      * and the default version number will be used.
374      * If the serializerr does not support that particular
375      * version, it should default to a supported version.
376      *
377      * @return The specified method version, or null
378      */

379     public String JavaDoc getVersion()
380     {
381         return _version;
382     }
383
384
385     /**
386      * Sets the version for this output method.
387      * For XML the value would be "1.0", for HTML
388      * it would be "4.0".
389      *
390      * @see #getVersion
391      * @param version The output method version, or null
392      */

393     public void setVersion( String JavaDoc version )
394     {
395         _version = version;
396     }
397
398
399     /**
400      * Returns the indentation specified. If no indentation
401      * was specified, zero is returned and the document
402      * should not be indented.
403      *
404      * @return The indentation or zero
405      * @see #setIndenting
406      */

407     public int getIndent()
408     {
409         return _indent;
410     }
411
412
413     /**
414      * Returns true if indentation was specified.
415      */

416     public boolean getIndenting()
417     {
418         return ( _indent > 0 );
419     }
420
421
422     /**
423      * Sets the indentation. The document will not be
424      * indented if the indentation is set to zero.
425      * Calling {@link #setIndenting} will reset this
426      * value to zero (off) or the default (on).
427      *
428      * @param indent The indentation, or zero
429      */

430     public void setIndent( int indent )
431     {
432         if ( indent < 0 )
433             _indent = 0;
434         else
435             _indent = indent;
436     }
437
438
439     /**
440      * Sets the indentation on and off. When set on, the default
441      * indentation level and default line wrapping is used
442      * (see {@link #DEFAULT_INDENT} and {@link #DEFAULT_LINE_WIDTH}).
443      * To specify a different indentation level or line wrapping,
444      * use {@link #setIndent} and {@link #setLineWidth}.
445      *
446      * @param on True if indentation should be on
447      */

448     public void setIndenting( boolean on )
449     {
450         if ( on ) {
451             _indent = Defaults.Indent;
452             _lineWidth = Defaults.LineWidth;
453         } else {
454             _indent = 0;
455             _lineWidth = 0;
456         }
457     }
458
459
460     /**
461      * Returns the specified encoding. If no encoding was
462      * specified, the default is always "UTF-8".
463      *
464      * @return The encoding
465      */

466     public String JavaDoc getEncoding()
467     {
468         return _encoding;
469     }
470
471
472     /**
473      * Sets the encoding for this output method. If no
474      * encoding was specified, the default is always "UTF-8".
475      * Make sure the encoding is compatible with the one
476      * used by the {@link java.io.Writer}.
477      *
478      * @see #getEncoding
479      * @param encoding The encoding, or null
480      */

481     public void setEncoding( String JavaDoc encoding )
482     {
483         _encoding = encoding;
484         _encodingInfo = null;
485     }
486
487     /**
488      * Sets the encoding for this output method with an <code>EncodingInfo</code>
489      * instance.
490      */

491     public void setEncoding(EncodingInfo encInfo) {
492         _encoding = encInfo.getName();
493         _encodingInfo = encInfo;
494     }
495
496     /**
497      * Returns an <code>EncodingInfo<code> instance for the encoding.
498      *
499      * @see setEncoding
500      */

501     public EncodingInfo getEncodingInfo() {
502         if (_encodingInfo == null)
503             _encodingInfo = Encodings.getEncodingInfo(_encoding);
504         return _encodingInfo;
505     }
506
507     /**
508      * Returns the specified media type, or null.
509      * To determine the media type based on the
510      * document type, use {@link #whichMediaType}.
511      *
512      * @return The specified media type, or null
513      */

514     public String JavaDoc getMediaType()
515     {
516         return _mediaType;
517     }
518
519
520     /**
521      * Sets the media type.
522      *
523      * @see #getMediaType
524      * @param mediaType The specified media type
525      */

526     public void setMediaType( String JavaDoc mediaType )
527     {
528         _mediaType = mediaType;
529     }
530
531
532     /**
533      * Sets the document type public and system identifiers.
534      * Required only if the DOM Document or SAX events do not
535      * specify the document type, and one must be present in
536      * the serialized document. Any document type specified
537      * by the DOM Document or SAX events will override these
538      * values.
539      *
540      * @param publicId The public identifier, or null
541      * @param systemId The system identifier, or null
542      */

543     public void setDoctype( String JavaDoc publicId, String JavaDoc systemId )
544     {
545         _doctypePublic = publicId;
546         _doctypeSystem = systemId;
547     }
548
549
550     /**
551      * Returns the specified document type public identifier,
552      * or null.
553      */

554     public String JavaDoc getDoctypePublic()
555     {
556         return _doctypePublic;
557     }
558
559
560     /**
561      * Returns the specified document type system identifier,
562      * or null.
563      */

564     public String JavaDoc getDoctypeSystem()
565     {
566         return _doctypeSystem;
567     }
568
569
570     /**
571      * Returns true if comments should be ommited.
572      * The default is false.
573      */

574     public boolean getOmitComments()
575     {
576         return _omitComments;
577     }
578
579
580     /**
581      * Sets comment omitting on and off.
582      *
583      * @param omit True if comments should be ommited
584      */

585     public void setOmitComments( boolean omit )
586     {
587         _omitComments = omit;
588     }
589
590
591     /**
592      * Returns true if the DOCTYPE declaration should
593      * be ommited. The default is false.
594      */

595     public boolean getOmitDocumentType()
596     {
597         return _omitDoctype;
598     }
599
600
601     /**
602      * Sets DOCTYPE declaration omitting on and off.
603      *
604      * @param omit True if DOCTYPE declaration should be ommited
605      */

606     public void setOmitDocumentType( boolean omit )
607     {
608         _omitDoctype = omit;
609     }
610
611
612     /**
613      * Returns true if the XML document declaration should
614      * be ommited. The default is false.
615      */

616     public boolean getOmitXMLDeclaration()
617     {
618         return _omitXmlDeclaration;
619     }
620
621
622     /**
623      * Sets XML declaration omitting on and off.
624      *
625      * @param omit True if XML declaration should be ommited
626      */

627     public void setOmitXMLDeclaration( boolean omit )
628     {
629         _omitXmlDeclaration = omit;
630     }
631
632
633     /**
634      * Returns true if the document type is standalone.
635      * The default is false.
636      */

637     public boolean getStandalone()
638     {
639         return _standalone;
640     }
641
642
643     /**
644      * Sets document DTD standalone. The public and system
645      * identifiers must be null for the document to be
646      * serialized as standalone.
647      *
648      * @param standalone True if document DTD is standalone
649      */

650     public void setStandalone( boolean standalone )
651     {
652         _standalone = standalone;
653     }
654
655
656     /**
657      * Returns a list of all the elements whose text node children
658      * should be output as CDATA, or null if no such elements were
659      * specified.
660      */

661     public String JavaDoc[] getCDataElements()
662     {
663         return _cdataElements;
664     }
665
666
667     /**
668      * Returns true if the text node children of the given elements
669      * should be output as CDATA.
670      *
671      * @param tagName The element's tag name
672      * @return True if should serialize as CDATA
673      */

674     public boolean isCDataElement( String JavaDoc tagName )
675     {
676         int i;
677
678         if ( _cdataElements == null )
679             return false;
680         for ( i = 0 ; i < _cdataElements.length ; ++i )
681             if ( _cdataElements[ i ].equals( tagName ) )
682                 return true;
683         return false;
684     }
685
686
687     /**
688      * Sets the list of elements for which text node children
689      * should be output as CDATA.
690      *
691      * @param cdataElements List of CDATA element tag names
692      */

693     public void setCDataElements( String JavaDoc[] cdataElements )
694     {
695         _cdataElements = cdataElements;
696     }
697
698
699     /**
700      * Returns a list of all the elements whose text node children
701      * should be output unescaped (no character references), or null
702      * if no such elements were specified.
703      */

704     public String JavaDoc[] getNonEscapingElements()
705     {
706         return _nonEscapingElements;
707     }
708
709
710     /**
711      * Returns true if the text node children of the given elements
712      * should be output unescaped.
713      *
714      * @param tagName The element's tag name
715      * @return True if should serialize unescaped
716      */

717     public boolean isNonEscapingElement( String JavaDoc tagName )
718     {
719         int i;
720
721         if ( _nonEscapingElements == null )
722             return false;
723         for ( i = 0 ; i < _nonEscapingElements.length ; ++i )
724             if ( _nonEscapingElements[ i ].equals( tagName ) )
725                 return true;
726         return false;
727     }
728
729
730     /**
731      * Sets the list of elements for which text node children
732      * should be output unescaped (no character references).
733      *
734      * @param nonEscapingElements List of unescaped element tag names
735      */

736     public void setNonEscapingElements( String JavaDoc[] nonEscapingElements )
737     {
738         _nonEscapingElements = nonEscapingElements;
739     }
740
741
742
743     /**
744      * Returns a specific line separator to use. The default is the
745      * Web line separator (<tt>\n</tt>). A string is returned to
746      * support double codes (CR + LF).
747      *
748      * @return The specified line separator
749      */

750     public String JavaDoc getLineSeparator()
751     {
752         return _lineSeparator;
753     }
754
755
756     /**
757      * Sets the line separator. The default is the Web line separator
758      * (<tt>\n</tt>). The machine's line separator can be obtained
759      * from the system property <tt>line.separator</tt>, but is only
760      * useful if the document is edited on machines of the same type.
761      * For general documents, use the Web line separator.
762      *
763      * @param lineSeparator The specified line separator
764      */

765     public void setLineSeparator( String JavaDoc lineSeparator )
766     {
767         if ( lineSeparator == null )
768             _lineSeparator = LineSeparator.Web;
769         else
770             _lineSeparator = lineSeparator;
771     }
772
773
774     /**
775      * Returns true if the default behavior for this format is to
776      * preserve spaces. All elements that do not specify otherwise
777      * or specify the default behavior will be formatted based on
778      * this rule. All elements that specify space preserving will
779      * always preserve space.
780      */

781     public boolean getPreserveSpace()
782     {
783         return _preserve;
784     }
785
786
787     /**
788      * Sets space preserving as the default behavior. The default is
789      * space stripping and all elements that do not specify otherwise
790      * or use the default value will not preserve spaces.
791      *
792      * @param preserve True if spaces should be preserved
793      */

794     public void setPreserveSpace( boolean preserve )
795     {
796         _preserve = preserve;
797     }
798
799
800     /**
801      * Return the selected line width for breaking up long lines.
802      * When indenting, and only when indenting, long lines will be
803      * broken at space boundaries based on this line width.
804      * No line wrapping occurs if this value is zero.
805      */

806     public int getLineWidth()
807     {
808         return _lineWidth;
809     }
810
811
812     /**
813      * Sets the line width. If zero then no line wrapping will
814      * occur. Calling {@link #setIndenting} will reset this
815      * value to zero (off) or the default (on).
816      *
817      * @param lineWidth The line width to use, zero for default
818      * @see #getLineWidth
819      * @see #setIndenting
820      */

821     public void setLineWidth( int lineWidth )
822     {
823         if ( lineWidth <= 0 )
824             _lineWidth = 0;
825         else
826             _lineWidth = lineWidth;
827     }
828
829     /**
830      * Returns the preserveEmptyAttribute flag. If flag is false, then'
831      * attributes with empty string values are output as the attribute
832      * name only (in HTML mode).
833      * @return preserve the preserve flag
834      */

835     public boolean getPreserveEmptyAttributes () {
836         return _preserveEmptyAttributes;
837     }
838     /**
839      * Sets the preserveEmptyAttribute flag. If flag is false, then'
840      * attributes with empty string values are output as the attribute
841      * name only (in HTML mode).
842      * @param preserve the preserve flag
843      */

844     public void setPreserveEmptyAttributes (boolean preserve) {
845         _preserveEmptyAttributes = preserve;
846     }
847
848     /**
849      * Returns the last printable character based on the selected
850      * encoding. Control characters and non-printable characters
851      * are always printed as character references.
852      */

853     public char getLastPrintable()
854     {
855         if ( getEncoding() != null &&
856              ( getEncoding().equalsIgnoreCase( "ASCII" ) ) )
857             return 0xFF;
858         else
859             return 0xFFFF;
860     }
861
862
863     /**
864      * Determine the output method for the specified document.
865      * If the document is an instance of {@link org.w3c.dom.html.HTMLDocument}
866      * then the method is said to be <tt>html</tt>. If the root
867      * element is 'html' and all text nodes preceding the root
868      * element are all whitespace, then the method is said to be
869      * <tt>html</tt>. Otherwise the method is <tt>xml</tt>.
870      *
871      * @param doc The document to check
872      * @return The suitable method
873      */

874     public static String JavaDoc whichMethod( Document JavaDoc doc )
875     {
876         Node JavaDoc node;
877         String JavaDoc value;
878         int i;
879
880         // If document is derived from HTMLDocument then the default
881
// method is html.
882
if ( doc instanceof HTMLDocument )
883             return Method.HTML;
884
885         // Lookup the root element and the text nodes preceding it.
886
// If root element is html and all text nodes contain whitespace
887
// only, the method is html.
888

889         // FIXME (SM) should we care about namespaces here?
890

891         node = doc.getFirstChild();
892         while (node != null) {
893             // If the root element is html, the method is html.
894
if ( node.getNodeType() == Node.ELEMENT_NODE ) {
895                 if ( node.getNodeName().equalsIgnoreCase( "html" ) ) {
896                     return Method.HTML;
897                 } else if ( node.getNodeName().equalsIgnoreCase( "root" ) ) {
898                     return Method.FOP;
899                 } else {
900                     return Method.XML;
901                 }
902             } else if ( node.getNodeType() == Node.TEXT_NODE ) {
903                 // If a text node preceding the root element contains
904
// only whitespace, this might be html, otherwise it's
905
// definitely xml.
906
value = node.getNodeValue();
907                 for ( i = 0 ; i < value.length() ; ++i )
908                     if ( value.charAt( i ) != 0x20 && value.charAt( i ) != 0x0A &&
909                          value.charAt( i ) != 0x09 && value.charAt( i ) != 0x0D )
910                         return Method.XML;
911             }
912             node = node.getNextSibling();
913         }
914         // Anything else, the method is xml.
915
return Method.XML;
916     }
917
918
919     /**
920      * Returns the document type public identifier
921      * specified for this document, or null.
922      */

923     public static String JavaDoc whichDoctypePublic( Document JavaDoc doc )
924     {
925         DocumentType JavaDoc doctype;
926
927            /* DOM Level 2 was introduced into the code base*/
928            doctype = doc.getDoctype();
929            if ( doctype != null ) {
930            // Note on catch: DOM Level 1 does not specify this method
931
// and the code will throw a NoSuchMethodError
932
try {
933            return doctype.getPublicId();
934            } catch ( Error JavaDoc except ) { }
935            }
936         
937         if ( doc instanceof HTMLDocument )
938             return DTD.XHTMLPublicId;
939         return null;
940     }
941
942
943     /**
944      * Returns the document type system identifier
945      * specified for this document, or null.
946      */

947     public static String JavaDoc whichDoctypeSystem( Document JavaDoc doc )
948     {
949         DocumentType JavaDoc doctype;
950
951         /* DOM Level 2 was introduced into the code base*/
952            doctype = doc.getDoctype();
953            if ( doctype != null ) {
954            // Note on catch: DOM Level 1 does not specify this method
955
// and the code will throw a NoSuchMethodError
956
try {
957            return doctype.getSystemId();
958            } catch ( Error JavaDoc except ) { }
959            }
960         
961         if ( doc instanceof HTMLDocument )
962             return DTD.XHTMLSystemId;
963         return null;
964     }
965
966
967     /**
968      * Returns the suitable media format for a document
969      * output with the specified method.
970      */

971     public static String JavaDoc whichMediaType( String JavaDoc method )
972     {
973         if ( method.equalsIgnoreCase( Method.XML ) )
974             return "text/xml";
975         if ( method.equalsIgnoreCase( Method.HTML ) )
976             return "text/html";
977         if ( method.equalsIgnoreCase( Method.XHTML ) )
978             return "text/html";
979         if ( method.equalsIgnoreCase( Method.TEXT ) )
980             return "text/plain";
981         if ( method.equalsIgnoreCase( Method.FOP ) )
982             return "application/pdf";
983         return null;
984     }
985
986
987 }
988
989
Popular Tags