BaseMarkupSerializer


1   /**
2    *  Licensed to the Apache Software Foundation (ASF) under one or more
3    *  contributor license agreements.  See the NOTICE file distributed with
4    *  this work for additional information regarding copyright ownership.
5    *  The ASF licenses this file to You under the Apache License, Version 2.0
6    *  (the "License"); you may not use this file except in compliance with
7    *  the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License.
16   */
17  
18  /*
19   * This code has been borrowed from the Apache Xerces project. We're copying the code to
20   * keep from adding a dependency on Xerces in the Geronimo kernel.
21   */
22  
23  package org.apache.geronimo.system.configuration;
24  
25  import java.io.Writer  ;
26  import java.io.OutputStream  ;
27  import java.io.IOException  ;
28  import java.util.Vector  ;
29  import java.util.Hashtable  ;
30  
31  import org.w3c.dom.Document  ;
32  import org.w3c.dom.DocumentFragment  ;
33  import org.w3c.dom.DocumentType  ;
34  import org.w3c.dom.Element  ;
35  import org.w3c.dom.Node  ;
36  import org.xml.sax.Locator  ;
37  import org.xml.sax.SAXException  ;
38  
39  /**
40   * Base class for a serializer supporting both DOM and SAX pretty
41   * serializing of XML/HTML/XHTML documents. Derived classes perform
42   * the method-specific serializing, this class provides the common
43   * serializing mechanisms.
44   * <p>
45   * The serializer must be initialized with the proper writer and
46   * output format before it can be used by calling {@link #init}.
47   * The serializer can be reused any number of times, but cannot
48   * be used concurrently by two threads.
49   * <p>
50   * If an output stream is used, the encoding is taken from the
51   * output format (defaults to <tt>UTF-8</tt>). If a writer is
52   * used, make sure the writer uses the same encoding (if applies)
53   * as specified in the output format.
54   * <p>
55   * The serializer supports both DOM and SAX. DOM serializing is done
56   * by calling {@link #serialize} and SAX serializing is done by firing
57   * SAX events and using the serializer as a document handler.
58   * This also applies to derived classes.
59   * <p>
60   * If an I/O exception occurs while serializing, the serializer
61   * will not throw an exception directly, but only throw it
62   * at the end of serializing (either DOM or SAX's {@link
63   * org.xml.sax.DocumentHandler#endDocument}).
64   * <p>
65   * For elements that are not specified as whitespace preserving,
66   * the serializer will potentially break long text lines at space
67   * boundaries, indent lines, and serialize elements on separate
68   * lines. Line terminators will be regarded as spaces, and
69   * spaces at beginning of line will be stripped.
70   * <p>
71   * When indenting, the serializer is capable of detecting seemingly
72   * element content, and serializing these elements indented on separate
73   * lines. An element is serialized indented when it is the first or
74   * last child of an element, or immediately following or preceding
75   * another element.
76   *
77   *
78   * @version $Revision: 476049 $ $Date: 2006-11-16 23:35:17 -0500 (Thu, 16 Nov 2006) $
79   * @author <a HREF="mailto:arkin@intalio.com">Assaf Arkin</a>
80   * @see Serializer
81   * @see DOMSerializer
82   */
83  public abstract class BaseMarkupSerializer
84  {
85  
86      private EncodingInfo encodingInfo; // Set in prepare() from format.getEncodingInfo(); supplies the writer when a byte stream is the output.
87  
88      /**
89       * Holds array of all element states that have been entered.
90       * The array is automatically resized. When leaving an element,
91       * its state is not removed but reused when later returning
92       * to the same nesting level.
93       */
94      private ElementState[]  elementStates;
95  
96      /**
97       * The index of the next state to place in the array,
98       * or one plus the index of the current state. When zero,
99       * we are in no state.
100      */
101     private int             elementStateCount;
102 
103     /**
104      * Vector holding comments and PIs that come before the root
105      * element (even after it), see {@link #serializePreRoot}.
106      */
107     private Vector            preRoot;
108 
109     /**
110      * If the document has been started (header serialized), this
111      * flag is set to true so it's not started twice.
112      */
113     protected boolean       started;
114 
115     /**
116      * True if the serializer has been prepared. This flag is set
117      * to false when the serializer is reset prior to using it,
118      * and to true after it has been prepared for usage.
119      */
120     private boolean         prepared;
121 
122     /**
123      * Association between namespace URIs (keys) and prefixes (values).
124      * Accumulated here prior to starting an element and placing this
125      * list in the element state.
126      */
127     protected Hashtable       prefixes;
128 
129     /**
130      * The public identifier of the document type, if known.
131      */
132     protected String          docTypePublicId;
133 
134 
135     /**
136      * The system identifier of the document type, if known.
137      */
138     protected String          docTypeSystemId;
139 
140 
141     /**
142      * The output format associated with this serializer. This will never
143      * be a null reference. If no format was passed to the constructor,
144      * the default one for this document type will be used. The format
145      * object is never changed by the serializer.
146      */
147     protected OutputFormat   format;
148 
149 
150     /**
151      * The printer used for printing text parts.
152      */
153     protected Printer       printer;
154 
155 
156     /**
157      * True if indenting printer.
158      */
159     protected boolean       indenting;
160 
161 
162     /**
163      * The underlying writer.
164      */
165     private Writer            writer;
166 
167 
168     /**
169      * The output stream.
170      */
171     private OutputStream      output;
172 
173 
174     //--------------------------------//
175     // Constructor and initialization //
176     //--------------------------------//
177 
178 
179     /**
180      * Protected constructor can only be used by derived class.
181      * Must initialize the serializer before serializing any document,
182      * see {@link #init}.
183      */
184     protected BaseMarkupSerializer( OutputFormat format )
185     {
186         int i;
187 
188         elementStates = new ElementState[ 10 ];
189         for ( i = 0 ; i < elementStates.length ; ++i )
190             elementStates[ i ] = new ElementState();
191         this.format = format;
192     }
193 
194 
195     public void setOutputByteStream( OutputStream   output )
196     {
197         if ( output == null )
198             throw new NullPointerException  ( "SER001 Argument 'output' is null." );
199         this.output = output;
200         writer = null;
201         reset();
202     }
203 
204 
205     public void setOutputCharStream( Writer   writer )
206     {
207         if ( writer == null )
208             throw new NullPointerException  ( "SER001 Argument 'writer' is null." );
209         this.writer = writer;
210         output = null;
211         reset();
212     }
213 
214 
215     public void setOutputFormat( OutputFormat format )
216     {
217         if ( format == null )
218             throw new NullPointerException  ( "SER001 Argument 'format' is null." );
219         this.format = format;
220         reset();
221     }
222 
223 
224     public boolean reset()
225     {
226         if ( elementStateCount > 1 )
227             throw new IllegalStateException  ( "Serializer reset in the middle of serialization" );
228         prepared = false;
229         return true;
230     }
231 
232 
233     protected void prepare()
234         throws IOException  
235     {
236         if ( prepared )
237             return;
238 
239         if ( writer == null && output == null )
240             throw new IOException  ( "SER002 No writer supplied for serializer" );
241         // If the output stream has been set, use it to construct
242         // the writer. It is possible that the serializer has been
243         // reused with the same output stream and different encoding.
244 
245         encodingInfo = format.getEncodingInfo();
246 
247         if ( output != null ) {
248             writer = encodingInfo.getWriter(output);
249         }
250 
251         if ( format.getIndenting() ) {
252             indenting = true;
253             printer = new IndentPrinter( writer, format );
254         } else {
255             indenting = false;
256             printer = new Printer( writer, format );
257         }
258 
259         ElementState state;
260 
261         elementStateCount = 0;
262         state = elementStates[ 0 ];
263         state.namespaceURI = null;
264         state.localName = null;
265         state.rawName = null;
266         state.preserveSpace = format.getPreserveSpace();
267         state.empty = true;
268         state.afterElement = false;
269         state.afterComment = false;
270         state.doCData = state.inCData = false;
271         state.prefixes = null;
272 
273         docTypePublicId = format.getDoctypePublic();
274         docTypeSystemId = format.getDoctypeSystem();
275         started = false;
276         prepared = true;
277     }
278 
279 
280 
281     //----------------------------------//
282     // DOM document serializing methods //
283     //----------------------------------//
284 
285 
286     /**
287      * Serializes the DOM element using the previously specified
288      * writer and output format. Throws an exception only if
289      * an I/O exception occured while serializing.
290      *
291      * @param elem The element to serialize
292      * @throws IOException An I/O exception occured while
293      *   serializing
294      */
295     public void serialize( Element   elem )
296         throws IOException  
297     {
298         prepare();
299         serializeNode( elem );
300         printer.flush();
301         if ( printer.getException() != null )
302             throw printer.getException();
303     }
304 
305 
306     /**
307      * Serializes the DOM document fragmnt using the previously specified
308      * writer and output format. Throws an exception only if
309      * an I/O exception occured while serializing.
310      *
311      * @param elem The element to serialize
312      * @throws IOException An I/O exception occured while
313      *   serializing
314      */
315     public void serialize( DocumentFragment   frag )
316         throws IOException  
317     {
318         prepare();
319         serializeNode( frag );
320         printer.flush();
321         if ( printer.getException() != null )
322             throw printer.getException();
323     }
324 
325 
326     /**
327      * Serializes the DOM document using the previously specified
328      * writer and output format. Throws an exception only if
329      * an I/O exception occured while serializing.
330      *
331      * @param doc The document to serialize
332      * @throws IOException An I/O exception occured while
333      *   serializing
334      */
335     public void serialize( Document   doc )
336         throws IOException  
337     {
338         prepare();
339         serializeNode( doc );
340         serializePreRoot();
341         printer.flush();
342         if ( printer.getException() != null )
343             throw printer.getException();
344     }
345 
346 
347     //------------------------------------------//
348     // SAX document handler serializing methods //
349     //------------------------------------------//
350 
351 
352     public void startDocument()
353         throws SAXException  
354     {
355         try {
356             prepare();
357         } catch ( IOException   except ) {
358             throw new SAXException  ( except.toString() );
359         }
360         // Nothing to do here. All the magic happens in startDocument(String)
361     }
362     
363     
364     public void characters( char[] chars, int start, int length )
365         throws SAXException  
366     {
367         ElementState state;
368 
369         try {
370         state = content();
371 
372         // Check if text should be print as CDATA section or unescaped
373         // based on elements listed in the output format (the element
374         // state) or whether we are inside a CDATA section or entity.
375 
376         if ( state.inCData || state.doCData ) {
377             int          saveIndent;
378 
379             // Print a CDATA section. The text is not escaped, but ']]>'
380             // appearing in the code must be identified and dealt with.
381             // The contents of a text node is considered space preserving.
382             if ( ! state.inCData ) {
383                 printer.printText( "<![CDATA[" );
384                 state.inCData = true;
385             }
386             saveIndent = printer.getNextIndent();
387             printer.setNextIndent( 0 );
388             for ( int index = 0 ; index < length ; ++index ) {
389                 if ( index + 2 < length && chars[ index ] == ']' &&
390                      chars[ index + 1 ] == ']' && chars[ index + 2 ] == '>' ) {
391 
392                     printText( chars, start, index + 2, true, true );
393                     printer.printText( "]]><![CDATA[" );
394                     start += index + 2;
395                     length -= index + 2;
396                     index = 0;
397                 }
398             }
399             if ( length > 0 )
400                 printText( chars, start, length, true, true );
401             printer.setNextIndent( saveIndent );
402 
403         } else {
404 
405             int saveIndent;
406 
407             if ( state.preserveSpace ) {
408                 // If preserving space then hold of indentation so no
409                 // excessive spaces are printed at line breaks, escape
410                 // the text content without replacing spaces and print
411                 // the text breaking only at line breaks.
412                 saveIndent = printer.getNextIndent();
413                 printer.setNextIndent( 0 );
414                 printText( chars, start, length, true, state.unescaped );
415                 printer.setNextIndent( saveIndent );
416             } else {
417                 printText( chars, start, length, false, state.unescaped );
418             }
419         }
420         } catch ( IOException   except ) {
421             throw new SAXException  ( except );
422         }
423     }
424 
425 
426     public void ignorableWhitespace( char[] chars, int start, int length )
427         throws SAXException  
428     {
429         int i;
430 
431         try {
432         content();
433 
434         // Print ignorable whitespaces only when indenting, after
435         // all they are indentation. Cancel the indentation to
436         // not indent twice.
437         if ( indenting ) {
438             printer.setThisIndent( 0 );
439             for ( i = start ; length-- > 0 ; ++i )
440                 printer.printText( chars[ i ] );
441         }
442         } catch ( IOException   except ) {
443             throw new SAXException  ( except );
444         }
445     }
446 
447 
448     public final void processingInstruction( String   target, String   code )
449         throws SAXException  
450     {
451         try {
452             processingInstructionIO( target, code );
453         } catch ( IOException   except ) {
454         throw new SAXException  ( except );
455         }
456     }
457 
458     public void processingInstructionIO( String   target, String   code )
459         throws IOException  
460     {
461         int          index;
462         StringBuffer   buffer;
463         ElementState state;
464 
465         state = content();
466         buffer = new StringBuffer  ( 40 );
467 
468         // Create the processing instruction textual representation.
469         // Make sure we don't have '?>' inside either target or code.
470         index = target.indexOf( "?>" );
471         if ( index >= 0 )
472             buffer.append( "<?" ).append( target.substring( 0, index ) );
473         else
474             buffer.append( "<?" ).append( target );
475         if ( code != null ) {
476             buffer.append( ' ' );
477             index = code.indexOf( "?>" );
478             if ( index >= 0 )
479                 buffer.append( code.substring( 0, index ) );
480             else
481                 buffer.append( code );
482         }
483         buffer.append( "?>" );
484 
485         // If before the root element (or after it), do not print
486         // the PI directly but place it in the pre-root vector.
487         if ( isDocumentState() ) {
488             if ( preRoot == null )
489                 preRoot = new Vector  ();
490             preRoot.addElement( buffer.toString() );
491         } else {
492             printer.indent();
493             printText( buffer.toString(), true, true );
494             printer.unindent();
495             if ( indenting )
496             state.afterElement = true;
497         }
498     }
499 
500 
501     public void comment( char[] chars, int start, int length )
502         throws SAXException  
503     {
504         try {
505         comment( new String  ( chars, start, length ) );
506         } catch ( IOException   except ) {
507             throw new SAXException  ( except );
508     }
509     }
510 
511 
512     public void comment( String   text )
513         throws IOException  
514     {
515         StringBuffer   buffer;
516         int          index;
517         ElementState state;
518         
519         if ( format.getOmitComments() )
520             return;
521 
522         state  = content();
523         buffer = new StringBuffer  ( 40 );
524         // Create the processing comment textual representation.
525         // Make sure we don't have '-->' inside the comment.
526         index = text.indexOf( "-->" );
527         if ( index >= 0 )
528             buffer.append( "<!--" ).append( text.substring( 0, index ) ).append( "-->" );
529         else
530             buffer.append( "<!--" ).append( text ).append( "-->" );
531 
532         // If before the root element (or after it), do not print
533         // the comment directly but place it in the pre-root vector.
534         if ( isDocumentState() ) {
535             if ( preRoot == null )
536                 preRoot = new Vector  ();
537             preRoot.addElement( buffer.toString() );
538         } else {
539             // Indent this element on a new line if the first
540             // content of the parent element or immediately
541             // following an element.
542             if ( indenting && ! state.preserveSpace)
543                 printer.breakLine();
544                         printer.indent();
545             printText( buffer.toString(), true, true );
546                         printer.unindent();
547             if ( indenting )
548                 state.afterElement = true;
549         }
550                 state.afterComment = true;
551                 state.afterElement = false;
552     }
553 
554 
555     public void startCDATA()
556     {
557         ElementState state;
558 
559         state = getElementState();
560         state.doCData = true;
561     }
562 
563 
564     public void endCDATA()
565     {
566         ElementState state;
567 
568         state = getElementState();
569         state.doCData = false;
570     }
571 
572 
573     public void startNonEscaping()
574     {
575         ElementState state;
576 
577         state = getElementState();
578         state.unescaped = true;
579     }
580 
581 
582     public void endNonEscaping()
583     {
584         ElementState state;
585 
586         state = getElementState();
587         state.unescaped = false;
588     }
589 
590 
591     public void startPreserving()
592     {
593         ElementState state;
594 
595         state = getElementState();
596         state.preserveSpace = true;
597     }
598 
599 
600     public void endPreserving()
601     {
602         ElementState state;
603 
604         state = getElementState();
605         state.preserveSpace = false;
606     }
607 
608 
609     /**
610      * Called at the end of the document to wrap it up.
611      * Will flush the output stream and throw an exception
612      * if any I/O error occured while serializing.
613      *
614      * @throws SAXException An I/O exception occured during
615      *  serializing
616      */
617     public void endDocument()
618         throws SAXException  
619     {
620         try {
621         // Print all the elements accumulated outside of
622         // the root element.
623         serializePreRoot();
624         // Flush the output, this is necessary for buffered output.
625         printer.flush();
626         } catch ( IOException   except ) {
627             throw new SAXException  ( except );
628     }
629     }
630 
631 
632     public void startEntity( String   name )
633     {
634         // ???
635     }
636 
637 
638     public void endEntity( String   name )
639     {
640         // ???
641     }
642 
643 
644     public void setDocumentLocator( Locator   locator )
645     {
646         // Nothing to do
647     }
648 
649 
650     //-----------------------------------------//
651     // SAX content handler serializing methods //
652     //-----------------------------------------//
653 
654 
655     public void skippedEntity ( String   name )
656         throws SAXException  
657     {
658         try {
659         endCDATA();
660         content();
661         printer.printText( '&' );
662         printer.printText( name );
663         printer.printText( ';' );
664         } catch ( IOException   except ) {
665             throw new SAXException  ( except );
666     }
667     }
668 
669 
670     public void startPrefixMapping( String   prefix, String   uri )
671         throws SAXException  
672     {
673         if ( prefixes == null )
674             prefixes = new Hashtable  ();
675         prefixes.put( uri, prefix == null ? "" : prefix );
676     }
677 
678 
679     public void endPrefixMapping( String   prefix )
680         throws SAXException  
681     {
682     }
683 
684 
685     //------------------------------------------//
686     // SAX DTD/Decl handler serializing methods //
687     //------------------------------------------//
688 
689 
690     public final void startDTD( String   name, String   publicId, String   systemId )
691         throws SAXException  
692     {
693         try {
694         printer.enterDTD();
695         docTypePublicId = publicId;
696         docTypeSystemId = systemId;
697         } catch ( IOException   except ) {
698             throw new SAXException  ( except );
699         }
700     }
701 
702 
703     public void endDTD()
704     {
705         // Nothing to do here, all the magic occurs in startDocument(String).
706     }
707 
708 
709     public void elementDecl( String   name, String   model )
710         throws SAXException  
711     {
712         try {
713         printer.enterDTD();
714         printer.printText( "<!ELEMENT " );
715         printer.printText( name );
716         printer.printText( ' ' );
717         printer.printText( model );
718         printer.printText( '>' );
719         if ( indenting )
720             printer.breakLine();
721         } catch ( IOException   except ) {
722             throw new SAXException  ( except );
723         }
724     }
725 
726 
727     public void attributeDecl( String   eName, String   aName, String   type,
728                                String   valueDefault, String   value )
729         throws SAXException  
730     {
731         try {
732         printer.enterDTD();
733         printer.printText( "<!ATTLIST " );
734         printer.printText( eName );
735         printer.printText( ' ' );
736         printer.printText( aName );
737         printer.printText( ' ' );
738         printer.printText( type );
739         if ( valueDefault != null ) {
740             printer.printText( ' ' );
741             printer.printText( valueDefault );
742         }
743         if ( value != null ) {
744             printer.printText( " \"" );
745             printEscaped( value );
746             printer.printText( '"' );
747         }
748         printer.printText( '>' );
749         if ( indenting )
750             printer.breakLine();
751         } catch ( IOException   except ) {
752             throw new SAXException  ( except );
753     }
754     }
755 
756 
757     public void internalEntityDecl( String   name, String   value )
758         throws SAXException  
759     {
760         try {
761         printer.enterDTD();
762         printer.printText( "<!ENTITY " );
763         printer.printText( name );
764         printer.printText( " \"" );
765         printEscaped( value );
766         printer.printText( "\">" );
767         if ( indenting )
768             printer.breakLine();
769         } catch ( IOException   except ) {
770             throw new SAXException  ( except );
771         }
772     }
773 
774 
775     public void externalEntityDecl( String   name, String   publicId, String   systemId )
776         throws SAXException  
777     {
778         try {
779         printer.enterDTD();
780         unparsedEntityDecl( name, publicId, systemId, null );
781         } catch ( IOException   except ) {
782             throw new SAXException  ( except );
783         }
784     }
785 
786 
787     public void unparsedEntityDecl( String   name, String   publicId,
788                                     String   systemId, String   notationName )
789         throws SAXException  
790     {
791         try {
792         printer.enterDTD();
793         if ( publicId == null ) {
794             printer.printText( "<!ENTITY " );
795             printer.printText( name );
796             printer.printText( " SYSTEM " );
797             printDoctypeURL( systemId );
798         } else {
799             printer.printText( "<!ENTITY " );
800             printer.printText( name );
801             printer.printText( " PUBLIC " );
802             printDoctypeURL( publicId );
803             printer.printText( ' ' );
804             printDoctypeURL( systemId );
805         }
806         if ( notationName != null ) {
807             printer.printText( " NDATA " );
808             printer.printText( notationName );
809         }
810         printer.printText( '>' );
811         if ( indenting )
812             printer.breakLine();
813         } catch ( IOException   except ) {
814             throw new SAXException  ( except );
815     }
816     }
817 
818 
819     public void notationDecl( String   name, String   publicId, String   systemId )
820         throws SAXException  
821     {
822         try {
823         printer.enterDTD();
824         if ( publicId != null ) {
825             printer.printText( "<!NOTATION " );
826             printer.printText( name );
827             printer.printText( " PUBLIC " );
828             printDoctypeURL( publicId );
829             if ( systemId != null ) {
830                 printer.printText( ' ' );
831                 printDoctypeURL( systemId );
832             }
833         } else {
834             printer.printText( "<!NOTATION " );
835             printer.printText( name );
836             printer.printText( " SYSTEM " );
837             printDoctypeURL( systemId );
838         }
839         printer.printText( '>' );
840         if ( indenting )
841             printer.breakLine();
842         } catch ( IOException   except ) {
843             throw new SAXException  ( except );
844         }
845     }
846 
847 
848     //------------------------------------------//
849     // Generic node serializing methods         //
850     //------------------------------------------//
851 
852 
853     /**
854      * Serialize the DOM node. This method is shared across XML, HTML and XHTML
855      * serializers and the differences are masked out in a separate {@link
856      * #serializeElement}.
857      *
858      * @param node The node to serialize
859      * @see #serializeElement
860      * @throws IOException An I/O exception occured while
861      *   serializing
862      */
863     protected void serializeNode( Node   node )
864         throws IOException  
865     {
866         // Based on the node type call the suitable SAX handler.
867         // Only comments entities and documents which are not
868         // handled by SAX are serialized directly.
869         switch ( node.getNodeType() ) {
870         case Node.TEXT_NODE : {
871             String   text;
872 
873             text = node.getNodeValue();
874             if ( text != null )
875                 if ( !indenting || getElementState().preserveSpace
876                      || (text.replace('\n',' ').trim().length() != 0))
877                     characters( text );
878             break;
879         }
880 
881         case Node.CDATA_SECTION_NODE : {
882             String   text;
883 
884             text = node.getNodeValue();
885             if ( text != null ) {
886                 startCDATA();
887                 characters( text );
888                 endCDATA();
889             }
890             break;
891         }
892 
893         case Node.COMMENT_NODE : {
894             String   text;
895 
896             if ( ! format.getOmitComments() ) {
897                 text = node.getNodeValue();
898                 if ( text != null )
899                     comment( text );
900             }
901             break;
902         }
903 
904         case Node.ENTITY_REFERENCE_NODE : {
905             Node           child;
906 
907             endCDATA();
908             content();
909             child = node.getFirstChild();
910             while ( child != null ) {
911                 serializeNode( child );
912                 child = child.getNextSibling();
913             }
914             break;
915         }
916 
917         case Node.PROCESSING_INSTRUCTION_NODE :
918             processingInstructionIO( node.getNodeName(), node.getNodeValue() );
919             break;
920 
921         case Node.ELEMENT_NODE :
922             serializeElement( (Element  ) node );
923             break;
924 
925         case Node.DOCUMENT_NODE : {
926             DocumentType        docType;
927 
928             // If there is a document type, use the SAX events to
929             // serialize it.
930             docType = ( (Document  ) node ).getDoctype();
931             if (docType != null) {
932                 // DOM Level 2 (or higher)
933                 // TODO: result of the following call was assigned to a local variable that was never
934                 // read. Can the call be deleted?
935                 ( (Document  ) node ).getImplementation();
936                 try {
937                     String   internal;
938 
939                     printer.enterDTD();
940                     docTypePublicId = docType.getPublicId();
941                     docTypeSystemId = docType.getSystemId();
942                     internal = docType.getInternalSubset();
943                     if ( internal != null && internal.length() > 0 )
944                         printer.printText( internal );
945                     endDTD();
946                 }
947                 // DOM Level 1 -- does implementation have methods?
948                 catch (NoSuchMethodError   nsme) {
949                     Class   docTypeClass = docType.getClass();
950 
951                     String   docTypePublicId = null;
952                     String   docTypeSystemId = null;
953                     try {
954                         java.lang.reflect.Method   getPublicId = docTypeClass.getMethod("getPublicId", null);
955                         if (getPublicId.getReturnType().equals(String  .class)) {
956                             docTypePublicId = (String  )getPublicId.invoke(docType, null);
957                         }
958                     }
959                     catch (Exception   e) {
960                         // ignore
961                     }
962                     try {
963                         java.lang.reflect.Method   getSystemId = docTypeClass.getMethod("getSystemId", null);
964                         if (getSystemId.getReturnType().equals(String  .class)) {
965                             docTypeSystemId = (String  )getSystemId.invoke(docType, null);
966                         }
967                     }
968                     catch (Exception   e) {
969                         // ignore
970                     }
971                     this.printer.enterDTD();
972                     this.docTypePublicId = docTypePublicId;
973                     this.docTypeSystemId = docTypeSystemId;
974                     endDTD();
975                 }
976             }
977             // !! Fall through
978         }
979         case Node.DOCUMENT_FRAGMENT_NODE : {
980             Node           child;
981 
982             // By definition this will happen if the node is a document,
983             // document fragment, etc. Just serialize its contents. It will
984             // work well for other nodes that we do not know how to serialize.
985             child = node.getFirstChild();
986             while ( child != null ) {
987                 serializeNode( child );
988                 child = child.getNextSibling();
989             }
990             break;
991         }
992 
993         default:
994             break;
995         }
996     }
997 
998 
999     /**
1000     * Must be called by a method about to print any type of content.
1001     * If the element was just opened, the opening tag is closed and
1002     * will be matched to a closing tag. Returns the current element
1003     * state with <tt>empty</tt> and <tt>afterElement</tt> set to false.
1004     *
1005     * @return The current element state
1006     * @throws IOException An I/O exception occured while
1007     *   serializing
1008     */
1009    protected ElementState content()
1010        throws IOException  
1011    {
1012        ElementState state;
1013
1014        state = getElementState();
1015        if ( ! isDocumentState() ) {
1016            // Need to close CData section first
1017            if ( state.inCData && ! state.doCData ) {
1018                printer.printText( "]]>" );
1019                state.inCData = false;
1020            }
1021            // If this is the first content in the element,
1022            // change the state to not-empty and close the
1023            // opening element tag.
1024            if ( state.empty ) {
1025                printer.printText( '>' );
1026                state.empty = false;
1027            }
1028            // Except for one content type, all of them
1029            // are not last element. That one content
1030            // type will take care of itself.
1031            state.afterElement = false;
1032            // Except for one content type, all of them
1033            // are not last comment. That one content
1034            // type will take care of itself.
1035            state.afterComment = false;
1036        }
1037        return state;
1038    }
1039
1040
1041    /**
1042     * Called to print the text contents in the prevailing element format.
1043     * Since this method is capable of printing text as CDATA, it is used
1044     * for that purpose as well. White space handling is determined by the
1045     * current element state. In addition, the output format can dictate
1046     * whether the text is printed as CDATA or unescaped.
1047     *
1048     * @param text The text to print
1049     * @param unescaped True is should print unescaped
1050     * @throws IOException An I/O exception occured while
1051     *   serializing
1052     */
1053    protected void characters( String   text )
1054        throws IOException  
1055    {
1056        ElementState state;
1057
1058        state = content();
1059        // Check if text should be print as CDATA section or unescaped
1060        // based on elements listed in the output format (the element
1061        // state) or whether we are inside a CDATA section or entity.
1062
1063        if ( state.inCData || state.doCData ) {
1064            StringBuffer   buffer;
1065            int          index;
1066            int          saveIndent;
1067
1068            // Print a CDATA section. The text is not escaped, but ']]>'
1069            // appearing in the code must be identified and dealt with.
1070            // The contents of a text node is considered space preserving.
1071            buffer = new StringBuffer  ( text.length() );
1072            if ( ! state.inCData ) {
1073                buffer.append( "<![CDATA[" );
1074                state.inCData = true;
1075            }
1076            index = text.indexOf( "]]>" );
1077            while ( index >= 0 ) {
1078                buffer.append( text.substring( 0, index + 2 ) ).append( "]]><![CDATA[" );
1079                text = text.substring( index + 2 );
1080                index = text.indexOf( "]]>" );
1081            }
1082            buffer.append( text );
1083            saveIndent = printer.getNextIndent();
1084            printer.setNextIndent( 0 );
1085            printText( buffer.toString(), true, true );
1086            printer.setNextIndent( saveIndent );
1087
1088        } else {
1089
1090            int saveIndent;
1091
1092            if ( state.preserveSpace ) {
1093                // If preserving space then hold of indentation so no
1094                // excessive spaces are printed at line breaks, escape
1095                // the text content without replacing spaces and print
1096                // the text breaking only at line breaks.
1097                saveIndent = printer.getNextIndent();
1098                printer.setNextIndent( 0 );
1099                printText( text, true, state.unescaped );
1100                printer.setNextIndent( saveIndent );
1101            } else {
1102                printText( text, false, state.unescaped );
1103            }
1104        }
1105    }
1106
1107
1108    /**
1109     * Returns the name of the entity reference suitable for this character value,
1110     * or null if no such entity exists. {@link #printEscaped(int)} wraps a non-null result
1111     * in <tt>'&amp;'</tt> and <tt>';'</tt>, so presumably the bare name (e.g. <tt>"amp"</tt>) is returned -- confirm in subclasses.
1112     *
1113     * @param ch Character value
1114     * @return Character entity name, or null
1115     */
1116    protected abstract String   getEntityRef( int ch );
1117
1118
1119    /**
1120     * Called to serialize a DOM element (a node of type <tt>ELEMENT_NODE</tt>). How the element is
1121     * written is left to the subclass, i.e. to the serializer's method (XML, HTML, XHTML).
1122     *
1123     * @param elem The element to serialize
1124     * @throws IOException An I/O exception occurred while
1125     *   serializing
1126     */
1127    protected abstract void serializeElement( Element   elem )
1128        throws IOException  ;
1129
1130
1131    /**
1132     * Comments and PIs cannot be serialized before the root element,
1133     * because the root element serializes the document type, which
1134     * generally comes first. Instead such PIs and comments are
1135     * accumulated inside a vector and serialized by calling this
1136     * method. Will be called when the root element is serialized
1137     * and when the document finished serializing.
1138     *
1139     * @throws IOException An I/O exception occured while
1140     *   serializing
1141     */
1142    protected void serializePreRoot()
1143        throws IOException  
1144    {
1145        int i;
1146
1147        if ( preRoot != null ) {
1148            for ( i = 0 ; i < preRoot.size() ; ++i ) {
1149                printText( (String  ) preRoot.elementAt( i ), true, true );
1150                if ( indenting )
1151                printer.breakLine();
1152            }
1153            preRoot.removeAllElements();
1154        }
1155    }
1156
1157
1158    //---------------------------------------------//
1159    // Text pretty printing and formatting methods //
1160    //---------------------------------------------//
1161
1162
1163    /**
1164     * Called to print additional text with whitespace handling.
1165     * If spaces are preserved, the text is printed as if by calling
1166     * {@link #printText(String)} with a call to {@link #breakLine}
1167     * for each new line. If spaces are not preserved, the text is
1168     * broken at space boundaries if longer than the line width;
1169     * Multiple spaces are printed as such, but spaces at beginning
1170     * of line are removed.
1171     *
1172     * @param text The text to print
1173     * @param preserveSpace Space preserving flag
1174     * @param unescaped Print unescaped
1175     */
1176    protected final void printText( char[] chars, int start, int length,
1177                                    boolean preserveSpace, boolean unescaped )
1178        throws IOException  
1179    {
1180        char ch;
1181
1182        if ( preserveSpace ) {
1183            // Preserving spaces: the text must print exactly as it is,
1184            // without breaking when spaces appear in the text and without
1185            // consolidating spaces. If a line terminator is used, a line
1186            // break will occur.
1187            while ( length-- > 0 ) {
1188                ch = chars[ start ];
1189                ++start;
1190                if ( ch == '\n' || ch == '\r' || unescaped )
1191                    printer.printText( ch );
1192                else
1193                    printEscaped( ch );
1194            }
1195        } else {
1196            // Not preserving spaces: print one part at a time, and
1197            // use spaces between parts to break them into different
1198            // lines. Spaces at beginning of line will be stripped
1199            // by printing mechanism. Line terminator is treated
1200            // no different than other text part.
1201            while ( length-- > 0 ) {
1202                ch = chars[ start ];
1203                ++start;
1204                if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' )
1205                    printer.printSpace();
1206                else if ( unescaped )
1207                    printer.printText( ch );
1208                else
1209                    printEscaped( ch );
1210            }
1211        }
1212    }
1213
1214
1215    protected final void printText( String   text, boolean preserveSpace, boolean unescaped )
1216        throws IOException  
1217    {
1218        int index;
1219        char ch;
1220
1221        if ( preserveSpace ) {
1222            // Preserving spaces: the text must print exactly as it is,
1223            // without breaking when spaces appear in the text and without
1224            // consolidating spaces. If a line terminator is used, a line
1225            // break will occur.
1226            for ( index = 0 ; index < text.length() ; ++index ) {
1227                ch = text.charAt( index );
1228                if ( ch == '\n' || ch == '\r' || unescaped )
1229                    printer.printText( ch );
1230                else
1231                    printEscaped( ch );
1232            }
1233        } else {
1234            // Not preserving spaces: print one part at a time, and
1235            // use spaces between parts to break them into different
1236            // lines. Spaces at beginning of line will be stripped
1237            // by printing mechanism. Line terminator is treated
1238            // no different than other text part.
1239            for ( index = 0 ; index < text.length() ; ++index ) {
1240                ch = text.charAt( index );
1241                if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' )
1242                    printer.printSpace();
1243                else if ( unescaped )
1244                    printer.printText( ch );
1245                else
1246                    printEscaped( ch );
1247            }
1248        }
1249    }
1250
1251
1252    /**
1253     * Print a document type public or system identifier URL.
1254     * Encapsulates the URL in double quotes, escapes non-printing
1255     * characters and print it equivalent to {@link #printText}.
1256     *
1257     * @param url The document type url to print
1258     */
1259    protected void printDoctypeURL( String   url )
1260        throws IOException  
1261    {
1262        int                i;
1263
1264        printer.printText( '"' );
1265        for( i = 0 ; i < url.length() ; ++i ) {
1266            if ( url.charAt( i ) == '"' ||  url.charAt( i ) < 0x20 || url.charAt( i ) > 0x7F ) {
1267                printer.printText( '%' );
1268                printer.printText( Integer.toHexString( url.charAt( i ) ) );
1269            } else
1270                printer.printText( url.charAt( i ) );
1271        }
1272        printer.printText( '"' );
1273    }
1274
1275
1276    protected void printEscaped( int ch )
1277        throws IOException  
1278    {
1279        String   charRef;
1280
1281        // If there is a suitable entity reference for this
1282        // character, print it. The list of available entity
1283        // references is almost but not identical between
1284        // XML and HTML.
1285        charRef = getEntityRef( ch );
1286        if ( charRef != null ) {
1287            printer.printText( '&' );
1288            printer.printText( charRef );
1289            printer.printText( ';' );
1290        } else if ( ( ch >= ' ' && encodingInfo.isPrintable(ch) && ch != 0xF7 ) ||
1291                    ch == '\n' || ch == '\r' || ch == '\t' ) {
1292            // If the character is not printable, print as character reference.
1293            // Non printables are below ASCII space but not tab or line
1294            // terminator, ASCII delete, or above a certain Unicode threshold.
1295            if (ch < 0x10000) {
1296                printer.printText((char)ch );
1297            } else {
1298                printer.printText((char)(((ch-0x10000)>>10)+0xd800));
1299                printer.printText((char)(((ch-0x10000)&0x3ff)+0xdc00));
1300            }
1301
1302        } else {
1303            printer.printText( "&#x" );
1304            printer.printText(Integer.toHexString(ch));
1305            printer.printText( ';' );
1306        }
1307    }
1308
1309
1310    /**
1311     * Escapes a string so it may be printed as text content or attribute
1312     * value. Non printable characters are escaped using character references.
1313     * Where the format specifies a deault entity reference, that reference
1314     * is used (e.g. <tt>&amp;lt;</tt>).
1315     *
1316     * @param source The string to escape
1317     */
1318    protected void printEscaped( String   source )
1319        throws IOException  
1320    {
1321        for ( int i = 0 ; i < source.length() ; ++i ) {
1322            int ch = source.charAt(i);
1323            if ((ch & 0xfc00) == 0xd800 && i+1 < source.length()) {
1324                int lowch = source.charAt(i+1);
1325                if ((lowch & 0xfc00) == 0xdc00) {
1326                    ch = 0x10000 + ((ch-0xd800)<<10) + lowch-0xdc00;
1327                    i++;
1328                }
1329            }
1330            printEscaped(ch);
1331        }
1332    }
1333
1334
1335    //--------------------------------//
1336    // Element state handling methods //
1337    //--------------------------------//
1338
1339
1340    /**
1341     * Return the state of the current element.
1342     *
1343     * @return Current element state
1344     */
1345    protected ElementState getElementState()
1346    {
1347        return elementStates[ elementStateCount ];
1348    }
1349
1350
1351    /**
1352     * Enter a new element state for the specified element.
1353     * Tag name and space preserving is specified, element
1354     * state is initially empty.
1355     *
1356     * @return Current element state, or null
1357     */
1358    protected ElementState enterElementState( String   namespaceURI, String   localName,
1359                                              String   rawName, boolean preserveSpace )
1360    {
1361        ElementState state;
1362
1363        if ( elementStateCount + 1 == elementStates.length ) {
1364            ElementState[] newStates;
1365
1366            // Need to create a larger array of states. This does not happen
1367            // often, unless the document is really deep.
1368            newStates = new ElementState[ elementStates.length + 10 ];
1369            for ( int i = 0 ; i < elementStates.length ; ++i )
1370                newStates[ i ] = elementStates[ i ];
1371            for ( int i = elementStates.length ; i < newStates.length ; ++i )
1372                newStates[ i ] = new ElementState();
1373            elementStates = newStates;
1374        }
1375
1376        ++elementStateCount;
1377        state = elementStates[ elementStateCount ];
1378        state.namespaceURI = namespaceURI;
1379        state.localName = localName;
1380        state.rawName = rawName;
1381        state.preserveSpace = preserveSpace;
1382        state.empty = true;
1383        state.afterElement = false;
1384        state.afterComment = false;
1385        state.doCData = state.inCData = false;
1386        state.unescaped = false;
1387        state.prefixes = prefixes;
1388
1389        prefixes = null;
1390        return state;
1391    }
1392
1393
1394    /**
1395     * Leave the current element state and return to the
1396     * state of the parent element. If this was the root
1397     * element, return to the state of the document.
1398     *
1399     * @return Previous element state
1400     */
1401    protected ElementState leaveElementState()
1402    {
1403        if ( elementStateCount > 0 ) {
1404            /*Corrected by David Blondeau (blondeau@intalio.com)*/
1405        prefixes = null;
1406        //_prefixes = _elementStates[ _elementStateCount ].prefixes;
1407            -- elementStateCount;
1408            return elementStates[ elementStateCount ];
1409        } else
1410            throw new IllegalStateException  ( "Internal error: element state is zero" );
1411    }
1412
1413
1414    /**
1415     * Returns true if in the state of the document.
1416     * Returns true before entering any element and after
1417     * leaving the root element.
1418     *
1419     * @return True if in the state of the document
1420     */
1421    protected boolean isDocumentState()
1422    {
1423        return elementStateCount == 0;
1424    }
1425
1426
1427    /**
1428     * Returns the namespace prefix for the specified URI.
1429     * If the URI has been mapped to a prefix, returns the
1430     * prefix, otherwise returns null.
1431     *
1432     * @param namespaceURI The namespace URI
1433     * @return The namespace prefix if known, or null
1434     */
1435    protected String   getPrefix( String   namespaceURI )
1436    {
1437        String      prefix;
1438
1439        if ( prefixes != null ) {
1440            prefix = (String  ) prefixes.get( namespaceURI );
1441            if ( prefix != null )
1442                return prefix;
1443        }
1444        if ( elementStateCount == 0 )
1445            return null;
1446        else {
1447            for ( int i = elementStateCount ; i > 0 ; --i ) {
1448                if ( elementStates[ i ].prefixes != null ) {
1449                    prefix = (String  ) elementStates[ i ].prefixes.get( namespaceURI );
1450                    if ( prefix != null )
1451                        return prefix;
1452                }
1453            }
1454        }
1455        return null;
1456    }
1457
1458
1459}
1460
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Free Books Free Magazines
Popular Tags