KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > apache > xml > serialize > BaseMarkupSerializer


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 1999 The Apache Software Foundation. All rights
6  * reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

57
58
59 // Sep 14, 2000:
60
// Fixed comments to preserve whitespaces and add a line break
61
// when indenting. Reported by Gervase Markham <GRM@dataconnection.com>
62
// Sep 14, 2000:
63
// Fixed serializer to report IO exception directly, instead at
64
// the end of document processing.
65
// Reported by Patrick Higgins <phiggins@transzap.com>
66
// Sep 13, 2000:
67
// CR in character data will print as &#0D;
68
// Aug 25, 2000:
69
// Fixed processing instruction printing inside element content
70
// to not escape content. Reported by Mikael Staldal
71
// <d96-mst@d.kth.se>
72
// Aug 25, 2000:
73
// Added ability to omit comments.
74
// Contributed by Anupam Bagchi <abagchi@jtcsv.com>
75
// Aug 26, 2000:
76
// Fixed bug in newline handling when preserving spaces.
77
// Contributed by Mike Dusseault <mdusseault@home.com>
78
// Aug 29, 2000:
79
// Fixed state.unescaped not being set to false when
80
// entering element state.
81
// Reported by Lowell Vaughn <lvaughn@agillion.com>
82

83
84 package org.enhydra.apache.xml.serialize;
85
86
87 import java.io.IOException JavaDoc;
88 import java.io.OutputStream JavaDoc;
89 import java.io.Writer JavaDoc;
90 import java.util.Hashtable JavaDoc;
91 import java.util.Vector JavaDoc;
92
93 import org.w3c.dom.DOMImplementation JavaDoc;
94 import org.w3c.dom.Document JavaDoc;
95 import org.w3c.dom.DocumentFragment JavaDoc;
96 import org.w3c.dom.DocumentType JavaDoc;
97 import org.w3c.dom.Element JavaDoc;
98 import org.w3c.dom.Entity JavaDoc;
99 import org.w3c.dom.NamedNodeMap JavaDoc;
100 import org.w3c.dom.Node JavaDoc;
101 import org.w3c.dom.Notation JavaDoc;
102 import org.xml.sax.ContentHandler JavaDoc;
103 import org.xml.sax.DTDHandler JavaDoc;
104 import org.xml.sax.DocumentHandler JavaDoc;
105 import org.xml.sax.Locator JavaDoc;
106 import org.xml.sax.SAXException JavaDoc;
107 import org.xml.sax.ext.DeclHandler JavaDoc;
108 import org.xml.sax.ext.LexicalHandler JavaDoc;
109
110
111 /**
112  * Base class for a serializer supporting both DOM and SAX pretty
113  * serializing of XML/HTML/XHTML documents. Derived classes perform
114  * the method-specific serializing, this class provides the common
115  * serializing mechanisms.
116  * <p>
117  * The serializer must be initialized with the proper writer and
118  * output format before it can be used by calling {@link #init}.
119  * The serializer can be reused any number of times, but cannot
120  * be used concurrently by two threads.
121  * <p>
122  * If an output stream is used, the encoding is taken from the
123  * output format (defaults to <tt>UTF-8</tt>). If a writer is
124  * used, make sure the writer uses the same encoding (if applies)
125  * as specified in the output format.
126  * <p>
127  * The serializer supports both DOM and SAX. DOM serializing is done
128  * by calling {@link #serialize} and SAX serializing is done by firing
129  * SAX events and using the serializer as a document handler.
130  * This also applies to derived class.
131  * <p>
132  * If an I/O exception occurs while serializing, the serializer
133  * will not throw an exception directly, but only throw it
134  * at the end of serializing (either DOM or SAX's {@link
135  * org.xml.sax.DocumentHandler#endDocument}).
136  * <p>
137  * For elements that are not specified as whitespace preserving,
138  * the serializer will potentially break long text lines at space
139  * boundaries, indent lines, and serialize elements on separate
140  * lines. Line terminators will be regarded as spaces, and
141  * spaces at beginning of line will be stripped.
142  * <p>
143  * When indenting, the serializer is capable of detecting seemingly
144  * element content, and serializing these elements indented on separate
145  * lines. An element is serialized indented when it is the first or
146  * last child of an element, or immediate following or preceding
147  * another element.
148  *
149  *
150  * @version $Revision: 1.2 $ $Date: 2005/01/26 08:28:45 $
151  * @author <a HREF="mailto:arkin@intalio.com">Assaf Arkin</a>
152  * @see Serializer
153  * @see DOMSerializer
154  */

155 public abstract class BaseMarkupSerializer
156     implements ContentHandler JavaDoc, DocumentHandler JavaDoc, LexicalHandler JavaDoc,
157                DTDHandler JavaDoc, DeclHandler JavaDoc, DOMSerializer, Serializer
158 {
159
160
161     private EncodingInfo _encodingInfo;
162
163
164     /**
165      * Holds array of all element states that have been entered.
166      * The array is automatically resized. When leaving an element,
167      * its state is not removed but reused when later returning
168      * to the same nesting level.
169      */

170     private ElementState[] _elementStates;
171
172
173     /**
174      * The index of the next state to place in the array,
175      * or one plus the index of the current state. When zero,
176      * we are in no state.
177      */

178     private int _elementStateCount;
179
180
181     /**
182      * Vector holding comments and PIs that come before the root
183      * element (even after it), see {@link #serializePreRoot}.
184      */

185     private Vector JavaDoc _preRoot;
186
187
188     /**
189      * If the document has been started (header serialized), this
190      * flag is set to true so it's not started twice.
191      */

192     protected boolean _started;
193
194
195     /**
196      * True if the serializer has been prepared. This flag is set
197      * to false when the serializer is reset prior to using it,
198      * and to true after it has been prepared for usage.
199      */

200     private boolean _prepared;
201
202
203     /**
204      * Association between namespace URIs (keys) and prefixes (values).
205      * Accumulated here prior to starting an element and placing this
206      * list in the element state.
207      */

208     protected Hashtable JavaDoc _prefixes;
209
210
211     /**
212      * The public identifier of the document type, if known.
213      */

214     protected String JavaDoc _docTypePublicId;
215
216
217     /**
218      * The system identifier of the document type, if known.
219      */

220     protected String JavaDoc _docTypeSystemId;
221
222
223     /**
224      * The output format associated with this serializer. This will never
225      * be a null reference. If no format was passed to the constructor,
226      * the default one for this document type will be used. The format
227      * object is never changed by the serializer.
228      */

229     protected OutputFormat _format;
230
231
232     /**
233      * The printer used for printing text parts.
234      */

235     protected Printer _printer;
236
237
238     /**
239      * True if indenting printer.
240      */

241     protected boolean _indenting;
242
243
244     /**
245      * The underlying writer.
246      */

247     private Writer JavaDoc _writer;
248
249
250     /**
251      * The output stream.
252      */

253     private OutputStream JavaDoc _output;
254
255
256     //--------------------------------//
257
// Constructor and initialization //
258
//--------------------------------//
259

260
261     /**
262      * Protected constructor can only be used by derived class.
263      * Must initialize the serializer before serializing any document,
264      * see {@link #init}.
265      */

266     protected BaseMarkupSerializer( OutputFormat format )
267     {
268         int i;
269
270         _elementStates = new ElementState[ 10 ];
271         for ( i = 0 ; i < _elementStates.length ; ++i )
272             _elementStates[ i ] = new ElementState();
273         _format = format;
274     }
275
276
277     public DocumentHandler JavaDoc asDocumentHandler()
278         throws IOException JavaDoc
279     {
280         prepare();
281         return this;
282     }
283
284
285     public ContentHandler JavaDoc asContentHandler()
286         throws IOException JavaDoc
287     {
288         prepare();
289         return this;
290     }
291
292
293     public DOMSerializer asDOMSerializer()
294         throws IOException JavaDoc
295     {
296         prepare();
297         return this;
298     }
299
300
301     public void setOutputByteStream( OutputStream JavaDoc output )
302     {
303         if ( output == null )
304             throw new NullPointerException JavaDoc( "SER001 Argument 'output' is null." );
305         _output = output;
306         _writer = null;
307         reset();
308     }
309
310
311     public void setOutputCharStream( Writer JavaDoc writer )
312     {
313         if ( writer == null )
314             throw new NullPointerException JavaDoc( "SER001 Argument 'writer' is null." );
315         _writer = writer;
316         _output = null;
317         reset();
318     }
319
320
321     public void setOutputFormat( OutputFormat format )
322     {
323         if ( format == null )
324             throw new NullPointerException JavaDoc( "SER001 Argument 'format' is null." );
325         _format = format;
326         reset();
327     }
328
329
330     public boolean reset()
331     {
332         if ( _elementStateCount > 1 )
333             throw new IllegalStateException JavaDoc( "Serializer reset in the middle of serialization" );
334         _prepared = false;
335         return true;
336     }
337
338
339     protected void prepare()
340         throws IOException JavaDoc
341     {
342         if ( _prepared )
343             return;
344
345         if ( _writer == null && _output == null )
346             throw new IOException JavaDoc( "SER002 No writer supplied for serializer" );
347         // If the output stream has been set, use it to construct
348
// the writer. It is possible that the serializer has been
349
// reused with the same output stream and different encoding.
350

351         _encodingInfo = _format.getEncodingInfo();
352
353         if ( _output != null ) {
354             _writer = _encodingInfo.getWriter(_output);
355         }
356
357         if ( _format.getIndenting() ) {
358             _indenting = true;
359             _printer = new IndentPrinter( _writer, _format );
360         } else {
361             _indenting = false;
362             _printer = new Printer( _writer, _format );
363         }
364
365         ElementState state;
366
367         _elementStateCount = 0;
368         state = _elementStates[ 0 ];
369         state.namespaceURI = null;
370         state.localName = null;
371         state.rawName = null;
372         state.preserveSpace = _format.getPreserveSpace();
373         state.empty = true;
374         state.afterElement = false;
375         state.afterComment = false;
376         state.doCData = state.inCData = false;
377         state.prefixes = null;
378
379         _docTypePublicId = _format.getDoctypePublic();
380         _docTypeSystemId = _format.getDoctypeSystem();
381         _started = false;
382         _prepared = true;
383     }
384
385
386
387     //----------------------------------//
388
// DOM document serializing methods //
389
//----------------------------------//
390

391
392     /**
393      * Serializes the DOM element using the previously specified
394      * writer and output format. Throws an exception only if
395      * an I/O exception occured while serializing.
396      *
397      * @param elem The element to serialize
398      * @throws IOException An I/O exception occured while
399      * serializing
400      */

401     public void serialize( Element JavaDoc elem )
402         throws IOException JavaDoc
403     {
404         prepare();
405         serializeNode( elem );
406         _printer.flush();
407         if ( _printer.getException() != null )
408             throw _printer.getException();
409     }
410
411
412     /**
413      * Serializes the DOM document fragmnt using the previously specified
414      * writer and output format. Throws an exception only if
415      * an I/O exception occured while serializing.
416      *
417      * @param elem The element to serialize
418      * @throws IOException An I/O exception occured while
419      * serializing
420      */

421     public void serialize( DocumentFragment JavaDoc frag )
422         throws IOException JavaDoc
423     {
424         prepare();
425         serializeNode( frag );
426         _printer.flush();
427         if ( _printer.getException() != null )
428             throw _printer.getException();
429     }
430
431
432     /**
433      * Serializes the DOM document using the previously specified
434      * writer and output format. Throws an exception only if
435      * an I/O exception occured while serializing.
436      *
437      * @param doc The document to serialize
438      * @throws IOException An I/O exception occured while
439      * serializing
440      */

441     public void serialize( Document JavaDoc doc )
442         throws IOException JavaDoc
443     {
444         prepare();
445         serializeNode( doc );
446         serializePreRoot();
447         _printer.flush();
448         if ( _printer.getException() != null )
449             throw _printer.getException();
450     }
451
452
453     //------------------------------------------//
454
// SAX document handler serializing methods //
455
//------------------------------------------//
456

457
458     public void startDocument()
459         throws SAXException JavaDoc
460     {
461         try {
462             prepare();
463         } catch ( IOException JavaDoc except ) {
464             throw new SAXException JavaDoc( except.toString() );
465         }
466         // Nothing to do here. All the magic happens in startDocument(String)
467
}
468     
469     
470     public void characters( char[] chars, int start, int length )
471         throws SAXException JavaDoc
472     {
473         ElementState state;
474
475         try {
476         state = content();
477
478         // Check if text should be print as CDATA section or unescaped
479
// based on elements listed in the output format (the element
480
// state) or whether we are inside a CDATA section or entity.
481

482         if ( state.inCData || state.doCData ) {
483             int saveIndent;
484
485             // Print a CDATA section. The text is not escaped, but ']]>'
486
// appearing in the code must be identified and dealt with.
487
// The contents of a text node is considered space preserving.
488
if ( ! state.inCData ) {
489                 _printer.printText( "<![CDATA[" );
490                 state.inCData = true;
491             }
492             saveIndent = _printer.getNextIndent();
493             _printer.setNextIndent( 0 );
494             for ( int index = 0 ; index < length ; ++index ) {
495                 if ( index + 2 < length && chars[ index ] == ']' &&
496                      chars[ index + 1 ] == ']' && chars[ index + 2 ] == '>' ) {
497
498                     printText( chars, start, index + 2, true, true );
499                     _printer.printText( "]]><![CDATA[" );
500                     start += index + 2;
501                     length -= index + 2;
502                     index = 0;
503                 }
504             }
505             if ( length > 0 )
506                 printText( chars, start, length, true, true );
507             _printer.setNextIndent( saveIndent );
508
509         } else {
510
511             int saveIndent;
512
513             if ( state.preserveSpace ) {
514                 // If preserving space then hold of indentation so no
515
// excessive spaces are printed at line breaks, escape
516
// the text content without replacing spaces and print
517
// the text breaking only at line breaks.
518
saveIndent = _printer.getNextIndent();
519                 _printer.setNextIndent( 0 );
520                 printText( chars, start, length, true, state.unescaped );
521                 _printer.setNextIndent( saveIndent );
522             } else {
523                 printText( chars, start, length, false, state.unescaped );
524             }
525         }
526         } catch ( IOException JavaDoc except ) {
527             throw new SAXException JavaDoc( except );
528         }
529     }
530
531
532     public void ignorableWhitespace( char[] chars, int start, int length )
533         throws SAXException JavaDoc
534     {
535         int i;
536
537         try {
538         content();
539
540         // Print ignorable whitespaces only when indenting, after
541
// all they are indentation. Cancel the indentation to
542
// not indent twice.
543
if ( _indenting ) {
544             _printer.setThisIndent( 0 );
545             for ( i = start ; length-- > 0 ; ++i )
546                 _printer.printText( chars[ i ] );
547         }
548         } catch ( IOException JavaDoc except ) {
549             throw new SAXException JavaDoc( except );
550         }
551     }
552
553
554     public final void processingInstruction( String JavaDoc target, String JavaDoc code )
555         throws SAXException JavaDoc
556     {
557         try {
558             processingInstructionIO( target, code );
559         } catch ( IOException JavaDoc except ) {
560         throw new SAXException JavaDoc( except );
561         }
562     }
563
564     public void processingInstructionIO( String JavaDoc target, String JavaDoc code )
565         throws IOException JavaDoc
566     {
567         int index;
568         StringBuffer JavaDoc buffer;
569         ElementState state;
570
571         state = content();
572         buffer = new StringBuffer JavaDoc( 40 );
573
574         // Create the processing instruction textual representation.
575
// Make sure we don't have '?>' inside either target or code.
576
index = target.indexOf( "?>" );
577         if ( index >= 0 )
578             buffer.append( "<?" ).append( target.substring( 0, index ) );
579         else
580             buffer.append( "<?" ).append( target );
581         if ( code != null ) {
582             buffer.append( ' ' );
583             index = code.indexOf( "?>" );
584             if ( index >= 0 )
585                 buffer.append( code.substring( 0, index ) );
586             else
587                 buffer.append( code );
588         }
589         buffer.append( "?>" );
590
591         // If before the root element (or after it), do not print
592
// the PI directly but place it in the pre-root vector.
593
if ( isDocumentState() ) {
594             if ( _preRoot == null )
595                 _preRoot = new Vector JavaDoc();
596             _preRoot.addElement( buffer.toString() );
597         } else {
598             _printer.indent();
599             printText( buffer.toString(), true, true );
600             _printer.unindent();
601             if ( _indenting )
602             state.afterElement = true;
603         }
604     }
605
606
607     public void comment( char[] chars, int start, int length )
608         throws SAXException JavaDoc
609     {
610         try {
611         comment( new String JavaDoc( chars, start, length ) );
612         } catch ( IOException JavaDoc except ) {
613             throw new SAXException JavaDoc( except );
614     }
615     }
616
617
618     public void comment( String JavaDoc text )
619         throws IOException JavaDoc
620     {
621         StringBuffer JavaDoc buffer;
622         int index;
623         ElementState state;
624         
625         if ( _format.getOmitComments() )
626             return;
627
628         state = content();
629         buffer = new StringBuffer JavaDoc( 40 );
630         // Create the processing comment textual representation.
631
// Make sure we don't have '-->' inside the comment.
632
index = text.indexOf( "-->" );
633         if ( index >= 0 )
634             buffer.append( "<!--" ).append( text.substring( 0, index ) ).append( "-->" );
635         else
636             buffer.append( "<!--" ).append( text ).append( "-->" );
637
638         // If before the root element (or after it), do not print
639
// the comment directly but place it in the pre-root vector.
640
if ( isDocumentState() ) {
641             if ( _preRoot == null )
642                 _preRoot = new Vector JavaDoc();
643             _preRoot.addElement( buffer.toString() );
644         } else {
645             // Indent this element on a new line if the first
646
// content of the parent element or immediately
647
// following an element.
648
if ( _indenting && ! state.preserveSpace)
649                 _printer.breakLine();
650                         _printer.indent();
651             printText( buffer.toString(), true, true );
652                         _printer.unindent();
653             if ( _indenting )
654                 state.afterElement = true;
655         }
656                 state.afterComment = true;
657                 state.afterElement = false;
658     }
659
660
661     public void startCDATA()
662     {
663         ElementState state;
664
665         state = getElementState();
666         state.doCData = true;
667     }
668
669
670     public void endCDATA()
671     {
672         ElementState state;
673
674         state = getElementState();
675         state.doCData = false;
676     }
677
678
679     public void startNonEscaping()
680     {
681         ElementState state;
682
683         state = getElementState();
684         state.unescaped = true;
685     }
686
687
688     public void endNonEscaping()
689     {
690         ElementState state;
691
692         state = getElementState();
693         state.unescaped = false;
694     }
695
696
697     public void startPreserving()
698     {
699         ElementState state;
700
701         state = getElementState();
702         state.preserveSpace = true;
703     }
704
705
706     public void endPreserving()
707     {
708         ElementState state;
709
710         state = getElementState();
711         state.preserveSpace = false;
712     }
713
714
715     /**
716      * Called at the end of the document to wrap it up.
717      * Will flush the output stream and throw an exception
718      * if any I/O error occured while serializing.
719      *
720      * @throws SAXException An I/O exception occured during
721      * serializing
722      */

723     public void endDocument()
724         throws SAXException JavaDoc
725     {
726         try {
727         // Print all the elements accumulated outside of
728
// the root element.
729
serializePreRoot();
730         // Flush the output, this is necessary for buffered output.
731
_printer.flush();
732         } catch ( IOException JavaDoc except ) {
733             throw new SAXException JavaDoc( except );
734     }
735     }
736
737
738     public void startEntity( String JavaDoc name )
739     {
740         // ???
741
}
742
743
744     public void endEntity( String JavaDoc name )
745     {
746         // ???
747
}
748
749
750     public void setDocumentLocator( Locator JavaDoc locator )
751     {
752         // Nothing to do
753
}
754
755
756     //-----------------------------------------//
757
// SAX content handler serializing methods //
758
//-----------------------------------------//
759

760
761     public void skippedEntity ( String JavaDoc name )
762         throws SAXException JavaDoc
763     {
764         try {
765         endCDATA();
766         content();
767         _printer.printText( '&' );
768         _printer.printText( name );
769         _printer.printText( ';' );
770         } catch ( IOException JavaDoc except ) {
771             throw new SAXException JavaDoc( except );
772     }
773     }
774
775
776     public void startPrefixMapping( String JavaDoc prefix, String JavaDoc uri )
777         throws SAXException JavaDoc
778     {
779         if ( _prefixes == null )
780             _prefixes = new Hashtable JavaDoc();
781         _prefixes.put( uri, prefix == null ? "" : prefix );
782     }
783
784
785     public void endPrefixMapping( String JavaDoc prefix )
786         throws SAXException JavaDoc
787     {
788     }
789
790
791     //------------------------------------------//
792
// SAX DTD/Decl handler serializing methods //
793
//------------------------------------------//
794

795
796     public final void startDTD( String JavaDoc name, String JavaDoc publicId, String JavaDoc systemId )
797         throws SAXException JavaDoc
798     {
799         try {
800         _printer.enterDTD();
801         _docTypePublicId = publicId;
802         _docTypeSystemId = systemId;
803         } catch ( IOException JavaDoc except ) {
804             throw new SAXException JavaDoc( except );
805         }
806     }
807
808
809     public void endDTD()
810     {
811         // Nothing to do here, all the magic occurs in startDocument(String).
812
}
813
814
815     public void elementDecl( String JavaDoc name, String JavaDoc model )
816         throws SAXException JavaDoc
817     {
818         try {
819         _printer.enterDTD();
820         _printer.printText( "<!ELEMENT " );
821         _printer.printText( name );
822         _printer.printText( ' ' );
823         _printer.printText( model );
824         _printer.printText( '>' );
825         if ( _indenting )
826             _printer.breakLine();
827         } catch ( IOException JavaDoc except ) {
828             throw new SAXException JavaDoc( except );
829         }
830     }
831
832
833     public void attributeDecl( String JavaDoc eName, String JavaDoc aName, String JavaDoc type,
834                                String JavaDoc valueDefault, String JavaDoc value )
835         throws SAXException JavaDoc
836     {
837         try {
838         _printer.enterDTD();
839         _printer.printText( "<!ATTLIST " );
840         _printer.printText( eName );
841         _printer.printText( ' ' );
842         _printer.printText( aName );
843         _printer.printText( ' ' );
844         _printer.printText( type );
845         if ( valueDefault != null ) {
846             _printer.printText( ' ' );
847             _printer.printText( valueDefault );
848         }
849         if ( value != null ) {
850             _printer.printText( " \"" );
851             printEscaped( value );
852             _printer.printText( '"' );
853         }
854         _printer.printText( '>' );
855         if ( _indenting )
856             _printer.breakLine();
857         } catch ( IOException JavaDoc except ) {
858             throw new SAXException JavaDoc( except );
859     }
860     }
861
862
863     public void internalEntityDecl( String JavaDoc name, String JavaDoc value )
864         throws SAXException JavaDoc
865     {
866         try {
867         _printer.enterDTD();
868         _printer.printText( "<!ENTITY " );
869         _printer.printText( name );
870         _printer.printText( " \"" );
871         printEscaped( value );
872         _printer.printText( "\">" );
873         if ( _indenting )
874             _printer.breakLine();
875         } catch ( IOException JavaDoc except ) {
876             throw new SAXException JavaDoc( except );
877         }
878     }
879
880
881     public void externalEntityDecl( String JavaDoc name, String JavaDoc publicId, String JavaDoc systemId )
882         throws SAXException JavaDoc
883     {
884         try {
885         _printer.enterDTD();
886         unparsedEntityDecl( name, publicId, systemId, null );
887         } catch ( IOException JavaDoc except ) {
888             throw new SAXException JavaDoc( except );
889         }
890     }
891
892
893     public void unparsedEntityDecl( String JavaDoc name, String JavaDoc publicId,
894                                     String JavaDoc systemId, String JavaDoc notationName )
895         throws SAXException JavaDoc
896     {
897         try {
898         _printer.enterDTD();
899         if ( publicId == null ) {
900             _printer.printText( "<!ENTITY " );
901             _printer.printText( name );
902             _printer.printText( " SYSTEM " );
903             printDoctypeURL( systemId );
904         } else {
905             _printer.printText( "<!ENTITY " );
906             _printer.printText( name );
907             _printer.printText( " PUBLIC " );
908             printDoctypeURL( publicId );
909             _printer.printText( ' ' );
910             printDoctypeURL( systemId );
911         }
912         if ( notationName != null ) {
913             _printer.printText( " NDATA " );
914             _printer.printText( notationName );
915         }
916         _printer.printText( '>' );
917         if ( _indenting )
918             _printer.breakLine();
919         } catch ( IOException JavaDoc except ) {
920             throw new SAXException JavaDoc( except );
921     }
922     }
923
924
925     public void notationDecl( String JavaDoc name, String JavaDoc publicId, String JavaDoc systemId )
926         throws SAXException JavaDoc
927     {
928         try {
929         _printer.enterDTD();
930         if ( publicId != null ) {
931             _printer.printText( "<!NOTATION " );
932             _printer.printText( name );
933             _printer.printText( " PUBLIC " );
934             printDoctypeURL( publicId );
935             if ( systemId != null ) {
936                 _printer.printText( ' ' );
937                 printDoctypeURL( systemId );
938             }
939         } else {
940             _printer.printText( "<!NOTATION " );
941             _printer.printText( name );
942             _printer.printText( " SYSTEM " );
943             printDoctypeURL( systemId );
944         }
945         _printer.printText( '>' );
946         if ( _indenting )
947             _printer.breakLine();
948         } catch ( IOException JavaDoc except ) {
949             throw new SAXException JavaDoc( except );
950         }
951     }
952
953
954     //------------------------------------------//
955
// Generic node serializing methods //
956
//------------------------------------------//
957

958
959     /**
960      * Serialize the DOM node. This method is shared across XML, HTML and XHTML
961      * serializers and the differences are masked out in a separate {@link
962      * #serializeElement}.
963      *
964      * @param node The node to serialize
965      * @see #serializeElement
966      * @throws IOException An I/O exception occured while
967      * serializing
968      */

969     protected void serializeNode( Node JavaDoc node )
970         throws IOException JavaDoc
971     {
972         // Based on the node type call the suitable SAX handler.
973
// Only comments entities and documents which are not
974
// handled by SAX are serialized directly.
975
switch ( node.getNodeType() ) {
976         case Node.TEXT_NODE : {
977             String JavaDoc text;
978
979             text = node.getNodeValue();
980             if ( text != null )
981                 if ( !_indenting || getElementState().preserveSpace
982                      || (text.replace('\n',' ').trim().length() != 0))
983                     characters( text );
984             break;
985         }
986
987         case Node.CDATA_SECTION_NODE : {
988             String JavaDoc text;
989
990             text = node.getNodeValue();
991             if ( text != null ) {
992                 startCDATA();
993                 characters( text );
994                 endCDATA();
995             }
996             break;
997         }
998
999         case Node.COMMENT_NODE : {
1000            String JavaDoc text;
1001
1002            if ( ! _format.getOmitComments() ) {
1003                text = node.getNodeValue();
1004                if ( text != null )
1005                    comment( text );
1006            }
1007            break;
1008        }
1009
1010        case Node.ENTITY_REFERENCE_NODE : {
1011            Node JavaDoc child;
1012
1013            endCDATA();
1014            content();
1015            child = node.getFirstChild();
1016            while ( child != null ) {
1017                serializeNode( child );
1018                child = child.getNextSibling();
1019            }
1020            break;
1021        }
1022
1023        case Node.PROCESSING_INSTRUCTION_NODE :
1024            processingInstructionIO( node.getNodeName(), node.getNodeValue() );
1025            break;
1026
1027        case Node.ELEMENT_NODE :
1028            serializeElement( (Element JavaDoc) node );
1029            break;
1030
1031        case Node.DOCUMENT_NODE : {
1032            DocumentType JavaDoc docType;
1033            DOMImplementation JavaDoc domImpl;
1034            NamedNodeMap JavaDoc map;
1035            Entity JavaDoc entity;
1036            Notation JavaDoc notation;
1037            int i;
1038
1039            // If there is a document type, use the SAX events to
1040
// serialize it.
1041
docType = ( (Document JavaDoc) node ).getDoctype();
1042            if (docType != null) {
1043                // DOM Level 2 (or higher)
1044
domImpl = ( (Document JavaDoc) node ).getImplementation();
1045                try {
1046                    String JavaDoc internal;
1047
1048                    _printer.enterDTD();
1049                    _docTypePublicId = docType.getPublicId();
1050                    _docTypeSystemId = docType.getSystemId();
1051                    internal = docType.getInternalSubset();
1052                    if ( internal != null && internal.length() > 0 )
1053                        _printer.printText( internal );
1054                    endDTD();
1055                }
1056                // DOM Level 1 -- does implementation have methods?
1057
catch (NoSuchMethodError JavaDoc nsme) {
1058                    Class JavaDoc docTypeClass = docType.getClass();
1059
1060                    String JavaDoc docTypePublicId = null;
1061                    String JavaDoc docTypeSystemId = null;
1062                    try {
1063                        java.lang.reflect.Method JavaDoc getPublicId = docTypeClass.getMethod("getPublicId", (Class JavaDoc[])null);
1064                        if (getPublicId.getReturnType().equals(String JavaDoc.class)) {
1065                            docTypePublicId = (String JavaDoc)getPublicId.invoke(docType, (Object JavaDoc[])null);
1066                        }
1067                    }
1068                    catch (Exception JavaDoc e) {
1069                        // ignore
1070
}
1071                    try {
1072                        java.lang.reflect.Method JavaDoc getSystemId = docTypeClass.getMethod("getSystemId", (Class JavaDoc[])null);
1073                        if (getSystemId.getReturnType().equals(String JavaDoc.class)) {
1074                            docTypeSystemId = (String JavaDoc)getSystemId.invoke(docType, (Object JavaDoc[])null);
1075                        }
1076                    }
1077                    catch (Exception JavaDoc e) {
1078                        // ignore
1079
}
1080                    _printer.enterDTD();
1081                    _docTypePublicId = docTypePublicId;
1082                    _docTypeSystemId = docTypeSystemId;
1083                    endDTD();
1084                }
1085            }
1086            // !! Fall through
1087
}
1088        case Node.DOCUMENT_FRAGMENT_NODE : {
1089            Node JavaDoc child;
1090
1091            // By definition this will happen if the node is a document,
1092
// document fragment, etc. Just serialize its contents. It will
1093
// work well for other nodes that we do not know how to serialize.
1094
child = node.getFirstChild();
1095            while ( child != null ) {
1096                serializeNode( child );
1097                child = child.getNextSibling();
1098            }
1099            break;
1100        }
1101
1102        default:
1103            break;
1104        }
1105    }
1106
1107
1108    /**
1109     * Must be called by a method about to print any type of content.
1110     * If the element was just opened, the opening tag is closed and
1111     * will be matched to a closing tag. Returns the current element
1112     * state with <tt>empty</tt> and <tt>afterElement</tt> set to false.
1113     *
1114     * @return The current element state
1115     * @throws IOException An I/O exception occured while
1116     * serializing
1117     */

1118    protected ElementState content()
1119        throws IOException JavaDoc
1120    {
1121        ElementState state;
1122
1123        state = getElementState();
1124        if ( ! isDocumentState() ) {
1125            // Need to close CData section first
1126
if ( state.inCData && ! state.doCData ) {
1127                _printer.printText( "]]>" );
1128                state.inCData = false;
1129            }
1130            // If this is the first content in the element,
1131
// change the state to not-empty and close the
1132
// opening element tag.
1133
if ( state.empty ) {
1134                _printer.printText( '>' );
1135                state.empty = false;
1136            }
1137            // Except for one content type, all of them
1138
// are not last element. That one content
1139
// type will take care of itself.
1140
state.afterElement = false;
1141            // Except for one content type, all of them
1142
// are not last comment. That one content
1143
// type will take care of itself.
1144
state.afterComment = false;
1145        }
1146        return state;
1147    }
1148
1149
1150    /**
1151     * Called to print the text contents in the prevailing element format.
1152     * Since this method is capable of printing text as CDATA, it is used
1153     * for that purpose as well. White space handling is determined by the
1154     * current element state. In addition, the output format can dictate
1155     * whether the text is printed as CDATA or unescaped.
1156     *
1157     * @param text The text to print
1158     * @param unescaped True is should print unescaped
1159     * @throws IOException An I/O exception occured while
1160     * serializing
1161     */

1162    protected void characters( String JavaDoc text )
1163        throws IOException JavaDoc
1164    {
1165        ElementState state;
1166
1167        state = content();
1168        // Check if text should be print as CDATA section or unescaped
1169
// based on elements listed in the output format (the element
1170
// state) or whether we are inside a CDATA section or entity.
1171

1172        if ( state.inCData || state.doCData ) {
1173            StringBuffer JavaDoc buffer;
1174            int index;
1175            int saveIndent;
1176
1177            // Print a CDATA section. The text is not escaped, but ']]>'
1178
// appearing in the code must be identified and dealt with.
1179
// The contents of a text node is considered space preserving.
1180
buffer = new StringBuffer JavaDoc( text.length() );
1181            if ( ! state.inCData ) {
1182                buffer.append( "<![CDATA[" );
1183                state.inCData = true;
1184            }
1185            index = text.indexOf( "]]>" );
1186            while ( index >= 0 ) {
1187                buffer.append( text.substring( 0, index + 2 ) ).append( "]]><![CDATA[" );
1188                text = text.substring( index + 2 );
1189                index = text.indexOf( "]]>" );
1190            }
1191            buffer.append( text );
1192            saveIndent = _printer.getNextIndent();
1193            _printer.setNextIndent( 0 );
1194            printText( buffer.toString(), true, true );
1195            _printer.setNextIndent( saveIndent );
1196
1197        } else {
1198
1199            int saveIndent;
1200
1201            if ( state.preserveSpace ) {
1202                // If preserving space then hold of indentation so no
1203
// excessive spaces are printed at line breaks, escape
1204
// the text content without replacing spaces and print
1205
// the text breaking only at line breaks.
1206
saveIndent = _printer.getNextIndent();
1207                _printer.setNextIndent( 0 );
1208                printText( text, true, state.unescaped );
1209                _printer.setNextIndent( saveIndent );
1210            } else {
1211                printText( text, false, state.unescaped );
1212            }
1213        }
1214    }
1215
1216
1217    /**
1218     * Returns the suitable entity reference for this character value,
1219     * or null if no such entity exists. Calling this method with <tt>'&amp;'</tt>
1220     * will return <tt>"&amp;amp;"</tt>.
1221     *
1222     * @param ch Character value
1223     * @return Character entity name, or null
1224     */

1225    protected abstract String JavaDoc getEntityRef( int ch );
1226
1227
1228    /**
1229     * Called to serializee the DOM element. The element is serialized based on
1230     * the serializer's method (XML, HTML, XHTML).
1231     *
1232     * @param elem The element to serialize
1233     * @throws IOException An I/O exception occured while
1234     * serializing
1235     */

1236    protected abstract void serializeElement( Element JavaDoc elem )
1237        throws IOException JavaDoc;
1238
1239
1240    /**
1241     * Comments and PIs cannot be serialized before the root element,
1242     * because the root element serializes the document type, which
1243     * generally comes first. Instead such PIs and comments are
1244     * accumulated inside a vector and serialized by calling this
1245     * method. Will be called when the root element is serialized
1246     * and when the document finished serializing.
1247     *
1248     * @throws IOException An I/O exception occured while
1249     * serializing
1250     */

1251    protected void serializePreRoot()
1252        throws IOException JavaDoc
1253    {
1254        int i;
1255
1256        if ( _preRoot != null ) {
1257            for ( i = 0 ; i < _preRoot.size() ; ++i ) {
1258                printText( (String JavaDoc) _preRoot.elementAt( i ), true, true );
1259                if ( _indenting )
1260                _printer.breakLine();
1261            }
1262            _preRoot.removeAllElements();
1263        }
1264    }
1265
1266
1267    //---------------------------------------------//
1268
// Text pretty printing and formatting methods //
1269
//---------------------------------------------//
1270

1271
1272    /**
1273     * Called to print additional text with whitespace handling.
1274     * If spaces are preserved, the text is printed as if by calling
1275     * {@link #printText(String)} with a call to {@link #breakLine}
1276     * for each new line. If spaces are not preserved, the text is
1277     * broken at space boundaries if longer than the line width;
1278     * Multiple spaces are printed as such, but spaces at beginning
1279     * of line are removed.
1280     *
1281     * @param text The text to print
1282     * @param preserveSpace Space preserving flag
1283     * @param unescaped Print unescaped
1284     */

1285    protected final void printText( char[] chars, int start, int length,
1286                                    boolean preserveSpace, boolean unescaped )
1287        throws IOException JavaDoc
1288    {
1289        int index;
1290        char ch;
1291
1292        if ( preserveSpace ) {
1293            // Preserving spaces: the text must print exactly as it is,
1294
// without breaking when spaces appear in the text and without
1295
// consolidating spaces. If a line terminator is used, a line
1296
// break will occur.
1297
while ( length-- > 0 ) {
1298                ch = chars[ start ];
1299                ++start;
1300                if ( ch == '\n' || ch == '\r' || unescaped )
1301                    _printer.printText( ch );
1302                else
1303                    printEscaped( ch );
1304            }
1305        } else {
1306            // Not preserving spaces: print one part at a time, and
1307
// use spaces between parts to break them into different
1308
// lines. Spaces at beginning of line will be stripped
1309
// by printing mechanism. Line terminator is treated
1310
// no different than other text part.
1311
while ( length-- > 0 ) {
1312                ch = chars[ start ];
1313                ++start;
1314                if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' )
1315                    _printer.printSpace();
1316                else if ( unescaped )
1317                    _printer.printText( ch );
1318                else
1319                    printEscaped( ch );
1320            }
1321        }
1322    }
1323
1324
1325    protected final void printText( String JavaDoc text, boolean preserveSpace, boolean unescaped )
1326        throws IOException JavaDoc
1327    {
1328        int index;
1329        char ch;
1330
1331        if ( preserveSpace ) {
1332            // Preserving spaces: the text must print exactly as it is,
1333
// without breaking when spaces appear in the text and without
1334
// consolidating spaces. If a line terminator is used, a line
1335
// break will occur.
1336
for ( index = 0 ; index < text.length() ; ++index ) {
1337                ch = text.charAt( index );
1338                if ( ch == '\n' || ch == '\r' || unescaped )
1339                    _printer.printText( ch );
1340                else
1341                    printEscaped( ch );
1342            }
1343        } else {
1344            // Not preserving spaces: print one part at a time, and
1345
// use spaces between parts to break them into different
1346
// lines. Spaces at beginning of line will be stripped
1347
// by printing mechanism. Line terminator is treated
1348
// no different than other text part.
1349
for ( index = 0 ; index < text.length() ; ++index ) {
1350                ch = text.charAt( index );
1351                if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' )
1352                    _printer.printSpace();
1353                else if ( unescaped )
1354                    _printer.printText( ch );
1355                else
1356                    printEscaped( ch );
1357            }
1358        }
1359    }
1360
1361
1362    /**
1363     * Print a document type public or system identifier URL.
1364     * Encapsulates the URL in double quotes, escapes non-printing
1365     * characters and print it equivalent to {@link #printText}.
1366     *
1367     * @param url The document type url to print
1368     */

1369    protected void printDoctypeURL( String JavaDoc url )
1370        throws IOException JavaDoc
1371    {
1372        int i;
1373
1374        _printer.printText( '"' );
1375        for( i = 0 ; i < url.length() ; ++i ) {
1376            if ( url.charAt( i ) == '"' || url.charAt( i ) < 0x20 || url.charAt( i ) > 0x7F ) {
1377                _printer.printText( '%' );
1378                _printer.printText( Integer.toHexString( url.charAt( i ) ) );
1379            } else
1380                _printer.printText( url.charAt( i ) );
1381        }
1382        _printer.printText( '"' );
1383    }
1384
1385
1386    protected void printEscaped( int ch )
1387        throws IOException JavaDoc
1388    {
1389        String JavaDoc charRef;
1390
1391        // If there is a suitable entity reference for this
1392
// character, print it. The list of available entity
1393
// references is almost but not identical between
1394
// XML and HTML.
1395
charRef = getEntityRef( ch );
1396        if ( charRef != null ) {
1397            _printer.printText( '&' );
1398            _printer.printText( charRef );
1399            _printer.printText( ';' );
1400        } else if ( ( ch >= ' ' && _encodingInfo.isPrintable(ch) && ch != 0xF7 ) ||
1401                    ch == '\n' || ch == '\r' || ch == '\t' ) {
1402            // If the character is not printable, print as character reference.
1403
// Non printables are below ASCII space but not tab or line
1404
// terminator, ASCII delete, or above a certain Unicode threshold.
1405
if (ch < 0x10000) {
1406                _printer.printText((char)ch );
1407            } else {
1408                _printer.printText((char)(((ch-0x10000)>>10)+0xd800));
1409                _printer.printText((char)(((ch-0x10000)&0x3ff)+0xdc00));
1410            }
1411
1412        } else {
1413            _printer.printText( "&#x" );
1414            _printer.printText(Integer.toHexString(ch));
1415            _printer.printText( ';' );
1416        }
1417    }
1418
1419
1420    /**
1421     * Escapes a string so it may be printed as text content or attribute
1422     * value. Non printable characters are escaped using character references.
1423     * Where the format specifies a deault entity reference, that reference
1424     * is used (e.g. <tt>&amp;lt;</tt>).
1425     *
1426     * @param source The string to escape
1427     */

1428    protected void printEscaped( String JavaDoc source )
1429        throws IOException JavaDoc
1430    {
1431        for ( int i = 0 ; i < source.length() ; ++i ) {
1432            int ch = source.charAt(i);
1433            if ((ch & 0xfc00) == 0xd800 && i+1 < source.length()) {
1434                int lowch = source.charAt(i+1);
1435                if ((lowch & 0xfc00) == 0xdc00) {
1436                    ch = 0x10000 + ((ch-0xd800)<<10) + lowch-0xdc00;
1437                    i++;
1438                }
1439            }
1440            printEscaped(ch);
1441        }
1442    }
1443
1444
1445    //--------------------------------//
1446
// Element state handling methods //
1447
//--------------------------------//
1448

1449
1450    /**
1451     * Return the state of the current element.
1452     *
1453     * @return Current element state
1454     */

1455    protected ElementState getElementState()
1456    {
1457        return _elementStates[ _elementStateCount ];
1458    }
1459
1460
1461    /**
1462     * Enter a new element state for the specified element.
1463     * Tag name and space preserving is specified, element
1464     * state is initially empty.
1465     *
1466     * @return Current element state, or null
1467     */

1468    protected ElementState enterElementState( String JavaDoc namespaceURI, String JavaDoc localName,
1469                                              String JavaDoc rawName, boolean preserveSpace )
1470    {
1471        ElementState state;
1472
1473        if ( _elementStateCount + 1 == _elementStates.length ) {
1474            ElementState[] newStates;
1475
1476            // Need to create a larger array of states. This does not happen
1477
// often, unless the document is really deep.
1478
newStates = new ElementState[ _elementStates.length + 10 ];
1479            for ( int i = 0 ; i < _elementStates.length ; ++i )
1480                newStates[ i ] = _elementStates[ i ];
1481            for ( int i = _elementStates.length ; i < newStates.length ; ++i )
1482                newStates[ i ] = new ElementState();
1483            _elementStates = newStates;
1484        }
1485
1486        ++_elementStateCount;
1487        state = _elementStates[ _elementStateCount ];
1488        state.namespaceURI = namespaceURI;
1489        state.localName = localName;
1490        state.rawName = rawName;
1491        state.preserveSpace = preserveSpace;
1492        state.empty = true;
1493        state.afterElement = false;
1494        state.afterComment = false;
1495        state.doCData = state.inCData = false;
1496        state.unescaped = false;
1497        state.prefixes = _prefixes;
1498
1499        _prefixes = null;
1500        return state;
1501    }
1502
1503
1504    /**
1505     * Leave the current element state and return to the
1506     * state of the parent element. If this was the root
1507     * element, return to the state of the document.
1508     *
1509     * @return Previous element state
1510     */

1511    protected ElementState leaveElementState()
1512    {
1513        if ( _elementStateCount > 0 ) {
1514            /*Corrected by David Blondeau (blondeau@intalio.com)*/
1515        _prefixes = null;
1516        //_prefixes = _elementStates[ _elementStateCount ].prefixes;
1517
-- _elementStateCount;
1518            return _elementStates[ _elementStateCount ];
1519        } else
1520            throw new IllegalStateException JavaDoc( "Internal error: element state is zero" );
1521    }
1522
1523
1524    /**
1525     * Returns true if in the state of the document.
1526     * Returns true before entering any element and after
1527     * leaving the root element.
1528     *
1529     * @return True if in the state of the document
1530     */

1531    protected boolean isDocumentState()
1532    {
1533        return _elementStateCount == 0;
1534    }
1535
1536
1537    /**
1538     * Returns the namespace prefix for the specified URI.
1539     * If the URI has been mapped to a prefix, returns the
1540     * prefix, otherwise returns null.
1541     *
1542     * @param namespaceURI The namespace URI
1543     * @return The namespace prefix if known, or null
1544     */

1545    protected String JavaDoc getPrefix( String JavaDoc namespaceURI )
1546    {
1547        String JavaDoc prefix;
1548
1549        if ( _prefixes != null ) {
1550            prefix = (String JavaDoc) _prefixes.get( namespaceURI );
1551            if ( prefix != null )
1552                return prefix;
1553        }
1554        if ( _elementStateCount == 0 )
1555            return null;
1556        else {
1557            for ( int i = _elementStateCount ; i > 0 ; --i ) {
1558                if ( _elementStates[ i ].prefixes != null ) {
1559                    prefix = (String JavaDoc) _elementStates[ i ].prefixes.get( namespaceURI );
1560                    if ( prefix != null )
1561                        return prefix;
1562                }
1563            }
1564        }
1565        return null;
1566    }
1567
1568
1569}
1570
Popular Tags