XML11Serializer


1   /*
2    * The Apache Software License, Version 1.1
3    *
4    *
5    * Copyright (c) 1999-2002 The Apache Software Foundation.  All rights
6    * reserved.
7    *
8    * Redistribution and use in source and binary forms, with or without
9    * modification, are permitted provided that the following conditions
10   * are met:
11   *
12   * 1. Redistributions of source code must retain the above copyright
13   *    notice, this list of conditions and the following disclaimer.
14   *
15   * 2. Redistributions in binary form must reproduce the above copyright
16   *    notice, this list of conditions and the following disclaimer in
17   *    the documentation and/or other materials provided with the
18   *    distribution.
19   *
20   * 3. The end-user documentation included with the redistribution,
21   *    if any, must include the following acknowledgment:
22   *       "This product includes software developed by the
23   *        Apache Software Foundation (http://www.apache.org/)."
24   *    Alternately, this acknowledgment may appear in the software itself,
25   *    if and wherever such third-party acknowledgments normally appear.
26   *
27   * 4. The names "Xerces" and "Apache Software Foundation" must
28   *    not be used to endorse or promote products derived from this
29   *    software without prior written permission. For written
30   *    permission, please contact apache@apache.org.
31   *
32   * 5. Products derived from this software may not be called "Apache",
33   *    nor may "Apache" appear in their name, without prior written
34   *    permission of the Apache Software Foundation.
35   *
36   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47   * SUCH DAMAGE.
48   * ====================================================================
49   *
50   * This software consists of voluntary contributions made by many
51   * individuals on behalf of the Apache Software Foundation and was
52   * originally based on software copyright (c) 1999, International
53   * Business Machines, Inc., http://www.apache.org.  For more
54   * information on the Apache Software Foundation, please see
55   * <http://www.apache.org/>.
56   */
57  
58  
59  
60  // Sep 14, 2000:
61  //  Fixed problem with namespace handling. Contributed by
62  //  David Blondeau <blondeau@intalio.com>
63  // Sep 14, 2000:
64  //  Fixed serializer to report IO exceptions directly, instead of at
65  //  the end of document processing.
66  //  Reported by Patrick Higgins <phiggins@transzap.com>
67  // Aug 21, 2000:
68  //  Fixed bug in startDocument not calling prepare.
69  //  Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
70  // Aug 21, 2000:
71  //  Added ability to omit DOCTYPE declaration.
72  
73  
74  package com.sun.org.apache.xml.internal.serialize;
75  
76  
77  import java.io.IOException  ;
78  import java.io.OutputStream  ;
79  import java.io.Writer  ;
80  
81  import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter;
82  import org.w3c.dom.DOMError  ;
83  import com.sun.org.apache.xerces.internal.impl.Constants;
84  import com.sun.org.apache.xerces.internal.util.NamespaceSupport;
85  import com.sun.org.apache.xerces.internal.util.SymbolTable;
86  import com.sun.org.apache.xerces.internal.util.XML11Char;
87  import com.sun.org.apache.xerces.internal.util.XMLChar;
88  import org.xml.sax.SAXException  ;
89  
90  /**
91   * Implements an XML serializer supporting both DOM and SAX pretty
92   * serializing. For usage instructions see {@link Serializer}.
93   * <p>
94   * If an output stream is used, the encoding is taken from the
95   * output format (defaults to <tt>UTF-8</tt>). If a writer is
96   * used, make sure the writer uses the same encoding (if applies)
97   * as specified in the output format.
98   * <p>
99   * The serializer supports both DOM and SAX. SAX serializing is done by firing
100  * SAX events and using the serializer as a document handler. DOM serializing is done
101  * by calling {@link #serialize(Document)} or by using DOM Level 3  
102  * {@link org.w3c.dom.ls.DOMSerializer} and
103  * serializing with {@link org.w3c.dom.ls.DOMSerializer#write},
104  * {@link org.w3c.dom.ls.DOMSerializer#writeToString}.
105  * <p>
106  * If an I/O exception occurs while serializing, the serializer
107  * will not throw an exception directly, but only throw it
108  * at the end of serializing (either DOM or SAX's {@link
109  * org.xml.sax.DocumentHandler#endDocument}.
110  * <p>
111  * For elements that are not specified as whitespace preserving,
112  * the serializer will potentially break long text lines at space
113  * boundaries, indent lines, and serialize elements on separate
114  * lines. Line terminators will be regarded as spaces, and
115  * spaces at beginning of line will be stripped.
116  * @author <a HREF="mailto:arkin@intalio.com">Assaf Arkin</a>
117  * @author <a HREF="mailto:rahul.srivastava@sun.com">Rahul Srivastava</a>
118  * @author Elena Litani IBM
119  * @version $Revision: 1.8 $ $Date: 2004/01/29 21:11:30 $
120  * @see Serializer
121  */
122 public class XML11Serializer
123 extends XMLSerializer {
124 
125     //
126     // constants
127     //
128 
129     protected static final boolean DEBUG = false;
130 
131     // 
132     // data
133     //
134 
135     // 
136     // DOM Level 3 implementation: variables intialized in DOMSerializerImpl
137     // 
138 
139     /** stores namespaces in scope */
140     protected NamespaceSupport fNSBinder;
141 
142     /** stores all namespace bindings on the current element */
143     protected NamespaceSupport fLocalNSBinder;
144 
145     /** symbol table for serialization */
146     protected SymbolTable fSymbolTable;    
147 
148     // is node dom level 1 node?
149     protected boolean fDOML1 = false;
150     // counter for new prefix names
151     protected int fNamespaceCounter = 1;
152     protected final static String   PREFIX = "NS";
153 
154     /**
155      * Controls whether namespace fixup should be performed during
156      * the serialization. 
157      * NOTE: if this field is set to true the following 
158      * fields need to be initialized: fNSBinder, fLocalNSBinder, fSymbolTable, 
159      * XMLSymbols.EMPTY_STRING, fXmlSymbol, fXmlnsSymbol, fNamespaceCounter.
160      */
161     protected boolean fNamespaces = false;
162 
163 
164     private boolean fPreserveSpace;
165 
166 
167     /**
168      * Constructs a new serializer. The serializer cannot be used without
169      * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
170      * first.
171      */
172     public XML11Serializer() {
173         super( );
174         _format.setVersion("1.1");
175     }
176 
177 
178     /**
179      * Constructs a new serializer. The serializer cannot be used without
180      * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
181      * first.
182      */
183     public XML11Serializer( OutputFormat format ) {
184         super( format );
185         _format.setVersion("1.1");
186     }
187 
188 
189     /**
190      * Constructs a new serializer that writes to the specified writer
191      * using the specified output format. If <tt>format</tt> is null,
192      * will use a default output format.
193      *
194      * @param writer The writer to use
195      * @param format The output format to use, null for the default
196      */
197     public XML11Serializer( Writer   writer, OutputFormat format ) {
198         super( writer, format );
199         _format.setVersion("1.1");
200     }
201 
202 
203     /**
204      * Constructs a new serializer that writes to the specified output
205      * stream using the specified output format. If <tt>format</tt>
206      * is null, will use a default output format.
207      *
208      * @param output The output stream to use
209      * @param format The output format to use, null for the default
210      */
211     public XML11Serializer( OutputStream   output, OutputFormat format ) {
212         super( output, format != null ? format : new OutputFormat( Method.XML, null, false ) );
213         _format.setVersion("1.1");
214     }
215 
216     //-----------------------------------------//
217     // SAX content handler serializing methods //
218     //-----------------------------------------//
219 
220 
221     public void characters( char[] chars, int start, int length )
222         throws SAXException  
223     {
224         ElementState state;
225 
226         try {
227             state = content();
228 
229             // Check if text should be print as CDATA section or unescaped
230             // based on elements listed in the output format (the element
231             // state) or whether we are inside a CDATA section or entity.
232 
233             if ( state.inCData || state.doCData ) {
234                 int          saveIndent;
235 
236                 // Print a CDATA section. The text is not escaped, but ']]>'
237                 // appearing in the code must be identified and dealt with.
238                 // The contents of a text node is considered space preserving.
239                 if ( ! state.inCData ) {
240                     _printer.printText( "<![CDATA[" );
241                     state.inCData = true;
242                 }
243                 saveIndent = _printer.getNextIndent();
244                 _printer.setNextIndent( 0 );
245                 char ch;
246                 for ( int index = start ; index < length ; ++index ) {
247                     ch = chars[index];
248                     if ( ch == ']' && index + 2 < length &&
249                         chars[ index + 1 ] == ']' && chars[ index + 2 ] == '>' ) {
250                         _printer.printText("]]]]><![CDATA[>");
251                         index +=2; 
252                         continue;
253                     }
254                     if (!XML11Char.isXML11Valid(ch)) {
255                         // check if it is surrogate
256                         if (++index <length) {
257                             surrogates(ch, chars[index]);
258                         } 
259                         else {
260                             fatalError("The character '"+(char)ch+"' is an invalid XML character"); 
261                         }
262                         continue;
263                     } else {
264                         if ( _encodingInfo.isPrintable((char)ch) && XML11Char.isXML11ValidLiteral(ch)) {
265                             _printer.printText((char)ch);
266                         } else {
267                             // The character is not printable -- split CDATA section
268                             _printer.printText("]]>&#x");                        
269                             _printer.printText(Integer.toHexString(ch));                        
270                             _printer.printText(";<![CDATA[");
271                         }
272                     }
273                 }
274                 _printer.setNextIndent( saveIndent );
275 
276             } else {
277 
278                 int saveIndent;
279 
280                 if ( state.preserveSpace ) {
281                     // If preserving space then hold of indentation so no
282                     // excessive spaces are printed at line breaks, escape
283                     // the text content without replacing spaces and print
284                     // the text breaking only at line breaks.
285                     saveIndent = _printer.getNextIndent();
286                     _printer.setNextIndent( 0 );
287                     printText( chars, start, length, true, state.unescaped );
288                     _printer.setNextIndent( saveIndent );
289                 } else {
290                     printText( chars, start, length, false, state.unescaped );
291                 }
292             }
293         } catch ( IOException   except ) {
294             throw new SAXException  ( except );
295         }
296     }
297 
298 
299     //
300     // overwrite printing functions to make sure serializer prints out valid XML
301     //
302     protected void printEscaped( String   source ) throws IOException   {
303         int length = source.length();
304         for ( int i = 0 ; i < length ; ++i ) {
305             int ch = source.charAt(i);
306             if (!XML11Char.isXML11Valid(ch)) {
307                 if (++i <length) {
308                     surrogates(ch, source.charAt(i));
309                 } else {
310                     fatalError("The character '"+(char)ch+"' is an invalid XML character"); 
311                 }
312                 continue;
313             }
314             if (ch == '\n' || ch == '\r' || ch == '\t' || ch == 0x0085 || ch == 0x2028){
315                 printHex(ch);
316             } else if (ch == '<') {
317                 _printer.printText("&lt;");
318             } else if (ch == '&') {
319                 _printer.printText("&amp;");
320             } else if (ch == '"') {
321                 _printer.printText("&quot;");
322             } else if ((ch >= ' ' && _encodingInfo.isPrintable((char) ch))) {
323                 _printer.printText((char) ch);
324             } else {
325                 printHex(ch);
326             }
327         }
328     }
329 
330     protected final void printCDATAText(String   text) throws IOException   {
331         int length = text.length();
332         char ch;
333 
334         for (int index = 0; index < length; ++index) {
335             ch = text.charAt(index);
336 
337             if (ch == ']'
338                 && index + 2 < length
339                 && text.charAt(index + 1) == ']'
340                 && text.charAt(index + 2) == '>') { // check for ']]>'
341                 if (fDOMErrorHandler != null){
342                     // REVISIT: this means that if DOM Error handler is not registered we don't report any
343                     // fatal errors and might serialize not wellformed document
344                 if ((features & DOMSerializerImpl.SPLITCDATA) == 0
345                     && (features & DOMSerializerImpl.WELLFORMED) == 0) {
346                     // issue fatal error
347                     String   msg =
348                         DOMMessageFormatter.formatMessage(
349                             DOMMessageFormatter.SERIALIZER_DOMAIN,
350                             "EndingCDATA",
351                             null);
352                     modifyDOMError(
353                         msg,
354                         DOMError.SEVERITY_FATAL_ERROR,
355                         fCurrentNode);
356                     boolean continueProcess =
357                         fDOMErrorHandler.handleError(fDOMError);
358                     if (!continueProcess) {
359                         throw new IOException  ();
360                     }
361                 } else {
362                     // issue warning
363                     String   msg =
364                         DOMMessageFormatter.formatMessage(
365                             DOMMessageFormatter.SERIALIZER_DOMAIN,
366                             "SplittingCDATA",
367                             null);
368                     modifyDOMError(
369                         msg,
370                         DOMError.SEVERITY_WARNING,
371                         fCurrentNode);
372                     fDOMErrorHandler.handleError(fDOMError);
373                 }
374                 }
375                 // split CDATA section
376                 _printer.printText("]]]]><![CDATA[>");
377                 index += 2;
378                 continue;
379             }
380 
381             if (!XML11Char.isXML11Valid(ch)) {
382                 // check if it is surrogate
383                 if (++index < length) {
384                     surrogates(ch, text.charAt(index));
385                 } else {
386                     fatalError(
387                         "The character '"
388                             + (char) ch
389                             + "' is an invalid XML character");
390                 }
391                 continue;
392             } else {
393                 if (_encodingInfo.isPrintable((char) ch)
394                     && XML11Char.isXML11ValidLiteral(ch)) {
395                     _printer.printText((char) ch);
396                 } else {
397 
398                     // The character is not printable -- split CDATA section
399                     _printer.printText("]]>&#x");
400                     _printer.printText(Integer.toHexString(ch));
401                     _printer.printText(";<![CDATA[");
402                 }
403             }
404         }
405     }
406 
407 
408     // note that this "int" should, in all cases, be a char.
409     // REVISIT:  make it a char...
410     protected final void printXMLChar( int ch ) throws IOException   {
411         
412         if (ch == '\r' || ch == 0x0085 || ch == 0x2028) {
413             printHex(ch);
414         } else if ( ch == '<') {
415             _printer.printText("&lt;");
416         } else if (ch == '&') {
417             _printer.printText("&amp;");
418         } else if (ch == '>'){
419             // character sequence "]]>" can't appear in content, therefore
420             // we should escape '>' 
421             _printer.printText("&gt;");
422         } else if ( _encodingInfo.isPrintable((char)ch) && XML11Char.isXML11ValidLiteral(ch)) { 
423             _printer.printText((char)ch);
424         } else {
425              printHex(ch);
426         }
427     }
428 
429 
430 
431     protected final void surrogates(int high, int low) throws IOException  {
432         if (XMLChar.isHighSurrogate(high)) {
433             if (!XMLChar.isLowSurrogate(low)) {
434                 //Invalid XML
435                 fatalError("The character '"+(char)low+"' is an invalid XML character"); 
436             }
437             else {
438                 int supplemental = XMLChar.supplemental((char)high, (char)low);
439                 if (!XML11Char.isXML11Valid(supplemental)) {
440                     //Invalid XML
441                     fatalError("The character '"+(char)supplemental+"' is an invalid XML character"); 
442                 }
443                 else {
444                     if (content().inCData ) {
445                         _printer.printText("]]>&#x");                        
446                         _printer.printText(Integer.toHexString(supplemental));                        
447                         _printer.printText(";<![CDATA[");
448                     }  
449                     else {
450                         printHex(supplemental);
451                     }
452                 }
453             }
454         } else {
455             fatalError("The character '"+(char)high+"' is an invalid XML character"); 
456         }
457 
458     }
459 
460 
461     protected void printText( String   text, boolean preserveSpace, boolean unescaped )
462     throws IOException   {
463         int index;
464         char ch;
465         int length = text.length();
466         if ( preserveSpace ) {
467             // Preserving spaces: the text must print exactly as it is,
468             // without breaking when spaces appear in the text and without
469             // consolidating spaces. If a line terminator is used, a line
470             // break will occur.
471             for ( index = 0 ; index < length ; ++index ) {
472                 ch = text.charAt( index );
473                 if (!XML11Char.isXML11Valid(ch)) {
474                     // check if it is surrogate
475                     if (++index <length) {
476                         surrogates(ch, text.charAt(index));
477                     } else {
478                         fatalError("The character '"+(char)ch+"' is an invalid XML character"); 
479                     }
480                     continue;
481                 }
482                 if ( unescaped  && XML11Char.isXML11ValidLiteral(ch)) {
483                     _printer.printText( ch );
484                 } else
485                     printXMLChar( ch );
486             }
487         } else {
488             // Not preserving spaces: print one part at a time, and
489             // use spaces between parts to break them into different
490             // lines. Spaces at beginning of line will be stripped
491             // by printing mechanism. Line terminator is treated
492             // no different than other text part.
493             for ( index = 0 ; index < length ; ++index ) {
494                 ch = text.charAt( index );
495                 if (!XML11Char.isXML11Valid(ch)) {
496                     // check if it is surrogate
497                     if (++index <length) {
498                         surrogates(ch, text.charAt(index));
499                     } else {
500                         fatalError("The character '"+(char)ch+"' is an invalid XML character"); 
501                     }
502                     continue;
503                 }
504 
505                 if ( unescaped && XML11Char.isXML11ValidLiteral(ch) )
506                     _printer.printText( ch );
507                 else
508                     printXMLChar( ch);
509             }
510         }
511     }
512 
513 
514 
515     protected void printText( char[] chars, int start, int length,
516                               boolean preserveSpace, boolean unescaped ) throws IOException   {
517         int index;
518         char ch;
519 
520         if ( preserveSpace ) {
521             // Preserving spaces: the text must print exactly as it is,
522             // without breaking when spaces appear in the text and without
523             // consolidating spaces. If a line terminator is used, a line
524             // break will occur.
525             while ( length-- > 0 ) {
526                 ch = chars[ start ];
527                 ++start;
528                 if (!XML11Char.isXML11Valid(ch)) {
529                     // check if it is surrogate
530                     if (++start <length) {
531                         surrogates(ch, chars[start]);
532                     } else {
533                         fatalError("The character '"+(char)ch+"' is an invalid XML character"); 
534                     }
535                     continue;
536                 }
537                 if ( unescaped && XML11Char.isXML11ValidLiteral(ch))
538                     _printer.printText( ch );
539                 else
540                     printXMLChar( ch );
541             }
542         } else {
543             // Not preserving spaces: print one part at a time, and
544             // use spaces between parts to break them into different
545             // lines. Spaces at beginning of line will be stripped
546             // by printing mechanism. Line terminator is treated
547             // no different than other text part.
548             while ( length-- > 0 ) {
549                 ch = chars[ start ];
550                 ++start;
551 
552                 if (!XML11Char.isXML11Valid(ch)) {
553                     // check if it is surrogate
554                     if (++start <length) {
555                         surrogates(ch, chars[start]);
556                     } else {
557                         fatalError("The character '"+(char)ch+"' is an invalid XML character"); 
558                     }
559                     continue;
560                 }
561               
562                 if ( unescaped && XML11Char.isXML11ValidLiteral(ch))
563                     _printer.printText( ch );
564                 else
565                     printXMLChar( ch );
566             }
567         }
568     }
569 
570 
571     public boolean reset() {
572         super.reset();
573         return true;
574 
575     }
576 
577 }
578 
579 
580 
581 
582
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags