KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sun > org > apache > xml > internal > serialize > XML11Serializer


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 1999-2002 The Apache Software Foundation. All rights
6  * reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

// Sep 14, 2000:
//  Fixed problem with namespace handling. Contributed by
//  David Blondeau <blondeau@intalio.com>
// Sep 14, 2000:
//  Fixed serializer to report IO exception directly, instead of at
//  the end of document processing.
//  Reported by Patrick Higgins <phiggins@transzap.com>
// Aug 21, 2000:
//  Fixed bug in startDocument not calling prepare.
//  Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
// Aug 21, 2000:
//  Added ability to omit DOCTYPE declaration.

74 package com.sun.org.apache.xml.internal.serialize;
75
76
77 import java.io.IOException JavaDoc;
78 import java.io.OutputStream JavaDoc;
79 import java.io.Writer JavaDoc;
80
81 import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter;
82 import org.w3c.dom.DOMError JavaDoc;
83 import com.sun.org.apache.xerces.internal.impl.Constants;
84 import com.sun.org.apache.xerces.internal.util.NamespaceSupport;
85 import com.sun.org.apache.xerces.internal.util.SymbolTable;
86 import com.sun.org.apache.xerces.internal.util.XML11Char;
87 import com.sun.org.apache.xerces.internal.util.XMLChar;
88 import org.xml.sax.SAXException JavaDoc;
89
90 /**
91  * Implements an XML serializer supporting both DOM and SAX pretty
92  * serializing. For usage instructions see {@link Serializer}.
93  * <p>
94  * If an output stream is used, the encoding is taken from the
95  * output format (defaults to <tt>UTF-8</tt>). If a writer is
96  * used, make sure the writer uses the same encoding (if applies)
97  * as specified in the output format.
98  * <p>
99  * The serializer supports both DOM and SAX. SAX serializing is done by firing
100  * SAX events and using the serializer as a document handler. DOM serializing is done
101  * by calling {@link #serialize(Document)} or by using DOM Level 3
102  * {@link org.w3c.dom.ls.DOMSerializer} and
103  * serializing with {@link org.w3c.dom.ls.DOMSerializer#write},
104  * {@link org.w3c.dom.ls.DOMSerializer#writeToString}.
105  * <p>
106  * If an I/O exception occurs while serializing, the serializer
107  * will not throw an exception directly, but only throw it
108  * at the end of serializing (either DOM or SAX's {@link
109  * org.xml.sax.DocumentHandler#endDocument}.
110  * <p>
111  * For elements that are not specified as whitespace preserving,
112  * the serializer will potentially break long text lines at space
113  * boundaries, indent lines, and serialize elements on separate
114  * lines. Line terminators will be regarded as spaces, and
115  * spaces at beginning of line will be stripped.
116  * @author <a HREF="mailto:arkin@intalio.com">Assaf Arkin</a>
117  * @author <a HREF="mailto:rahul.srivastava@sun.com">Rahul Srivastava</a>
118  * @author Elena Litani IBM
119  * @version $Revision: 1.8 $ $Date: 2004/01/29 21:11:30 $
120  * @see Serializer
121  */

122 public class XML11Serializer
123 extends XMLSerializer {
124
125     //
126
// constants
127
//
128

129     protected static final boolean DEBUG = false;
130
131     //
132
// data
133
//
134

135     //
136
// DOM Level 3 implementation: variables intialized in DOMSerializerImpl
137
//
138

139     /** stores namespaces in scope */
140     protected NamespaceSupport fNSBinder;
141
142     /** stores all namespace bindings on the current element */
143     protected NamespaceSupport fLocalNSBinder;
144
145     /** symbol table for serialization */
146     protected SymbolTable fSymbolTable;
147
148     // is node dom level 1 node?
149
protected boolean fDOML1 = false;
150     // counter for new prefix names
151
protected int fNamespaceCounter = 1;
152     protected final static String JavaDoc PREFIX = "NS";
153
154     /**
155      * Controls whether namespace fixup should be performed during
156      * the serialization.
157      * NOTE: if this field is set to true the following
158      * fields need to be initialized: fNSBinder, fLocalNSBinder, fSymbolTable,
159      * XMLSymbols.EMPTY_STRING, fXmlSymbol, fXmlnsSymbol, fNamespaceCounter.
160      */

161     protected boolean fNamespaces = false;
162
163
164     private boolean fPreserveSpace;
165
166
167     /**
168      * Constructs a new serializer. The serializer cannot be used without
169      * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
170      * first.
171      */

172     public XML11Serializer() {
173         super( );
174         _format.setVersion("1.1");
175     }
176
177
178     /**
179      * Constructs a new serializer. The serializer cannot be used without
180      * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
181      * first.
182      */

183     public XML11Serializer( OutputFormat format ) {
184         super( format );
185         _format.setVersion("1.1");
186     }
187
188
189     /**
190      * Constructs a new serializer that writes to the specified writer
191      * using the specified output format. If <tt>format</tt> is null,
192      * will use a default output format.
193      *
194      * @param writer The writer to use
195      * @param format The output format to use, null for the default
196      */

197     public XML11Serializer( Writer JavaDoc writer, OutputFormat format ) {
198         super( writer, format );
199         _format.setVersion("1.1");
200     }
201
202
203     /**
204      * Constructs a new serializer that writes to the specified output
205      * stream using the specified output format. If <tt>format</tt>
206      * is null, will use a default output format.
207      *
208      * @param output The output stream to use
209      * @param format The output format to use, null for the default
210      */

211     public XML11Serializer( OutputStream JavaDoc output, OutputFormat format ) {
212         super( output, format != null ? format : new OutputFormat( Method.XML, null, false ) );
213         _format.setVersion("1.1");
214     }
215
216     //-----------------------------------------//
217
// SAX content handler serializing methods //
218
//-----------------------------------------//
219

220
221     public void characters( char[] chars, int start, int length )
222         throws SAXException JavaDoc
223     {
224         ElementState state;
225
226         try {
227             state = content();
228
229             // Check if text should be print as CDATA section or unescaped
230
// based on elements listed in the output format (the element
231
// state) or whether we are inside a CDATA section or entity.
232

233             if ( state.inCData || state.doCData ) {
234                 int saveIndent;
235
236                 // Print a CDATA section. The text is not escaped, but ']]>'
237
// appearing in the code must be identified and dealt with.
238
// The contents of a text node is considered space preserving.
239
if ( ! state.inCData ) {
240                     _printer.printText( "<![CDATA[" );
241                     state.inCData = true;
242                 }
243                 saveIndent = _printer.getNextIndent();
244                 _printer.setNextIndent( 0 );
245                 char ch;
246                 for ( int index = start ; index < length ; ++index ) {
247                     ch = chars[index];
248                     if ( ch == ']' && index + 2 < length &&
249                         chars[ index + 1 ] == ']' && chars[ index + 2 ] == '>' ) {
250                         _printer.printText("]]]]><![CDATA[>");
251                         index +=2;
252                         continue;
253                     }
254                     if (!XML11Char.isXML11Valid(ch)) {
255                         // check if it is surrogate
256
if (++index <length) {
257                             surrogates(ch, chars[index]);
258                         }
259                         else {
260                             fatalError("The character '"+(char)ch+"' is an invalid XML character");
261                         }
262                         continue;
263                     } else {
264                         if ( _encodingInfo.isPrintable((char)ch) && XML11Char.isXML11ValidLiteral(ch)) {
265                             _printer.printText((char)ch);
266                         } else {
267                             // The character is not printable -- split CDATA section
268
_printer.printText("]]>&#x");
269                             _printer.printText(Integer.toHexString(ch));
270                             _printer.printText(";<![CDATA[");
271                         }
272                     }
273                 }
274                 _printer.setNextIndent( saveIndent );
275
276             } else {
277
278                 int saveIndent;
279
280                 if ( state.preserveSpace ) {
281                     // If preserving space then hold of indentation so no
282
// excessive spaces are printed at line breaks, escape
283
// the text content without replacing spaces and print
284
// the text breaking only at line breaks.
285
saveIndent = _printer.getNextIndent();
286                     _printer.setNextIndent( 0 );
287                     printText( chars, start, length, true, state.unescaped );
288                     _printer.setNextIndent( saveIndent );
289                 } else {
290                     printText( chars, start, length, false, state.unescaped );
291                 }
292             }
293         } catch ( IOException JavaDoc except ) {
294             throw new SAXException JavaDoc( except );
295         }
296     }
297
298
299     //
300
// overwrite printing functions to make sure serializer prints out valid XML
301
//
302
protected void printEscaped( String JavaDoc source ) throws IOException JavaDoc {
303         int length = source.length();
304         for ( int i = 0 ; i < length ; ++i ) {
305             int ch = source.charAt(i);
306             if (!XML11Char.isXML11Valid(ch)) {
307                 if (++i <length) {
308                     surrogates(ch, source.charAt(i));
309                 } else {
310                     fatalError("The character '"+(char)ch+"' is an invalid XML character");
311                 }
312                 continue;
313             }
314             if (ch == '\n' || ch == '\r' || ch == '\t' || ch == 0x0085 || ch == 0x2028){
315                 printHex(ch);
316             } else if (ch == '<') {
317                 _printer.printText("&lt;");
318             } else if (ch == '&') {
319                 _printer.printText("&amp;");
320             } else if (ch == '"') {
321                 _printer.printText("&quot;");
322             } else if ((ch >= ' ' && _encodingInfo.isPrintable((char) ch))) {
323                 _printer.printText((char) ch);
324             } else {
325                 printHex(ch);
326             }
327         }
328     }
329
330     protected final void printCDATAText(String JavaDoc text) throws IOException JavaDoc {
331         int length = text.length();
332         char ch;
333
334         for (int index = 0; index < length; ++index) {
335             ch = text.charAt(index);
336
337             if (ch == ']'
338                 && index + 2 < length
339                 && text.charAt(index + 1) == ']'
340                 && text.charAt(index + 2) == '>') { // check for ']]>'
341
if (fDOMErrorHandler != null){
342                     // REVISIT: this means that if DOM Error handler is not registered we don't report any
343
// fatal errors and might serialize not wellformed document
344
if ((features & DOMSerializerImpl.SPLITCDATA) == 0
345                     && (features & DOMSerializerImpl.WELLFORMED) == 0) {
346                     // issue fatal error
347
String JavaDoc msg =
348                         DOMMessageFormatter.formatMessage(
349                             DOMMessageFormatter.SERIALIZER_DOMAIN,
350                             "EndingCDATA",
351                             null);
352                     modifyDOMError(
353                         msg,
354                         DOMError.SEVERITY_FATAL_ERROR,
355                         fCurrentNode);
356                     boolean continueProcess =
357                         fDOMErrorHandler.handleError(fDOMError);
358                     if (!continueProcess) {
359                         throw new IOException JavaDoc();
360                     }
361                 } else {
362                     // issue warning
363
String JavaDoc msg =
364                         DOMMessageFormatter.formatMessage(
365                             DOMMessageFormatter.SERIALIZER_DOMAIN,
366                             "SplittingCDATA",
367                             null);
368                     modifyDOMError(
369                         msg,
370                         DOMError.SEVERITY_WARNING,
371                         fCurrentNode);
372                     fDOMErrorHandler.handleError(fDOMError);
373                 }
374                 }
375                 // split CDATA section
376
_printer.printText("]]]]><![CDATA[>");
377                 index += 2;
378                 continue;
379             }
380
381             if (!XML11Char.isXML11Valid(ch)) {
382                 // check if it is surrogate
383
if (++index < length) {
384                     surrogates(ch, text.charAt(index));
385                 } else {
386                     fatalError(
387                         "The character '"
388                             + (char) ch
389                             + "' is an invalid XML character");
390                 }
391                 continue;
392             } else {
393                 if (_encodingInfo.isPrintable((char) ch)
394                     && XML11Char.isXML11ValidLiteral(ch)) {
395                     _printer.printText((char) ch);
396                 } else {
397
398                     // The character is not printable -- split CDATA section
399
_printer.printText("]]>&#x");
400                     _printer.printText(Integer.toHexString(ch));
401                     _printer.printText(";<![CDATA[");
402                 }
403             }
404         }
405     }
406
407
408     // note that this "int" should, in all cases, be a char.
409
// REVISIT: make it a char...
410
protected final void printXMLChar( int ch ) throws IOException JavaDoc {
411         
412         if (ch == '\r' || ch == 0x0085 || ch == 0x2028) {
413             printHex(ch);
414         } else if ( ch == '<') {
415             _printer.printText("&lt;");
416         } else if (ch == '&') {
417             _printer.printText("&amp;");
418         } else if (ch == '>'){
419             // character sequence "]]>" can't appear in content, therefore
420
// we should escape '>'
421
_printer.printText("&gt;");
422         } else if ( _encodingInfo.isPrintable((char)ch) && XML11Char.isXML11ValidLiteral(ch)) {
423             _printer.printText((char)ch);
424         } else {
425              printHex(ch);
426         }
427     }
428
429
430
431     protected final void surrogates(int high, int low) throws IOException JavaDoc{
432         if (XMLChar.isHighSurrogate(high)) {
433             if (!XMLChar.isLowSurrogate(low)) {
434                 //Invalid XML
435
fatalError("The character '"+(char)low+"' is an invalid XML character");
436             }
437             else {
438                 int supplemental = XMLChar.supplemental((char)high, (char)low);
439                 if (!XML11Char.isXML11Valid(supplemental)) {
440                     //Invalid XML
441
fatalError("The character '"+(char)supplemental+"' is an invalid XML character");
442                 }
443                 else {
444                     if (content().inCData ) {
445                         _printer.printText("]]>&#x");
446                         _printer.printText(Integer.toHexString(supplemental));
447                         _printer.printText(";<![CDATA[");
448                     }
449                     else {
450                         printHex(supplemental);
451                     }
452                 }
453             }
454         } else {
455             fatalError("The character '"+(char)high+"' is an invalid XML character");
456         }
457
458     }
459
460
461     protected void printText( String JavaDoc text, boolean preserveSpace, boolean unescaped )
462     throws IOException JavaDoc {
463         int index;
464         char ch;
465         int length = text.length();
466         if ( preserveSpace ) {
467             // Preserving spaces: the text must print exactly as it is,
468
// without breaking when spaces appear in the text and without
469
// consolidating spaces. If a line terminator is used, a line
470
// break will occur.
471
for ( index = 0 ; index < length ; ++index ) {
472                 ch = text.charAt( index );
473                 if (!XML11Char.isXML11Valid(ch)) {
474                     // check if it is surrogate
475
if (++index <length) {
476                         surrogates(ch, text.charAt(index));
477                     } else {
478                         fatalError("The character '"+(char)ch+"' is an invalid XML character");
479                     }
480                     continue;
481                 }
482                 if ( unescaped && XML11Char.isXML11ValidLiteral(ch)) {
483                     _printer.printText( ch );
484                 } else
485                     printXMLChar( ch );
486             }
487         } else {
488             // Not preserving spaces: print one part at a time, and
489
// use spaces between parts to break them into different
490
// lines. Spaces at beginning of line will be stripped
491
// by printing mechanism. Line terminator is treated
492
// no different than other text part.
493
for ( index = 0 ; index < length ; ++index ) {
494                 ch = text.charAt( index );
495                 if (!XML11Char.isXML11Valid(ch)) {
496                     // check if it is surrogate
497
if (++index <length) {
498                         surrogates(ch, text.charAt(index));
499                     } else {
500                         fatalError("The character '"+(char)ch+"' is an invalid XML character");
501                     }
502                     continue;
503                 }
504
505                 if ( unescaped && XML11Char.isXML11ValidLiteral(ch) )
506                     _printer.printText( ch );
507                 else
508                     printXMLChar( ch);
509             }
510         }
511     }
512
513
514
515     protected void printText( char[] chars, int start, int length,
516                               boolean preserveSpace, boolean unescaped ) throws IOException JavaDoc {
517         int index;
518         char ch;
519
520         if ( preserveSpace ) {
521             // Preserving spaces: the text must print exactly as it is,
522
// without breaking when spaces appear in the text and without
523
// consolidating spaces. If a line terminator is used, a line
524
// break will occur.
525
while ( length-- > 0 ) {
526                 ch = chars[ start ];
527                 ++start;
528                 if (!XML11Char.isXML11Valid(ch)) {
529                     // check if it is surrogate
530
if (++start <length) {
531                         surrogates(ch, chars[start]);
532                     } else {
533                         fatalError("The character '"+(char)ch+"' is an invalid XML character");
534                     }
535                     continue;
536                 }
537                 if ( unescaped && XML11Char.isXML11ValidLiteral(ch))
538                     _printer.printText( ch );
539                 else
540                     printXMLChar( ch );
541             }
542         } else {
543             // Not preserving spaces: print one part at a time, and
544
// use spaces between parts to break them into different
545
// lines. Spaces at beginning of line will be stripped
546
// by printing mechanism. Line terminator is treated
547
// no different than other text part.
548
while ( length-- > 0 ) {
549                 ch = chars[ start ];
550                 ++start;
551
552                 if (!XML11Char.isXML11Valid(ch)) {
553                     // check if it is surrogate
554
if (++start <length) {
555                         surrogates(ch, chars[start]);
556                     } else {
557                         fatalError("The character '"+(char)ch+"' is an invalid XML character");
558                     }
559                     continue;
560                 }
561               
562                 if ( unescaped && XML11Char.isXML11ValidLiteral(ch))
563                     _printer.printText( ch );
564                 else
565                     printXMLChar( ch );
566             }
567         }
568     }
569
570
571     public boolean reset() {
572         super.reset();
573         return true;
574
575     }
576
577 }
578
579
580
581
582
Popular Tags