KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > xml > serialize > XML11Serializer


1 /*
2  * Copyright 1999-2002,2004,2005 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16
17
18
19 // Sep 14, 2000:
20
// Fixed problem with namespace handling. Contributed by
21
// David Blondeau <blondeau@intalio.com>
22
// Sep 14, 2000:
23
// Fixed serializer to report IO exception directly, instead of at
24
// the end of document processing.
25
// Reported by Patrick Higgins <phiggins@transzap.com>
26
// Aug 21, 2000:
27
// Fixed bug in startDocument not calling prepare.
28
// Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
29
// Aug 21, 2000:
30
// Added ability to omit DOCTYPE declaration.
31

32
33 package org.apache.xml.serialize;
34
35
36 import java.io.IOException JavaDoc;
37 import java.io.OutputStream JavaDoc;
38 import java.io.Writer JavaDoc;
39
40 import org.apache.xerces.dom.DOMMessageFormatter;
41 import org.apache.xerces.impl.Constants;
42 import org.apache.xerces.util.NamespaceSupport;
43 import org.apache.xerces.util.SymbolTable;
44 import org.apache.xerces.util.XML11Char;
45 import org.apache.xerces.util.XMLChar;
46 import org.xml.sax.SAXException JavaDoc;
47 import org.w3c.dom.DOMError JavaDoc;
48
49 /**
50  * Implements an XML serializer supporting both DOM and SAX pretty
51  * serializing. For usage instructions see {@link Serializer}.
52  * <p>
53  * If an output stream is used, the encoding is taken from the
54  * output format (defaults to <tt>UTF-8</tt>). If a writer is
55  * used, make sure the writer uses the same encoding (if applies)
56  * as specified in the output format.
57  * <p>
58  * The serializer supports both DOM and SAX. SAX serializing is done by firing
59  * SAX events and using the serializer as a document handler. DOM serializing is done
60  * by calling {@link #serialize(Document)} or by using DOM Level 3
61  * {@link org.w3c.dom.ls.DOMSerializer} and
62  * serializing with {@link org.w3c.dom.ls.DOMSerializer#write},
63  * {@link org.w3c.dom.ls.DOMSerializer#writeToString}.
64  * <p>
65  * If an I/O exception occurs while serializing, the serializer
66  * will not throw an exception directly, but only throw it
67  * at the end of serializing (either DOM or SAX's {@link
68  * org.xml.sax.DocumentHandler#endDocument}).
69  * <p>
70  * For elements that are not specified as whitespace preserving,
71  * the serializer will potentially break long text lines at space
72  * boundaries, indent lines, and serialize elements on separate
73  * lines. Line terminators will be regarded as spaces, and
74  * spaces at beginning of line will be stripped.
75  * @author <a HREF="mailto:arkin@intalio.com">Assaf Arkin</a>
76  * @author <a HREF="mailto:rahul.srivastava@sun.com">Rahul Srivastava</a>
77  * @author Elena Litani IBM
78  * @version $Revision: 1.14 $ $Date: 2005/05/03 11:12:21 $
79  * @see Serializer
80  */

81 public class XML11Serializer
82 extends XMLSerializer {
83
84     //
85
// constants
86
//
87

88     protected static final boolean DEBUG = false;
89
90     //
91
// data
92
//
93

94     //
95
// DOM Level 3 implementation: variables intialized in DOMSerializerImpl
96
//
97

98     /** stores namespaces in scope */
99     protected NamespaceSupport fNSBinder;
100
101     /** stores all namespace bindings on the current element */
102     protected NamespaceSupport fLocalNSBinder;
103
104     /** symbol table for serialization */
105     protected SymbolTable fSymbolTable;
106
107     // is node dom level 1 node?
108
protected boolean fDOML1 = false;
109     // counter for new prefix names
110
protected int fNamespaceCounter = 1;
111     protected final static String JavaDoc PREFIX = "NS";
112
113     /**
114      * Controls whether namespace fixup should be performed during
115      * the serialization.
116      * NOTE: if this field is set to true the following
117      * fields need to be initialized: fNSBinder, fLocalNSBinder, fSymbolTable,
118      * XMLSymbols.EMPTY_STRING, fXmlSymbol, fXmlnsSymbol, fNamespaceCounter.
119      */

120     protected boolean fNamespaces = false;
121
122
123     private boolean fPreserveSpace;
124
125
/**
 * Creates a serializer with the superclass defaults and marks its output
 * format as XML version "1.1". No output target is attached yet: call
 * {@link #setOutputCharStream} or {@link #setOutputByteStream} before
 * serializing anything.
 */
public XML11Serializer() {
    // The no-argument superclass constructor runs implicitly before this line.
    this._format.setVersion("1.1");
}
135
136
/**
 * Creates a serializer that uses the given output format and marks that
 * format as XML version "1.1". No output target is attached yet: call
 * {@link #setOutputCharStream} or {@link #setOutputByteStream} before
 * serializing anything.
 *
 * @param format The output format, handed unchanged to the superclass constructor
 */
public XML11Serializer( OutputFormat format ) {
    super( format );
    // Force the XML version on whatever format the superclass ended up with.
    this._format.setVersion("1.1");
}
146
147
/**
 * Constructs a new serializer that writes to the specified writer
 * using the specified output format, and marks that format as XML
 * version "1.1". If <tt>format</tt> is null, a default output format
 * is expected to be used; that substitution is delegated to the
 * superclass constructor and is not performed here.
 *
 * @param writer The writer to use
 * @param format The output format to use, null for the default
 */
public XML11Serializer( Writer writer, OutputFormat format ) {
    super( writer, format );
    _format.setVersion("1.1");
}
160
161
/**
 * Constructs a new serializer that writes to the specified output
 * stream using the specified output format, and marks that format as
 * XML version "1.1". If <tt>format</tt> is null, a fresh
 * <code>OutputFormat( Method.XML, null, false )</code> is used instead.
 *
 * @param output The output stream to use
 * @param format The output format to use, null for the default
 */
public XML11Serializer( OutputStream output, OutputFormat format ) {
    super( output, format != null ? format : new OutputFormat( Method.XML, null, false ) );
    _format.setVersion("1.1");
}
174
175     //-----------------------------------------//
176
// SAX content handler serializing methods //
177
//-----------------------------------------//
178

179
180     public void characters( char[] chars, int start, int length )
181         throws SAXException JavaDoc
182     {
183         ElementState state;
184
185         try {
186             state = content();
187
188             // Check if text should be print as CDATA section or unescaped
189
// based on elements listed in the output format (the element
190
// state) or whether we are inside a CDATA section or entity.
191

192             if ( state.inCData || state.doCData ) {
193                 int saveIndent;
194
195                 // Print a CDATA section. The text is not escaped, but ']]>'
196
// appearing in the code must be identified and dealt with.
197
// The contents of a text node is considered space preserving.
198
if ( ! state.inCData ) {
199                     _printer.printText( "<![CDATA[" );
200                     state.inCData = true;
201                 }
202                 saveIndent = _printer.getNextIndent();
203                 _printer.setNextIndent( 0 );
204                 char ch;
205                 final int end = start + length;
206                 for ( int index = start; index < end; ++index ) {
207                     ch = chars[index];
208                     if ( ch == ']' && index + 2 < end &&
209                         chars[ index + 1 ] == ']' && chars[ index + 2 ] == '>' ) {
210                         _printer.printText("]]]]><![CDATA[>");
211                         index +=2;
212                         continue;
213                     }
214                     if (!XML11Char.isXML11Valid(ch)) {
215                         // check if it is surrogate
216
if (++index < end) {
217                             surrogates(ch, chars[index]);
218                         }
219                         else {
220                             fatalError("The character '"+(char)ch+"' is an invalid XML character");
221                         }
222                         continue;
223                     } else {
224                         if ( _encodingInfo.isPrintable((char)ch) && XML11Char.isXML11ValidLiteral(ch)) {
225                             _printer.printText((char)ch);
226                         } else {
227                             // The character is not printable -- split CDATA section
228
_printer.printText("]]>&#x");
229                             _printer.printText(Integer.toHexString(ch));
230                             _printer.printText(";<![CDATA[");
231                         }
232                     }
233                 }
234                 _printer.setNextIndent( saveIndent );
235
236             } else {
237
238                 int saveIndent;
239
240                 if ( state.preserveSpace ) {
241                     // If preserving space then hold of indentation so no
242
// excessive spaces are printed at line breaks, escape
243
// the text content without replacing spaces and print
244
// the text breaking only at line breaks.
245
saveIndent = _printer.getNextIndent();
246                     _printer.setNextIndent( 0 );
247                     printText( chars, start, length, true, state.unescaped );
248                     _printer.setNextIndent( saveIndent );
249                 } else {
250                     printText( chars, start, length, false, state.unescaped );
251                 }
252             }
253         } catch ( IOException JavaDoc except ) {
254             throw new SAXException JavaDoc( except );
255         }
256     }
257
258
259     //
260
// overwrite printing functions to make sure serializer prints out valid XML
261
//
262
protected void printEscaped( String JavaDoc source ) throws IOException JavaDoc {
263         int length = source.length();
264         for ( int i = 0 ; i < length ; ++i ) {
265             int ch = source.charAt(i);
266             if (!XML11Char.isXML11Valid(ch)) {
267                 if (++i <length) {
268                     surrogates(ch, source.charAt(i));
269                 } else {
270                     fatalError("The character '"+(char)ch+"' is an invalid XML character");
271                 }
272                 continue;
273             }
274             if (ch == '\n' || ch == '\r' || ch == '\t' || ch == 0x0085 || ch == 0x2028){
275                 printHex(ch);
276             } else if (ch == '<') {
277                 _printer.printText("&lt;");
278             } else if (ch == '&') {
279                 _printer.printText("&amp;");
280             } else if (ch == '"') {
281                 _printer.printText("&quot;");
282             } else if ((ch >= ' ' && _encodingInfo.isPrintable((char) ch))) {
283                 _printer.printText((char) ch);
284             } else {
285                 printHex(ch);
286             }
287         }
288     }
289
290     protected final void printCDATAText(String JavaDoc text) throws IOException JavaDoc {
291         int length = text.length();
292         char ch;
293
294         for (int index = 0; index < length; ++index) {
295             ch = text.charAt(index);
296
297             if (ch == ']'
298                 && index + 2 < length
299                 && text.charAt(index + 1) == ']'
300                 && text.charAt(index + 2) == '>') { // check for ']]>'
301
if (fDOMErrorHandler != null){
302                     // REVISIT: this means that if DOM Error handler is not registered we don't report any
303
// fatal errors and might serialize not wellformed document
304
if ((features & DOMSerializerImpl.SPLITCDATA) == 0
305                     && (features & DOMSerializerImpl.WELLFORMED) == 0) {
306                     // issue fatal error
307
String JavaDoc msg =
308                         DOMMessageFormatter.formatMessage(
309                             DOMMessageFormatter.SERIALIZER_DOMAIN,
310                             "EndingCDATA",
311                             null);
312                     modifyDOMError(
313                         msg,
314                         DOMError.SEVERITY_FATAL_ERROR,
315                         null, fCurrentNode);
316                     boolean continueProcess =
317                         fDOMErrorHandler.handleError(fDOMError);
318                     if (!continueProcess) {
319                         throw new IOException JavaDoc();
320                     }
321                 } else {
322                     // issue warning
323
String JavaDoc msg =
324                         DOMMessageFormatter.formatMessage(
325                             DOMMessageFormatter.SERIALIZER_DOMAIN,
326                             "SplittingCDATA",
327                             null);
328                     modifyDOMError(
329                         msg,
330                         DOMError.SEVERITY_WARNING,
331                         null, fCurrentNode);
332                     fDOMErrorHandler.handleError(fDOMError);
333                 }
334                 }
335                 // split CDATA section
336
_printer.printText("]]]]><![CDATA[>");
337                 index += 2;
338                 continue;
339             }
340
341             if (!XML11Char.isXML11Valid(ch)) {
342                 // check if it is surrogate
343
if (++index < length) {
344                     surrogates(ch, text.charAt(index));
345                 } else {
346                     fatalError(
347                         "The character '"
348                             + (char) ch
349                             + "' is an invalid XML character");
350                 }
351                 continue;
352             } else {
353                 if (_encodingInfo.isPrintable((char) ch)
354                     && XML11Char.isXML11ValidLiteral(ch)) {
355                     _printer.printText((char) ch);
356                 } else {
357
358                     // The character is not printable -- split CDATA section
359
_printer.printText("]]>&#x");
360                     _printer.printText(Integer.toHexString(ch));
361                     _printer.printText(";<![CDATA[");
362                 }
363             }
364         }
365     }
366
367
368     // note that this "int" should, in all cases, be a char.
369
// REVISIT: make it a char...
370
protected final void printXMLChar( int ch ) throws IOException JavaDoc {
371         
372         if (ch == '\r' || ch == 0x0085 || ch == 0x2028) {
373             printHex(ch);
374         } else if ( ch == '<') {
375             _printer.printText("&lt;");
376         } else if (ch == '&') {
377             _printer.printText("&amp;");
378         } else if (ch == '>'){
379             // character sequence "]]>" can't appear in content, therefore
380
// we should escape '>'
381
_printer.printText("&gt;");
382         } else if ( _encodingInfo.isPrintable((char)ch) && XML11Char.isXML11ValidLiteral(ch)) {
383             _printer.printText((char)ch);
384         } else {
385              printHex(ch);
386         }
387     }
388
389
390
391     protected final void surrogates(int high, int low) throws IOException JavaDoc{
392         if (XMLChar.isHighSurrogate(high)) {
393             if (!XMLChar.isLowSurrogate(low)) {
394                 //Invalid XML
395
fatalError("The character '"+(char)low+"' is an invalid XML character");
396             }
397             else {
398                 int supplemental = XMLChar.supplemental((char)high, (char)low);
399                 if (!XML11Char.isXML11Valid(supplemental)) {
400                     //Invalid XML
401
fatalError("The character '"+(char)supplemental+"' is an invalid XML character");
402                 }
403                 else {
404                     if (content().inCData ) {
405                         _printer.printText("]]>&#x");
406                         _printer.printText(Integer.toHexString(supplemental));
407                         _printer.printText(";<![CDATA[");
408                     }
409                     else {
410                         printHex(supplemental);
411                     }
412                 }
413             }
414         } else {
415             fatalError("The character '"+(char)high+"' is an invalid XML character");
416         }
417
418     }
419
420
421     protected void printText( String JavaDoc text, boolean preserveSpace, boolean unescaped )
422     throws IOException JavaDoc {
423         int index;
424         char ch;
425         int length = text.length();
426         if ( preserveSpace ) {
427             // Preserving spaces: the text must print exactly as it is,
428
// without breaking when spaces appear in the text and without
429
// consolidating spaces. If a line terminator is used, a line
430
// break will occur.
431
for ( index = 0 ; index < length ; ++index ) {
432                 ch = text.charAt( index );
433                 if (!XML11Char.isXML11Valid(ch)) {
434                     // check if it is surrogate
435
if (++index <length) {
436                         surrogates(ch, text.charAt(index));
437                     } else {
438                         fatalError("The character '"+(char)ch+"' is an invalid XML character");
439                     }
440                     continue;
441                 }
442                 if ( unescaped && XML11Char.isXML11ValidLiteral(ch)) {
443                     _printer.printText( ch );
444                 } else
445                     printXMLChar( ch );
446             }
447         } else {
448             // Not preserving spaces: print one part at a time, and
449
// use spaces between parts to break them into different
450
// lines. Spaces at beginning of line will be stripped
451
// by printing mechanism. Line terminator is treated
452
// no different than other text part.
453
for ( index = 0 ; index < length ; ++index ) {
454                 ch = text.charAt( index );
455                 if (!XML11Char.isXML11Valid(ch)) {
456                     // check if it is surrogate
457
if (++index <length) {
458                         surrogates(ch, text.charAt(index));
459                     } else {
460                         fatalError("The character '"+(char)ch+"' is an invalid XML character");
461                     }
462                     continue;
463                 }
464
465                 if ( unescaped && XML11Char.isXML11ValidLiteral(ch) )
466                     _printer.printText( ch );
467                 else
468                     printXMLChar( ch);
469             }
470         }
471     }
472
473
474
475     protected void printText( char[] chars, int start, int length,
476                               boolean preserveSpace, boolean unescaped ) throws IOException JavaDoc {
477         int index;
478         char ch;
479
480         if ( preserveSpace ) {
481             // Preserving spaces: the text must print exactly as it is,
482
// without breaking when spaces appear in the text and without
483
// consolidating spaces. If a line terminator is used, a line
484
// break will occur.
485
while ( length-- > 0 ) {
486                 ch = chars[start++];
487                 if (!XML11Char.isXML11Valid(ch)) {
488                     // check if it is surrogate
489
if ( length-- > 0) {
490                         surrogates(ch, chars[start++]);
491                     } else {
492                         fatalError("The character '"+(char)ch+"' is an invalid XML character");
493                     }
494                     continue;
495                 }
496                 if ( unescaped && XML11Char.isXML11ValidLiteral(ch))
497                     _printer.printText( ch );
498                 else
499                     printXMLChar( ch );
500             }
501         } else {
502             // Not preserving spaces: print one part at a time, and
503
// use spaces between parts to break them into different
504
// lines. Spaces at beginning of line will be stripped
505
// by printing mechanism. Line terminator is treated
506
// no different than other text part.
507
while ( length-- > 0 ) {
508                 ch = chars[start++];
509                 if (!XML11Char.isXML11Valid(ch)) {
510                     // check if it is surrogate
511
if ( length-- > 0) {
512                         surrogates(ch, chars[start++]);
513                     } else {
514                         fatalError("The character '"+(char)ch+"' is an invalid XML character");
515                     }
516                     continue;
517                 }
518               
519                 if ( unescaped && XML11Char.isXML11ValidLiteral(ch))
520                     _printer.printText( ch );
521                 else
522                     printXMLChar( ch );
523             }
524         }
525     }
526
527
528     public boolean reset() {
529         super.reset();
530         return true;
531
532     }
533
534 }
535
536
537
538
539
Popular Tags