KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > nu > xom > Serializer


1 /* Copyright 2002-2005 Elliotte Rusty Harold
2    
3    This library is free software; you can redistribute it and/or modify
4    it under the terms of version 2.1 of the GNU Lesser General Public
5    License as published by the Free Software Foundation.
6    
7    This library is distributed in the hope that it will be useful,
8    but WITHOUT ANY WARRANTY; without even the implied warranty of
9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10    GNU Lesser General Public License for more details.
11    
12    You should have received a copy of the GNU Lesser General Public
13    License along with this library; if not, write to the
14    Free Software Foundation, Inc., 59 Temple Place, Suite 330,
15    Boston, MA 02111-1307 USA
16    
17    You can contact Elliotte Rusty Harold by sending e-mail to
18    elharo@metalab.unc.edu. Please include the word "XOM" in the
19    subject line. The XOM home page is located at http://www.xom.nu/
20 */

21
22 package nu.xom;
23
24 import java.io.BufferedWriter JavaDoc;
25 import java.io.IOException JavaDoc;
26 import java.io.OutputStream JavaDoc;
27 import java.io.OutputStreamWriter JavaDoc;
28 import java.io.UnsupportedEncodingException JavaDoc;
29 import java.io.Writer JavaDoc;
30 import java.util.Locale JavaDoc;
31
32 /**
33  * <p>
34  * Outputs a <code>Document</code> object in a specific encoding using
35  * various options for controlling white space, normalization,
36  * indenting, line breaking, and base URIs. However, in general these
37  * options do affect the document's infoset. In particular, if you set
38  * either the maximum line length or the indent size to a positive
39  * value, then the serializer will not respect input white space. It
40  * may trim leading and trailing space, condense runs of white
41  * space to a single space, convert carriage returns and line
42  * feeds to spaces, add extra space where none was present before,
43  * and otherwise muck with the document's white space.
44  * The defaults, however, preserve all significant white space
45  * including ignorable white space and boundary white space.
46  * </p>
47  *
48  * @author Elliotte Rusty Harold
49  * @version 1.0
50  *
51  */

52 public class Serializer {
53
54     private TextWriter escaper;
55     private boolean preserveBaseURI = false;
56
57     
58     /**
59      * <p>
60      * Create a new serializer that uses the UTF-8 encoding.
61      * </p>
62      *
63      * @param out the output stream to write the document on
64      *
65      * @throws NullPointerException if <code>out</code> is null
66      */

public Serializer(OutputStream out) {

    // Fail fast with a descriptive message rather than deep inside
    // the writer constructors.
    if (out == null) {
        throw new NullPointerException("Null OutputStream");
    }
    try {
        // The stream writer is opened with the java.io alias "UTF8",
        // while the escaper is created with the name "UTF-8".
        Writer writer = new BufferedWriter(
          new OutputStreamWriter(out, "UTF8"));
        escaper = TextWriterFactory.getTextWriter(writer, "UTF-8");
    }
    catch (UnsupportedEncodingException ex) {
        // Every VM is required to support UTF-8, so this indicates a
        // broken runtime. The original exception is chained as the
        // cause so the diagnostic detail is not lost.
        throw new RuntimeException(
          "The VM is broken. It does not understand UTF-8.", ex);
    }

}
83     
84     
85     /**
86      * <p>
87      * Create a new serializer that uses the specified encoding.
88      * The encoding must be recognized by the Java virtual machine. If
89      * you attempt to use an encoding that the local Java virtual
90      * machine does not support, the constructor will throw an
91      * <code>UnsupportedEncodingException</code>.
92      * Currently the following encodings are recognized by XOM:
93      * </p>
94      *
95      * <ul>
96      * <li>UTF-8</li>
97      * <li>UTF-16</li>
98      * <li>UTF-16BE</li>
99      * <li>UTF-16LE</li>
100      * <li>ISO-10646-UCS-2</li>
101      * <li>ISO-8859-1</li>
102      * <li>ISO-8859-2</li>
103      * <li>ISO-8859-3</li>
104      * <li>ISO-8859-4</li>
105      * <li>ISO-8859-5</li>
106      * <li>ISO-8859-6</li>
107      * <li>ISO-8859-7</li>
108      * <li>ISO-8859-8</li>
109      * <li>ISO-8859-9</li>
110      * <li>ISO-8859-10</li>
111      * <li>ISO-8859-11 (a.k.a. TIS-620)</li>
112      * <li>ISO-8859-13</li>
113      * <li>ISO-8859-14</li>
114      * <li>ISO-8859-15</li>
115      * <li>ISO-8859-16</li>
116      * <li>IBM037 (a.k.a. CP037, EBCDIC-CP-US, EBCDIC-CP-CA,
117      * EBCDIC-CP-WA, EBCDIC-CP-NL, and CSIBM037)</li>
118      * <li>GB18030</li>
119      * </ul>
120      *
121      * <p>
122      * You can use encodings not in this list if the virtual
123      * machine supports them. However, they may be
124      * significantly slower than the encodings in this list.
125      * </p>
126      *
127      * <p>
128      * I've noticed Java has significant bugs in its handling of some
129      * of these encodings. In some cases such as 0x80 in Big5, XOM
130      * will escape a character that should not need to be escaped
131      * because Java can't output that character in the specified
132      * encoding, even though the output character set does contain it.
133      * :-(
134      * </p>
135      *
136      * @param out the output stream to write the document on
137      * @param encoding the character encoding for the serialization
138
139      * @throws NullPointerException if <code>out</code>
140      * or <code>encoding</code> is null
141      * @throws UnsupportedEncodingException if the VM does not
142      * support the requested encoding
143      *
144      */

public Serializer(OutputStream out, String encoding)
  throws UnsupportedEncodingException {

    // Work out which argument, if any, is missing. The stream is
    // checked first, so it wins when both arguments are null.
    String problem = null;
    if (out == null) {
        problem = "Null OutputStream";
    }
    else if (encoding == null) {
        problem = "Null encoding";
    }
    if (problem != null) {
        throw new NullPointerException(problem);
    }

    // Both arguments are present: build the writer chain and escaper.
    setOutputStream(out, encoding);

}
158     
159     
160     /**
161      * <p>
162      * Flushes the previous output stream and
163      * redirects further output to the new output stream.
164      * </p>
165      *
166      *
167      * @param out the output stream to write the document on
168
169      * @throws NullPointerException if <code>out</code> is null
170      * @throws IOException if the previous output stream
171      * encounters an I/O error when flushed
172      *
173      */

174     public void setOutputStream(OutputStream JavaDoc out)
175       throws IOException JavaDoc {
176         
177         // flush any data onto the old output stream
178
this.flush();
179         int maxLength = getMaxLength();
180         int indent = this.getIndent();
181         String JavaDoc lineSeparator = getLineSeparator();
182         boolean nfc = getUnicodeNormalizationFormC();
183         String JavaDoc encoding = escaper.getEncoding();
184         setOutputStream(out, encoding);
185         setIndent(indent);
186         setMaxLength(maxLength);
187         setUnicodeNormalizationFormC(nfc);
188         setLineSeparator(lineSeparator);
189         
190     }
191
192     
193     private void setOutputStream(OutputStream JavaDoc out, String JavaDoc encoding)
194         throws UnsupportedEncodingException JavaDoc {
195         
196         Writer JavaDoc writer;
197         String JavaDoc encodingUpperCase = encoding.toUpperCase(Locale.ENGLISH);
198         // Java's Cp037 encoding is broken, so we have to
199
// provide our own.
200
if (encodingUpperCase.equals("IBM037")
201           || encodingUpperCase.equals("CP037")
202           || encodingUpperCase.equals("EBCDIC-CP-US")
203           || encodingUpperCase.equals("EBCDIC-CP-CA")
204           || encodingUpperCase.equals("EBCDIC-CP-WA")
205           || encodingUpperCase.equals("EBCDIC-CP-NL")
206           || encodingUpperCase.equals("CSIBM037")) {
207             writer = new EBCDICWriter(out);
208         }
209         else if (encodingUpperCase.equals("UTF-16")
210           || encodingUpperCase.equals("ISO-10646-UCS-2")) {
211            // For compatibility with Java 1.2 and earlier
212
writer = new OutputStreamWriter JavaDoc(out, "UnicodeBig");
213         }
214         else if (encodingUpperCase.equals("ISO-8859-11")
215           || encodingUpperCase.equals("TIS-620")) {
216            // Java doesn't recognize the name ISO-8859-11 and
217
// Java 1.3 and earlier don't recognize TIS-620
218
writer = new OutputStreamWriter JavaDoc(out, "TIS620");
219         }
220         else writer = new OutputStreamWriter JavaDoc(out, encoding);
221         writer = new BufferedWriter JavaDoc(writer);
222         this.escaper = TextWriterFactory.getTextWriter(writer, encoding);
223         
224     }
225
226     
227     /**
228      * <p>
229      * Serializes a document onto the output
230      * stream using the current options.
231      * </p>
232      *
233      * @param doc the <code>Document</code> to serialize
234      *
235      * @throws IOException if the underlying output stream
236      * encounters an I/O error
237      * @throws NullPointerException if <code>doc</code> is null
238      * @throws UnavailableCharacterException if the document contains
239      * an unescapable character (e.g. in an element name) that is
240      * not available in the current encoding
241      */

242     public void write(Document doc) throws IOException JavaDoc {
243         
244         escaper.reset();
245         // The OutputStreamWriter automatically inserts
246
// the byte order mark if necessary.
247
writeXMLDeclaration();
248         int childCount = doc.getChildCount();
249         for (int i = 0; i < childCount; i++) {
250             writeChild(doc.getChild(i));
251             
252             // Might want to remove this line break in a
253
// non-XML serializer where it's not guaranteed to be
254
// OK to add extra line breaks in the prolog
255
escaper.breakLine();
256         }
257         escaper.flush();
258         
259     }
260
261
262     /**
263      * <p>
264      * Writes the XML declaration onto the output stream,
265      * followed by a line break.
266      * </p>
267      *
268      * @throws IOException if the underlying output stream
269      * encounters an I/O error
270      */

271     protected void writeXMLDeclaration() throws IOException JavaDoc {
272         
273         escaper.writeMarkup("<?xml version=\"1.0\" encoding=\"");
274         escaper.writeMarkup(escaper.getEncoding());
275         escaper.writeMarkup("\"?>");
276         escaper.breakLine();
277         
278     }
279     
280     
281     /**
282      * <p>
283      * Serializes an element onto the output stream using the current
284      * options. The result is guaranteed to be well-formed. If
285      * <code>element</code> does not have a parent element, the output
286      * will also be namespace well-formed.
287      * </p>
288      *
289      * <p>
290      * If the element is empty, this method invokes
291      * <code>writeEmptyElementTag</code>. If the element is not
292      * empty, then:
293      * </p>
294      *
295      * <ol>
296      * <li>It calls <code>writeStartTag</code></li>
297      * <li>It passes each of the element's children to
298      * <code>writeChild</code> in order.</li>
299      * <li>It calls <code>writeEndTag</code></li>
300      * </ol>
301      *
302      * <p>
303      * It may break lines or add white space if the serializer has
304      * been configured to indent or use a maximum line length.
305      * </p>
306      *
307      * @param element the <code>Element</code> to serialize
308      *
309      * @throws IOException if the underlying output stream
310      * encounters an I/O error
311      * @throws UnavailableCharacterException if the element name
312      * contains a character that is not available in the
313      * current encoding
314      */

315     protected void write(Element element) throws IOException JavaDoc {
316
317         if (escaper.isIndenting()
318           && !escaper.isPreserveSpace()
319           && !escaper.justBroke()) {
320             escaper.breakLine();
321         }
322         
323         // workaround for case where only children are empty text nodes
324
boolean hasRealChildren = false;
325         int childCount = element.getChildCount();
326         for (int i = 0; i < childCount; i++) {
327             Node child = element.getChild(i);
328             if (child.isText()) {
329                 Text t = (Text) child;
330                 if (t.isEmpty()) continue;
331             }
332             hasRealChildren = true;
333             break;
334         }
335         
336         if (hasRealChildren) {
337             writeStartTag(element);
338             // adjust for xml:space
339
boolean wasPreservingWhiteSpace = escaper.isPreserveSpace();
340             String JavaDoc newXMLSpaceValue = element.getAttributeValue(
341               "space", "http://www.w3.org/XML/1998/namespace");
342             if (newXMLSpaceValue != null) {
343                 if ("preserve".equals(newXMLSpaceValue)){
344                     escaper.setPreserveSpace(true);
345                 }
346                 else if ("default".equals(newXMLSpaceValue)){
347                     escaper.setPreserveSpace(false);
348                 }
349             }
350             
351             escaper.incrementIndent();
352             // children
353
for (int i = 0; i < childCount; i++) {
354                 writeChild(element.getChild(i));
355             }
356             escaper.decrementIndent();
357             if (escaper.getIndent() > 0 && !escaper.isPreserveSpace()) {
358                 if (hasNonTextChildren(element)) {
359                     escaper.breakLine();
360                 }
361             }
362             writeEndTag(element);
363             
364             // restore parent value
365
if (newXMLSpaceValue != null) {
366                 escaper.setPreserveSpace(wasPreservingWhiteSpace);
367             }
368                         
369         }
370         else {
371             writeEmptyElementTag(element);
372         }
373         escaper.flush();
374         
375     }
376
377     
378     private boolean hasNonTextChildren(Element element) {
379         
380         int childCount = element.getChildCount();
381         for (int i = 0; i < childCount; i++) {
382             if (! element.getChild(i).isText()) return true;
383         }
384         return false;
385         
386     }
387
388
389     // writeEndTag should not normally throw UnavailableCharacterException
390
// because that would already have been thrown for the
391
// corresponding start-tag.
392
/**
393      * <p>
394      * Writes the end-tag for an element in the form
395      * <code>&lt;/<i>name</i>&gt;</code>.
396      * </p>
397      *
398      * @param element the element whose end-tag is written
399      *
400      * @throws IOException if the underlying output stream
401      * encounters an I/O error
402      */

403     protected void writeEndTag(Element element) throws IOException JavaDoc {
404         escaper.writeMarkup("</");
405         escaper.writeMarkup(element.getQualifiedName());
406         escaper.writeMarkup(">");
407     }
408
409     
410     /**
411      *
412      * <p>
413      * Writes the start-tag for the element including
414      * all its namespace declarations and attributes.
415      * </p>
416      *
417      * <p>
418      * The <code>writeAttributes</code> method is called to write
419      * all the non-namespace-declaration attributes.
420      * The <code>writeNamespaceDeclarations</code> method
421      * is called to write all the namespace declaration attributes.
422      * </p>
423      *
424      * @param element the element whose start-tag is written
425      *
426      * @throws IOException if the underlying output stream
427      * encounters an I/O error
428      * @throws UnavailableCharacterException if the name of the element
429      * or the name of any of its attributes contains a character
430      * that is not available in the current encoding
431      */

432     protected void writeStartTag(Element element) throws IOException JavaDoc {
433         writeTagBeginning(element);
434         escaper.writeMarkup('>');
435     }
436
437     
438     /**
439      *
440      * <p>
441      * Writes an empty-element tag for the element
442      * including all its namespace declarations and attributes.
443      * </p>
444      *
445      * <p>
446      * The <code>writeAttributes</code> method is called to write
447      * all the non-namespace-declaration attributes.
448      * The <code>writeNamespaceDeclarations</code> method
449      * is called to write all the namespace declaration attributes.
450      * </p>
451      *
452      * <p>
453      * If subclasses don't wish empty-element tags to be used,
454      * they can override this method to simply invoke
455      * <code>writeStartTag</code> followed by
456      * <code>writeEndTag</code>.
457      * </p>
458      *
459      * @param element the element whose empty-element tag is written
460      *
461      * @throws IOException if the underlying output stream
462      * encounters an I/O error
463      * @throws UnavailableCharacterException if the name of the element or the name of
464      * any of its attributes contains a character that is not
465      * available in the current encoding
466      */

467     protected void writeEmptyElementTag(Element element)
468       throws IOException JavaDoc {
469         writeTagBeginning(element);
470         escaper.writeMarkup("/>");
471     }
472
473     
474     // This just extracts the commonality between writeStartTag
475
// and writeEmptyElementTag
476
private void writeTagBeginning(Element element)
477       throws IOException JavaDoc {
478         escaper.writeMarkup('<');
479         escaper.writeMarkup(element.getQualifiedName());
480         writeAttributes(element);
481         writeNamespaceDeclarations(element);
482     }
483
484
485     /**
486      * <p>
487      * Writes all the attributes of the specified
488      * element onto the output stream, one at a time, separated
489      * by white space. If preserveBaseURI is true, and it is
490      * necessary to add an <code>xml:base</code> attribute
491      * to the element in order to preserve the base URI, then
492      * that attribute is also written here.
493      * Each individual attribute is written by invoking
494      * <code>write(Attribute)</code>.
495      * </p>
496      *
497      * @param element the <code>Element</code> whose attributes are
498      * written
499      * @throws IOException if the underlying output stream
500      * encounters an I/O error
501      * @throws UnavailableCharacterException if the name of any of
502      * the element's attributes contains a character that is not
503      * available in the current encoding
504      */

505     protected void writeAttributes(Element element)
506       throws IOException JavaDoc {
507           
508         // check to see if we need an xml:base attribute
509
if (preserveBaseURI) {
510             ParentNode parent = element.getParent();
511             if (element.getAttribute("base",
512               "http://www.w3.org/XML/1998/namespace") == null) {
513                 String JavaDoc baseValue = element.getBaseURI();
514                 if (parent == null
515                   || parent.isDocument()
516                   || !element.getBaseURI()
517                        .equals(parent.getBaseURI())) {
518                        
519                     escaper.writeMarkup(' ');
520                     Attribute baseAttribute = new Attribute(
521                       "xml:base",
522                       "http://www.w3.org/XML/1998/namespace",
523                       baseValue);
524                     write(baseAttribute);
525                 }
526             }
527         }
528         
529         int attributeCount = element.getAttributeCount();
530         for (int i = 0; i < attributeCount; i++) {
531             Attribute attribute = element.getAttribute(i);
532             escaper.writeMarkup(' ');
533             write(attribute);
534         }
535     }
536
537     
538     /**
539      * <p>
540      * Writes all the namespace declaration
541      * attributes of the specified element onto the output stream,
542      * one at a time, separated by white space. Each individual
543      * declaration is written by invoking
544      * <code>writeNamespaceDeclaration</code>.
545      * </p>
546      *
547      * @param element the <code>Element</code> whose namespace
548      * declarations are written
549      * @throws IOException if the underlying output stream
550      * encounters an I/O error
551      * @throws UnavailableCharacterException if any of the element's namespace prefixes
552      * contains a character that is not available in the current
553      * encoding
554      */

555     protected void writeNamespaceDeclarations(Element element)
556       throws IOException JavaDoc {
557         
558         ParentNode parent = element.getParent();
559         int count = element.getNamespaceDeclarationCount();
560         for (int i = 0; i < count; i++) {
561             String JavaDoc additionalPrefix = element.getNamespacePrefix(i);
562             String JavaDoc uri = element.getNamespaceURI(additionalPrefix);
563             if (parent.isElement()) {
564                Element parentElement = (Element) parent;
565                if (uri.equals(
566                  parentElement.getNamespaceURI(additionalPrefix))) {
567                    continue;
568                }
569             }
570             else if (uri.equals("")) {
571                 continue; // no need to say xmlns=""
572
}
573             
574             escaper.writeMarkup(' ');
575             writeNamespaceDeclaration(additionalPrefix, uri);
576         }
577     }
578
579
580     /**
581      * <p>
582      * Writes a namespace declaration in the form
583      * <code>xmlns:<i>prefix</i>="<i>uri</i>"</code> or
584      * <code>xmlns="<i>uri</i>"</code>. It does not write
585      * the spaces on either side of the namespace declaration.
586      * These are written by <code>writeNamespaceDeclarations</code>.
587      * </p>
588      *
589      * @param prefix the namespace prefix; the empty string for the
590      * default namespace
591      * @param uri the namespace URI
592      *
593      * @throws IOException if the underlying output stream
594      * encounters an I/O error
595      * @throws UnavailableCharacterException if the namespace prefix contains a
596      * character that is not available in the current encoding
597      */

598     protected void writeNamespaceDeclaration(String JavaDoc prefix, String JavaDoc uri)
599       throws IOException JavaDoc {
600         
601         if ("".equals(prefix)) {
602             escaper.writeMarkup("xmlns");
603         }
604         else {
605             escaper.writeMarkup("xmlns:");
606             escaper.writeMarkup(prefix);
607         }
608         escaper.writeMarkup("=\"");
609         escaper.writePCDATA(uri);
610         escaper.writeMarkup('\"');
611         
612     }
613
614     
615     /**
616      * <p>
617      * Writes an attribute in the form
618      * <code><i>name</i>="<i>value</i>"</code>.
619      * Characters in the attribute value are escaped as necessary.
620      * </p>
621      *
622      * @param attribute the <code>Attribute</code> to write
623      *
624      * @throws IOException if the underlying output stream
625      * encounters an I/O error
626      * @throws UnavailableCharacterException if the attribute name contains a character
627      * that is not available in the current encoding
628      *
629      */

630     protected void write(Attribute attribute) throws IOException JavaDoc {
631         escaper.writeMarkup(attribute.getQualifiedName());
632         escaper.writeMarkup("=\"");
633         escaper.writeAttributeValue(attribute.getValue());
634         escaper.writeMarkup('\"');
635     }
636     
637     
638     /**
639      * <p>
640      * Writes a comment onto the output stream using the current
641      * options. Since character and entity references are not resolved
642      * in comments, comments can only be serialized when all
643      * characters they contain are available in the current
644      * encoding.
645      * </p>
646      *
647      * @param comment the <code>Comment</code> to serialize
648      *
649      * @throws IOException if the underlying output stream
650      * encounters an I/O error
651      * @throws UnavailableCharacterException if the comment contains a
652      * character that is not available in the current encoding
653      */

654     protected void write(Comment comment) throws IOException JavaDoc {
655         if (escaper.isIndenting()) escaper.breakLine();
656         escaper.writeMarkup("<!--");
657         escaper.writeMarkup(comment.getValue());
658         escaper.writeMarkup("-->");
659     }
660     
661     
662     /**
663      * <p>
664      * Writes a processing instruction
665      * onto the output stream using the current options.
666      * Since character and entity references are not resolved
667      * in processing instructions, processing instructions
668      * can only be serialized when all
669      * characters they contain are available in the current
670      * encoding.
671      * </p>
672      *
673      * @param instruction the <code>ProcessingInstruction</code>
674      * to serialize
675      *
676      * @throws IOException if the underlying output stream
677      * encounters an I/O error
678      * @throws UnavailableCharacterException if the processing
679      * instruction contains a character that is not available in the
679      * current encoding
680      */

681     protected void write(ProcessingInstruction instruction)
682       throws IOException JavaDoc {
683         
684         if (escaper.isIndenting()) escaper.breakLine();
685         escaper.writeMarkup("<?");
686         escaper.writeMarkup(instruction.getTarget());
687         String JavaDoc value = instruction.getValue();
688         // for canonical XML, only output a space after the target
689
// if there is a value
690
if (!"".equals(value)) {
691             escaper.writeMarkup(' ');
692             escaper.writeMarkup(value);
693         }
694         escaper.writeMarkup("?>");
695         
696     }
697     
698     /**
699      * <p>
700      * Writes a <code>Text</code> object
701      * onto the output stream using the current options.
702      * Reserved characters such as &lt;, &gt; and "
703      * are escaped using the standard entity references
704      * such as <code>&amp;lt;</code>, <code>&amp;gt;</code>,
705      * and <code>&amp;quot;</code>.
706      * </p>
707      *
708      * <p>
709      * Characters which cannot be encoded in the current character set
710      * (for example, &Omega; in ISO-8859-1) are encoded using
711      * character references.
712      * </p>
713      *
714      * @param text the <code>Text</code> to serialize
715      *
716      * @throws IOException if the underlying output stream
717      * encounters an I/O error
718      */

719     protected void write(Text text) throws IOException JavaDoc {
720         
721         // XXX Is there a shortcut that takes advantage of the
722
// data being stored in UTF-8 here? perhaps even if only
723
// when serializing to UTF-8?
724
String JavaDoc value = text.getValue();
725         if (text.isCDATASection()
726           && value.indexOf("]]>") == -1) {
727             if (!(escaper instanceof UnicodeWriter)) {
728                 int length = value.length();
729                 for (int i = 0; i < length; i++) {
730                    if (escaper.needsEscaping(value.charAt(i))) {
731                         // can't use CDATA section
732
escaper.writePCDATA(value);
733                         return;
734                    }
735                 }
736             }
737             escaper.writeMarkup("<![CDATA[");
738             escaper.writeMarkup(value);
739             escaper.writeMarkup("]]>");
740         }
741         // is this boundary whitespace we can ignore?
742
else if (isBoundaryWhitespace(text)) {
743             return; // without writing node
744
}
745         else {
746             escaper.writePCDATA(value);
747         }
748         
749     }
750     
751     
752     private boolean isBoundaryWhitespace(Text text) {
753         
754         if (getIndent() <= 0) return false;
755         
756         // XXX check this without getValue
757
if (!"".equals(text.getValue().trim())) return false;
758         ParentNode parent = text.getParent();
759         
760         int position = parent.indexOf(text);
761         
762         if (position == 0 && parent.getChildCount() == 1) return false;
763         Node previous = null;
764         Node next = null;
765         if (position != 0) previous = parent.getChild(position-1);
766         if (position != parent.getChildCount()-1) {
767             next = parent.getChild(position+1);
768         }
769         if (previous == null || !previous.isText()) {
770             if (next == null || !next.isText()) {
771                 return true;
772             }
773         }
774         
775         return false;
776         
777     }
778
779     
780     /**
781      * <p>
782      * Writes a <code>DocType</code> object
783      * onto the output stream using the current options.
784      * </p>
785      *
786      * @param doctype the document type declaration to serialize
787      *
788      * @throws IOException if the underlying
789      * output stream encounters an I/O error
790      * @throws UnavailableCharacterException if the document type
791      * declaration contains a character that is not available
792      * in the current encoding
793      */

794     protected void write(DocType doctype) throws IOException JavaDoc {
795         
796         escaper.writeMarkup("<!DOCTYPE ");
797         escaper.writeMarkup(doctype.getRootElementName());
798         if (doctype.getPublicID() != null) {
799           escaper.writeMarkup(" PUBLIC \"" + doctype.getPublicID()
800            + "\" \"" + doctype.getSystemID() + "\"");
801         }
802         else if (doctype.getSystemID() != null) {
803           escaper.writeMarkup(
804             " SYSTEM \"" + doctype.getSystemID() + "\"");
805         }
806         
807         String JavaDoc internalDTDSubset = doctype.getInternalDTDSubset();
808         if (!internalDTDSubset.equals("")) {
809             escaper.writeMarkup(" [");
810             escaper.breakLine();
811             escaper.setInDocType(true);
812             escaper.writeMarkup(internalDTDSubset);
813             escaper.setInDocType(false);
814             escaper.writeMarkup("]");
815         }
816
817         escaper.writeMarkup(">");
818         
819     }
820
821     
822     /**
823      * <p>
824      * Writes a child node onto the output stream using the
825      * current options. It is invoked when walking the tree to
826      * serialize the entire document. It is not called, and indeed
827      * should not be called, for either the <code>Document</code>
828      * node or for attributes.
829      * </p>
830      *
831      * @param node the <code>Node</code> to serialize
832      *
833      * @throws IOException if the underlying output stream
834      * encounters an I/O error
835      * @throws XMLException if an <code>Attribute</code> or a
836      * <code>Document</code> is passed to this method
837      */

838     protected void writeChild(Node node) throws IOException JavaDoc {
839         
840         if (node.isElement()) {
841             write((Element) node);
842         }
843         else if (node.isText()) {
844             write((Text) node);
845         }
846         else if (node.isComment()) {
847             write((Comment) node);
848         }
849         else if (node.isProcessingInstruction()) {
850             write((ProcessingInstruction) node);
851         }
852         else if (node.isDocType()) {
853             write((DocType) node);
854         }
855         else {
856             throw new XMLException("Cannot write a " +
857               node.getClass().getName() +
858               " from the writeChildNode() method");
859         }
860         
861     }
862  
863     
864     /** <p>
865      * Writes a string onto the underlying output stream.
866      * Non-ASCII characters that are not available in the
867      * current character set are encoded with numeric character
868      * references. The three reserved characters &lt;, &gt;, and &amp;
869      * are escaped using the standard entity references
870      * <code>&amp;lt;</code>, <code>&amp;gt;</code>,
871      * and <code>&amp;amp;</code>.
872      * Double and single quotes are not escaped.
873      * </p>
874      *
875      * @param text the parsed character data to serialize
876      *
877      * @throws IOException if the underlying output stream
878      * encounters an I/O error
879      */

880     protected final void writeEscaped(String JavaDoc text) throws IOException JavaDoc {
881         escaper.writePCDATA(text);
882     }
883  
884     /** <p>
885      * Writes a string onto the underlying output stream.
886      * Non-ASCII characters that are not available in the
887      * current character set are escaped using hexadecimal numeric
888      * character references. Carriage returns, line feeds, and tabs
889      * are also escaped using hexadecimal numeric character
890      * references in order to ensure their preservation on a round
891      * trip. The four reserved characters &lt;, &gt;, &amp;,
892      * and &quot; are escaped using the standard entity references
893      * <code>&amp;lt;</code>, <code>&amp;gt;</code>,
894      * <code>&amp;amp;</code>, and <code>&amp;quot;</code>.
895      * The single quote is not escaped.
896      * </p>
897      *
898      * @param value the attribute value to serialize
899      *
900      * @throws IOException if the underlying output stream
901      * encounters an I/O error
902      */

903     protected final void writeAttributeValue(String JavaDoc value)
904       throws IOException JavaDoc {
905         escaper.writeAttributeValue(value);
906     }
907  
908     
909     /** <p>
910      * Writes a string onto the underlying output stream.
911      * without escaping any characters.
912      * Non-ASCII characters that are not available in the
913      * current character set cause an <code>IOException</code>.
914      * </p>
915      *
916      * @param text the <code>String</code> to serialize
917      *
918      * @throws IOException if the underlying output stream
919      * encounters an I/O error or <code>text</code> contains
920      * characters not available in the current character set
921      */

922     protected final void writeRaw(String JavaDoc text) throws IOException JavaDoc {
923         escaper.writeMarkup(text);
924     }
925  
926     
927     /** <p>
928      * Writes the current line break string
929      * onto the underlying output stream and indents
930      * as specified by the current level and the indent property.
931      * </p>
932      *
933      * @throws IOException if the underlying output stream
934      * encounters an I/O error
935      */

936     protected final void breakLine() throws IOException JavaDoc {
937         escaper.breakLine();
938     }
939     
940     
941     /**
942      * <p>
943      * Flushes the data onto the output stream.
944      * It is not enough to flush the output stream.
945      * You must flush the serializer object itself because it
946      * uses some internal buffering.
947      * The serializer will flush the underlying output stream.
948      * </p>
949      *
950      * @throws IOException if the underlying
951      * output stream encounters an I/O error
952      */

953     public void flush() throws IOException JavaDoc {
954         escaper.flush();
955     }
956
957     
958     /**
959      * <p>
960      * Returns the number of spaces this serializer indents.
961      * </p>
962      *
963      * @return the number of spaces this serializer indents
964      * each successive level beyond the previous one
965      */

966     public int getIndent() {
967         return escaper.getIndent();
968     }
969
970
971     /**
972      * <p>
973      * Sets the number of additional spaces to add to each successive
974      * level in the hierarchy. Use 0 for no extra indenting. The
975      * maximum indentation is in limited to approximately half the
976      * maximum line length. The serializer will not indent further
977      * than that no matter how many levels deep the hierarchy is.
978      * </p>
979      *
980      * <p>
981      * When this variable is set to a value greater than 0,
982      * the serializer does not preserve white space. Spaces,
983      * tabs, carriage returns, and line feeds can all be
984      * interchanged at the serializer's discretion, and additional
985      * white space may be added before and after tags.
986      * Carriage returns, line feeds, and tabs will not be
987      * escaped with numeric character references.
988      * </p>
989      *
990      * <p>
991      * Inside elements with an <code>xml:space="preserve"</code>
992      * attribute, white space is preserved and no indenting
993      * takes place, regardless of the setting of the indent
994      * property, unless, of course, an
995      * <code>xml:space="default"</code> attribute overrides the
996      * <code>xml:space="preserve"</code> attribute.
997      * </p>
998      *
999      * <p>
1000     * The default value for indent is 0; that is, the default is
1001     * not to add or subtract any white space from the source
1002     * document.
1003     * </p>
1004     *
1005     * @param indent the number of spaces to indent
1006     * each successive level of the hierarchy
1007     *
1008     * @throws IllegalArgumentException if indent is less than zero
1009     *
1010     */

1011    public void setIndent(int indent) {
1012        if (indent < 0) {
1013            throw new IllegalArgumentException JavaDoc(
1014              "Indent cannot be negative"
1015            );
1016        }
1017        escaper.setIndent(indent);
1018    }
1019
1020    
1021    /**
1022     * <p>
1023     * Returns the string used as a line separator.
1024     * This is always <code>"\n"</code>, <code>"\r"</code>,
1025     * or <code>"\r\n"</code>.
1026     * </p>
1027     *
1028     * @return the line separator
1029     */

1030    public String JavaDoc getLineSeparator() {
1031        return escaper.getLineSeparator();
1032    }
1033
1034    
1035    /**
1036     * <p>
1037     * Sets the line separator. This can only be one of the
1038     * three strings <code>"\n"</code>, <code>"\r"</code>,
1039     * or <code>"\r\n"</code>. All other values are forbidden.
1040     * If this method is invoked, then
1041     * line separators in the character data will be changed to this
1042     * string. Line separators in attribute values will be changed
1043     * to the hexadecimal numeric character references corresponding
1044     * to this string.
1045     * </p>
1046     *
1047     * <p>
1048     * The default line separator is <code>"\r\n"</code>. However,
1049     * line separators in character data and attribute values are not
1050     * changed to this string, unless this method is called first.
1051     * </p>
1052     *
1053     * @param lineSeparator the line separator to set
1054     *
1055     * @throws IllegalArgumentException if you attempt to use any line
1056     * separator other than <code>"\n"</code>, <code>"\r"</code>,
1057     * or <code>"\r\n"</code>.
1058     *
1059     */

1060    public void setLineSeparator(String JavaDoc lineSeparator) {
1061        escaper.setLineSeparator(lineSeparator);
1062    }
1063
1064    
1065    /**
1066     * <p>
1067     * Returns the preferred maximum line length.
1068     * </p>
1069     *
1070     * @return the preferred maximum line length.
1071     */

1072    public int getMaxLength() {
1073        return escaper.getMaxLength();
1074    }
1075
1076    
1077    /**
1078     * <p>
1079     * Sets the suggested maximum line length for this serializer.
1080     * Setting this to 0 indicates that no automatic wrapping is to be
1081     * performed. When a line approaches this length, the serializer
1082     * begins looking for opportunities to break the line. Generally
1083     * it will break on any ASCII white space character (tab, carriage
1084     * return, linefeed, and space). In some circumstances the
1085     * serializer may not be able to break the line before the maximum
1086     * length is reached. For instance, if an element name is longer
1087     * than the maximum line length the only way to correctly
1088     * serialize it is to exceed the maximum line length. In this case,
1089     * the serializer will exceed the maximum line length.
1090     * </p>
1091     *
1092     * <p>
1093     * The default value for maximum line length is 0, which is
1094     * interpreted as no maximum line length.
1095     * Setting this to a negative value just sets it to 0.
1096     * </p>
1097     *
1098     * <p>
1099     * When this variable is set to a value greater than 0,
1100     * the serializer does not preserve white space. Spaces,
1101     * tabs, carriage returns, and line feeds can all be
1102     * interchanged at the serializer's discretion.
1103     * Carriage returns, line feeds, and tabs will not be
1104     * escaped with numeric character references.
1105     * </p>
1106     *
1107     * <p>
1108     * Inside elements with an <code>xml:space="preserve"</code>
1109     * attribute, the maximum line length is not enforced,
1110     * regardless of the setting of the this property, unless,
1111     * of course, an <code>xml:space="default"</code> attribute
1112     * overrides the <code>xml:space="preserve"</code> attribute.
1113     * </p>
1114     *
1115     * @param maxLength the preferred maximum line length
1116     */

1117    public void setMaxLength(int maxLength) {
1118        escaper.setMaxLength(maxLength);
1119    }
1120
1121    
1122    /**
1123     * <p>
1124     * Returns true if this serializer preserves the original
1125     * base URIs by inserting extra <code>xml:base</code> attributes.
1126     * </p>
1127     *
1128     * @return true if this <code>Serializer</code> inserts
1129     * extra <code>xml:base</code> attributes to attempt to
1130     * preserve base URI information from the document.
1131     */

1132    public boolean getPreserveBaseURI() {
1133        return preserveBaseURI;
1134    }
1135
1136    
1137    /**
1138     * <p>
1139     * Determines whether this serializer inserts
1140     * extra <code>xml:base</code> attributes to attempt to
1141     * preserve base URI information from the document.
1142     * The default is false, do not preserve base URI information.
1143     * <code>xml:base</code> attributes that have been explicitly
1144     * added to an element are always output. This property only
1145     * determines whether or not extra <code>xml:base</code>
1146     * attributes are added.
1147     * </p>
1148     *
1149     * @param preserve true if <code>xml:base</code>
1150     * attributes should be added as necessary
1151     * to preserve base URI information
1152     */

1153    public void setPreserveBaseURI(boolean preserve) {
1154        this.preserveBaseURI = preserve;
1155    }
1156    
1157    
1158    /**
1159     * <p>
1160     * Returns the name of the character encoding used by
1161     * this serializer.
1162     * </p>
1163     *
1164     * @return the encoding used for the output document
1165     */

1166    public String JavaDoc getEncoding() {
1167        return escaper.getEncoding();
1168    }
1169    
1170    /**
1171     * <p>
1172     * If true, this property indicates serialization will
1173     * perform Unicode normalization on all data using normalization
1174     * form C (NFC). Performing Unicode normalization may change the
1175     * document's infoset. The default is false; do not normalize.
1176     * </p>
1177     *
1178     * <p>
1179     * The implementation used is IBM's <a target="_top" HREF=
1180     * "http://oss.software.ibm.com/icu4j/index.html">International
1181     * Components for Unicode <i>for Java</i> (ICU4J) 2.6</a>.
1182     * This version is based on Unicode 4.0.
1183     * </p>
1184     *
1185     * <p>
1186     * This feature has not yet been benchmarked or optimized.
1187     * It may result in substantially slower code.
1188     * </p>
1189     *
1190     * <p>
1191     * If all your data is in the first 256 code points of Unicode
1192     * (i.e. the ISO-8859-1, Latin-1 character set), then it's
1193     * already in normalization form C and normalizing won't change
1194     * anything.
1195     * </p>
1196     *
1197     * @param normalize true if normalization is performed;
1198     * false if it isn't
1199     */

1200    public void setUnicodeNormalizationFormC(boolean normalize) {
1201        escaper.setNFC(normalize);
1202    }
1203
1204    
1205    /**
1206     * <p>
1207     * Indicates whether serialization will
1208     * perform Unicode normalization on all data using normalization
1209     * form C (NFC). The default is false; do not normalize.
1210     * </p>
1211     *
1212     * @return true if this serializer performs Unicode
1213     * normalization; false if it doesn't
1214     */

1215    public boolean getUnicodeNormalizationFormC() {
1216        return escaper.getNFC();
1217    }
1218    
1219    
1220    /**
1221     * <p>
1222     * Returns the current column number of the output stream. This
1223     * method useful for subclasses that implement their own pretty
1224     * printing strategies by inserting white space and line breaks
1225     * at appropriate points.
1226     * </p>
1227     *
1228     * <p>
1229     * Columns are counted based on Unicode characters, not Java
1230     * chars. A surrogate pair counts as one character in this
1231     * context, not two. However, a character followed by a
1232     * combining character (e.g. e followed by combining accent
1233     * acute) counts as two characters. This latter choice
1234     * (treating combining characters like regular characters)
1235     * is under review, and may change in the future if it's not
1236     * too big a performance hit.
1237     * </p>
1238     *
1239     * @return the current column number
1240     */

1241    protected final int getColumnNumber() {
1242        return escaper.getColumnNumber();
1243    }
1244    
1245}
Popular Tags