KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > sf > saxon > event > XMLEmitter


1 package net.sf.saxon.event;
2 import net.sf.saxon.charcode.UnicodeCharacterSet;
3 import net.sf.saxon.om.XMLChar;
4 import net.sf.saxon.om.FastStringBuffer;
5 import net.sf.saxon.trans.DynamicError;
6 import net.sf.saxon.trans.XPathException;
7 import net.sf.saxon.tinytree.CharSlice;
8
9 import javax.xml.transform.OutputKeys JavaDoc;
10 import javax.xml.transform.TransformerException JavaDoc;
11 import javax.xml.transform.TransformerFactory JavaDoc;
12 import javax.xml.transform.Templates JavaDoc;
13 import javax.xml.transform.stream.StreamResult JavaDoc;
14 import javax.xml.transform.stream.StreamSource JavaDoc;
15 import java.util.Stack JavaDoc;
16 import java.io.CharArrayWriter JavaDoc;
17 import java.io.File JavaDoc;
18
19 /**
20   * XMLEmitter is an Emitter that generates XML output
21   * to a specified destination.
22   */

23
24 public class XMLEmitter extends Emitter
25 {
26     protected boolean empty = true;
27     protected boolean openStartTag = false;
28     protected boolean declarationIsWritten = false;
29     protected int elementCode;
30
31     protected boolean preferHex = false;
32     protected boolean undeclareNamespaces = false;
33     private boolean warningIssued = false;
34
35     // The element stack holds the display names (lexical QNames) of elements that
36
// have been started but not finished. It is used to obtain the element name
37
// for the end tag.
38

39     protected Stack JavaDoc elementStack = new Stack JavaDoc();
40
41     // Namecodes in the range 0..2048 are common. So for these codes,
42
// we maintain a direct lookup from the namecode to the display name
43
// that bypasses reference to the namepool
44

45     private String JavaDoc[] nameLookup = new String JavaDoc[2048];
46
47     private boolean indenting = false;
48     private int indentSpaces = 3;
49     private String JavaDoc indentChars = "\n ";
50     private int totalAttributeLength = 0;
51
52
53     static boolean[] specialInText; // lookup table for special characters in text
54
static boolean[] specialInAtt; // lookup table for special characters in attributes
55
// create look-up table for ASCII characters that need special treatment
56

57     static {
58         specialInText = new boolean[128];
59         for (int i=0; i<=15; i++) specialInText[i] = true; // allowed in XML 1.1 as character references
60
for (int i=16; i<=127; i++) specialInText[i] = false;
61         specialInText[(char)0] = true;
62             // used to switch escaping on and off for mapped characters
63
specialInText['\n'] = false;
64         specialInText['\t'] = false;
65         specialInText['\r'] = true;
66         specialInText['<'] = true;
67         specialInText['>'] = true;
68         specialInText['&'] = true;
69
70         specialInAtt = new boolean[128];
71         for (int i=0; i<=15; i++) specialInAtt[i] = true; // allowed in XML 1.1 as character references
72
for (int i=16; i<=127; i++) specialInAtt[i] = false;
73         specialInAtt[(char)0] = true;
74             // used to switch escaping on and off for mapped characters
75
specialInAtt['\r'] = true;
76         specialInAtt['\n'] = true;
77         specialInAtt['\t'] = true;
78         specialInAtt['<'] = true;
79         specialInAtt['>'] = true;
80         specialInAtt['&'] = true;
81         specialInAtt['\"'] = true;
82     }
83
84     /**
85      * Start of the event stream. Nothing is done at this stage: the opening of the output
86      * file is deferred until some content is written to it.
87     */

88
89     public void open() throws XPathException {}
90
91     /**
92      * Start of a document node. Nothing is done at this stage: the opening of the output
93      * file is deferred until some content is written to it.
94     */

95
96     public void startDocument(int properties) throws XPathException {}
97
98     /**
99      * Notify the end of a document node
100      */

101
102     public void endDocument() throws XPathException {
103         if (!elementStack.isEmpty()) {
104             throw new IllegalStateException JavaDoc("Attempt to end document in serializer when elements are unclosed");
105         }
106     }
107
108     /**
109      * Do the real work of starting the document. This happens when the first
110      * content is written.
111      * @throws XPathException
112      */

113
114     protected void openDocument () throws XPathException
115     {
116         if (writer==null) {
117             makeWriter();
118         }
119         if (characterSet==null) {
120             characterSet = UnicodeCharacterSet.getInstance();
121         }
122         writeDeclaration();
123         String JavaDoc rep = outputProperties.getProperty(SaxonOutputKeys.CHARACTER_REPRESENTATION);
124         if (rep!=null) {
125             preferHex = (rep.trim().equalsIgnoreCase("hex"));
126         }
127         rep = outputProperties.getProperty(SaxonOutputKeys.UNDECLARE_PREFIXES);
128         if (rep!=null) {
129             undeclareNamespaces = (rep.trim().equalsIgnoreCase("yes"));
130         }
131     }
132
133     /**
134     * Output the XML declaration
135     */

136
137     public void writeDeclaration() throws XPathException {
138         if (declarationIsWritten) return;
139         declarationIsWritten = true;
140         try {
141             indenting = "yes".equals(outputProperties.getProperty(OutputKeys.INDENT));
142             String JavaDoc s = outputProperties.getProperty(SaxonOutputKeys.INDENT_SPACES);
143             if (s!=null) {
144                 try {
145                     indentSpaces = Integer.parseInt(s.trim());
146                 } catch (NumberFormatException JavaDoc err) {}
147             }
148
149             String JavaDoc byteOrderMark = outputProperties.getProperty(SaxonOutputKeys.BYTE_ORDER_MARK);
150
151             if ("yes".equals(byteOrderMark) &&
152                     "UTF-8".equalsIgnoreCase(outputProperties.getProperty(OutputKeys.ENCODING))) {
153                 // For UTF-16, Java outputs a BOM whether we like it or not
154
writer.write('\uFEFF');
155             }
156
157             String JavaDoc omit = outputProperties.getProperty(OutputKeys.OMIT_XML_DECLARATION);
158             if (omit==null) {
159                 omit = "no";
160             }
161
162             String JavaDoc version = outputProperties.getProperty(OutputKeys.VERSION);
163             if (version==null) {
164                 version = "1.0";
165             }
166
167             String JavaDoc encoding = outputProperties.getProperty(OutputKeys.ENCODING);
168             if (encoding==null || encoding.equalsIgnoreCase("utf8")) {
169                 encoding = "UTF-8";
170             }
171
172             String JavaDoc standalone = outputProperties.getProperty(OutputKeys.STANDALONE);
173             if ("omit".equals(standalone)) {
174                 standalone = null;
175             }
176
177             if (omit.equals("no")) {
178                 writer.write("<?xml version=\"" + version + "\" " + "encoding=\"" + encoding + '\"' +
179                         (standalone != null ? " standalone=\"" + standalone + '\"' : "") + "?>");
180                     // no longer write a newline character: it's wrong if the output is an
181
// external general parsed entity
182
}
183         } catch (java.io.IOException JavaDoc err) {
184             throw new DynamicError(err);
185         }
186     }
187
188     /**
189     * Output the document type declaration
190     */

191
192     protected void writeDocType(String JavaDoc type, String JavaDoc systemId, String JavaDoc publicId) throws XPathException {
193         try {
194             if (declarationIsWritten && !indenting) {
195                 // don't add a newline if indenting, because the indenter will already have done so
196
writer.write("\n");
197             }
198             writer.write("<!DOCTYPE " + type + '\n');
199             if (systemId!=null && publicId==null) {
200                 writer.write(" SYSTEM \"" + systemId + "\">\n");
201             } else if (systemId==null && publicId!=null) { // handles the HTML case
202
writer.write(" PUBLIC \"" + publicId + "\">\n");
203             } else {
204                 writer.write(" PUBLIC \"" + publicId + "\" \"" + systemId + "\">\n");
205             }
206         } catch (java.io.IOException JavaDoc err) {
207             throw new DynamicError(err);
208         }
209     }
210
211     /**
212     * End of the document. Close the output stream.
213     */

214
215     public void close() throws XPathException
216     {
217         try {
218             if (writer != null) {
219                 writer.flush();
220             }
221         } catch (java.io.IOException JavaDoc err) {
222             throw new DynamicError(err);
223         }
224     }
225
226     /**
227     * Start of an element. Output the start tag, escaping special characters.
228     */

229
230     public void startElement (int nameCode, int typeCode, int locationId, int properties) throws XPathException
231     {
232         if (empty) {
233             openDocument();
234         }
235         String JavaDoc displayName = null;
236
237         // See if we've seen this name before
238
if (nameCode < 2048) {
239             displayName = nameLookup[nameCode];
240         }
241
242         // Otherwise, look it up in the namepool and check that it's encodable
243
if (displayName == null) {
244             displayName = namePool.getDisplayName(nameCode);
245             if (nameCode < 2048) {
246                 nameLookup[nameCode] = displayName;
247             }
248             int badchar = testCharacters(displayName);
249             if (badchar!=0) {
250                 DynamicError err = new DynamicError("Element name contains a character (decimal + " +
251                                                 badchar + ") not available in the selected encoding");
252                 err.setErrorCode("SERE0008");
253                 throw err;
254             }
255         }
256
257         elementStack.push(displayName);
258         elementCode = nameCode;
259
260         try {
261             if (empty) {
262                 String JavaDoc systemId = outputProperties.getProperty(OutputKeys.DOCTYPE_SYSTEM);
263                 String JavaDoc publicId = outputProperties.getProperty(OutputKeys.DOCTYPE_PUBLIC);
264                 if (systemId!=null) {
265                     writeDocType(displayName, systemId, publicId);
266                 }
267                 empty = false;
268             }
269             if (openStartTag) {
270                 closeStartTag();
271             }
272             writer.write('<');
273             writer.write(displayName);
274             openStartTag = true;
275             totalAttributeLength = 0;
276
277         } catch (java.io.IOException JavaDoc err) {
278             throw new DynamicError(err);
279         }
280     }
281
282     public void namespace(int namespaceCode, int properties) throws XPathException {
283         try {
284             String JavaDoc nsprefix = namePool.getPrefixFromNamespaceCode(namespaceCode);
285             String JavaDoc nsuri = namePool.getURIFromNamespaceCode(namespaceCode);
286
287             int len = nsuri.length() + nsprefix.length() + 8;
288             String JavaDoc sep = " ";
289             if (indenting && (totalAttributeLength + len) > 80 && totalAttributeLength != 0) {
290                 sep = getAttributeIndentString();
291             }
292             totalAttributeLength += len;
293
294             if (nsprefix.equals("")) {
295                 writer.write(sep);
296                 writeAttribute(elementCode, "xmlns", nsuri, 0);
297             } else if (nsprefix.equals("xml")) {
298                 return;
299             } else {
300                 int badchar = testCharacters(nsprefix);
301                 if (badchar!=0) {
302                     DynamicError err = new DynamicError("Namespace prefix contains a character (decimal + " +
303                                                     badchar + ") not available in the selected encoding");
304                     err.setErrorCode("SERE0008");
305                     throw err;
306                 }
307                 if (undeclareNamespaces || !nsuri.equals("")) {
308                     writer.write(sep);
309                     writeAttribute(elementCode, "xmlns:" + nsprefix, nsuri, 0);
310                 }
311             }
312         } catch (java.io.IOException JavaDoc err) {
313             throw new DynamicError(err);
314         }
315     }
316
317     public void attribute(int nameCode, int typeCode, CharSequence JavaDoc value, int locationId, int properties)
318     throws XPathException {
319         String JavaDoc displayName = null;
320
321         // See if we've seen this name before
322
if (nameCode < 2048) {
323             displayName = nameLookup[nameCode];
324         }
325
326         // Otherwise, look it up in the namepool and check that it's encodable
327
if (displayName == null) {
328             displayName = namePool.getDisplayName(nameCode);
329             if (nameCode < 2048) {
330                 nameLookup[nameCode] = displayName;
331             }
332             int badchar = testCharacters(displayName);
333             if (badchar!=0) {
334                 DynamicError err = new DynamicError("Attribute name contains a character (decimal + " +
335                                                 badchar + ") not available in the selected encoding");
336                 err.setErrorCode("SERE0008");
337                 throw err;
338             }
339         }
340
341         int len = displayName.length() + value.length() + 4;
342         String JavaDoc sep = " ";
343         if (indenting && (totalAttributeLength + len) > 80 && totalAttributeLength != 0) {
344             sep = getAttributeIndentString();
345         }
346         totalAttributeLength += len;
347
348         try {
349             writer.write(sep);
350             writeAttribute(
351                 elementCode,
352                 displayName,
353                 value,
354                 properties );
355
356         } catch (java.io.IOException JavaDoc err) {
357             throw new DynamicError(err);
358         }
359     }
360
361     private String JavaDoc getAttributeIndentString() {
362         int indent = (elementStack.size()-1) * indentSpaces + ((String JavaDoc)elementStack.peek()).length() + 3;
363         while (indent >= indentChars.length()) {
364             indentChars += " ";
365         }
366         return indentChars.substring(0, indent);
367     }
368
369     public void startContent() throws XPathException {
370         // don't add ">" to the start tag until we know whether the element has content
371
}
372
373     public void closeStartTag() throws XPathException {
374         try {
375             if (openStartTag) {
376                 writer.write('>');
377                 openStartTag = false;
378             }
379         } catch (java.io.IOException JavaDoc err) {
380             throw new DynamicError(err);
381         }
382     }
383
384     /**
385     * Close an empty element tag. (This is overridden in XHTMLEmitter).
386     */

387
388     protected String JavaDoc emptyElementTagCloser(String JavaDoc displayName, int nameCode) {
389         return "/>";
390     }
391
392     /**
393     * Write attribute name=value pair.
394      * @param elCode The element name is not used in this version of the
395     * method, but is used in the HTML subclass.
396      * @param attname The attribute name, which has already been validated to ensure
397      * it can be written in this encoding
398      * @param value The value of the attribute
399      * @param properties Any special properties of the attribute
400     */

401
402     protected void writeAttribute(int elCode, String JavaDoc attname, CharSequence JavaDoc value, int properties) throws XPathException {
403         try {
404             String JavaDoc val = value.toString();
405             writer.write(attname);
406             if ((properties & ReceiverOptions.NO_SPECIAL_CHARS) != 0) {
407                 writer.write('=');
408                 writer.write('"');
409                 writer.write(val);
410                 writer.write('"');
411             } else if ((properties & ReceiverOptions.USE_NULL_MARKERS) != 0) {
412                 // null (0) characters will be used before and after any section of
413
// the value where escaping is to be disabled
414
writer.write('=');
415                 char delimiter = (val.indexOf('"') >= 0 ? '\'' : '"');
416                 writer.write(delimiter);
417                 writeEscape(value, true);
418                 writer.write(delimiter);
419             } else {
420                 writer.write("=\"");
421                 writeEscape(value, true);
422                 writer.write('\"');
423             }
424         } catch (java.io.IOException JavaDoc err) {
425             throw new DynamicError(err);
426         }
427     }
428
429
430     /**
431     * Test that all characters in a name are supported in the target encoding.
432      * @return zero if all the characters are available, or the value of the
433      * first offending character if not
434     */

435
436     protected int testCharacters(CharSequence JavaDoc chars) throws XPathException {
437         for (int i=0; i<chars.length(); i++) {
438             char c = chars.charAt(i);
439             if (c > 127) {
440                 if (XMLChar.isHighSurrogate(c)) {
441                     int cc = XMLChar.supplemental(c, chars.charAt(++i));
442                     if (!characterSet.inCharset(cc)) {
443                         return cc;
444                     }
445                 } else if (!characterSet.inCharset(c)) {
446                     return c;
447                 }
448             }
449         }
450         return 0;
451     }
452
453     /**
454     * End of an element.
455     */

456
457     public void endElement () throws XPathException
458     {
459         String JavaDoc displayName = (String JavaDoc)elementStack.pop();
460         try {
461             if (openStartTag) {
462                 writer.write(emptyElementTagCloser(displayName, elementCode));
463                 openStartTag = false;
464             } else {
465                 writer.write("</");
466                 writer.write(displayName);
467                 writer.write('>');
468             }
469         } catch (java.io.IOException JavaDoc err) {
470             throw new DynamicError(err);
471         }
472     }
473
474     /**
475     * Character data.
476     */

477
478     public void characters (CharSequence JavaDoc chars, int locationId, int properties) throws XPathException
479     {
480         if (empty) {
481             openDocument();
482         }
483         try {
484             if (openStartTag) {
485                 closeStartTag();
486             }
487
488             if ((properties & ReceiverOptions.NO_SPECIAL_CHARS) != 0) {
489                 writeCharSequence(chars);
490             } else if ((properties & ReceiverOptions.DISABLE_ESCAPING) == 0) {
491                 writeEscape(chars, false);
492             } else {
493                 // disable-output-escaping="yes"
494
if (testCharacters(chars) == 0) {
495                     writeCharSequence(chars);
496                 } else {
497                     // Recoverable error: using disable output escaping with characters
498
// that are not available in the target encoding
499
if (!warningIssued) {
500                         try {
501                             getPipelineConfiguration().getErrorListener().warning(
502                                 new TransformerException JavaDoc("disable-output-escaping is ignored for characters " +
503                                                          "not available in the chosen encoding"));
504                         } catch (TransformerException JavaDoc e) {
505                             throw DynamicError.makeDynamicError(e);
506                         }
507                         warningIssued = true;
508                     }
509                     writeEscape(chars, false);
510                 }
511             }
512         } catch (java.io.IOException JavaDoc err) {
513             throw new DynamicError(err);
514         }
515     }
516
517     /**
518      * Write a CharSequence: various implementations
519      */

520
521     public void writeCharSequence(CharSequence JavaDoc s) throws java.io.IOException JavaDoc {
522         if (s instanceof String JavaDoc) {
523             writer.write((String JavaDoc)s);
524         } else if (s instanceof CharSlice) {
525             ((CharSlice)s).write(writer);
526         } else if (s instanceof FastStringBuffer) {
527             ((FastStringBuffer)s).write(writer);
528         } else {
529             writer.write(s.toString());
530         }
531     }
532
533
534     /**
535     * Handle a processing instruction.
536     */

537
538     public void processingInstruction (String JavaDoc target, CharSequence JavaDoc data, int locationId, int properties)
539         throws XPathException {
540         if (empty) {
541             openDocument();
542         }
543         try {
544             if (openStartTag) {
545                 closeStartTag();
546             }
547             writer.write("<?" + target + (data.length()>0 ? ' ' + data.toString() : "") + "?>");
548         } catch (java.io.IOException JavaDoc err) {
549             throw new DynamicError(err);
550         }
551     }
552
553     /**
554     * Write contents of array to current writer, after escaping special characters.
555     * This method converts the XML special characters (such as < and &) into their
556     * predefined entities.
557     * @param chars The character sequence containing the string
558     * @param inAttribute Set to true if the text is in an attribute value
559     */

560
561     protected void writeEscape(final CharSequence JavaDoc chars, final boolean inAttribute)
562     throws java.io.IOException JavaDoc, XPathException {
563         int segstart = 0;
564         boolean disabled = false;
565         final boolean[] specialChars = (inAttribute ? specialInAtt : specialInText);
566
567         while (segstart < chars.length()) {
568             int i = segstart;
569
570             // find a maximal sequence of "ordinary" characters
571
while (i < chars.length()) {
572                 final char c = chars.charAt(i);
573                 if (c < 127) {
574                     if (specialChars[c]) {
575                         break;
576                     } else {
577                         i++;
578                     }
579                 } else if (c < 160) {
580                     break;
581                 } else if (XMLChar.isHighSurrogate(c)) {
582                     break;
583                 } else if (!characterSet.inCharset(c)) {
584                     break;
585                 } else {
586                     i++;
587                 }
588             }
589
590             // if this was the whole string write it out and exit
591
if (i >= chars.length()) {
592                 if (segstart == 0) {
593                     writeCharSequence(chars);
594                 } else {
595                     writeCharSequence(chars.subSequence(segstart, i));
596                 }
597                 return;
598             }
599
600             // otherwise write out this sequence
601
if (i > segstart) {
602                 writeCharSequence(chars.subSequence(segstart, i));
603             }
604
605             // examine the special character that interrupted the scan
606
final char c = chars.charAt(i);
607             if (c==0) {
608                 // used to switch escaping on and off
609
disabled = !disabled;
610             } else if (disabled) {
611                 writer.write(c);
612             } else if (c>=127 && c<160) {
613                 // XML 1.1 requires these characters to be written as character references
614
outputCharacterReference(c);
615             } else if (c>=160) {
616                 if (XMLChar.isHighSurrogate(c)) {
617                     char d = chars.charAt(++i);
618                     int charval = XMLChar.supplemental(c, d);
619                     if (characterSet.inCharset(charval)) {
620                         writer.write(c);
621                         writer.write(d);
622                     } else {
623                         outputCharacterReference(charval);
624                     }
625                 } else {
626                     // process characters not available in the current encoding
627
outputCharacterReference(c);
628                 }
629
630             } else {
631
632                 // process special ASCII characters
633

634                 if (c=='<') {
635                     writer.write("&lt;");
636                 } else if (c=='>') {
637                     writer.write("&gt;");
638                 } else if (c=='&') {
639                     writer.write("&amp;");
640                 } else if (c=='\"') {
641                     writer.write("&#34;");
642                 } else if (c=='\n') {
643                     writer.write("&#xA;");
644                 } else if (c=='\r') {
645                     writer.write("&#xD;");
646                 } else if (c=='\t') {
647                     writer.write("&#x9;");
648                 }
649             }
650             segstart = ++i;
651         }
652     }
653
654     /**
655     * Output a decimal or hexadecimal character reference
656     */

657
658     private char[] charref = new char[10];
659     protected void outputCharacterReference(int charval) throws java.io.IOException JavaDoc {
660         if (preferHex) {
661             int o = 0;
662             charref[o++]='&';
663             charref[o++]='#';
664             charref[o++]='x';
665             String JavaDoc code = Integer.toHexString(charval);
666             int len = code.length();
667             for (int k=0; k<len; k++) {
668                 charref[o++]=code.charAt(k);
669             }
670             charref[o++]=';';
671             writer.write(charref, 0, o);
672         } else {
673             int o = 0;
674             charref[o++]='&';
675             charref[o++]='#';
676             String JavaDoc code = Integer.toString(charval);
677             int len = code.length();
678             for (int k=0; k<len; k++) {
679                 charref[o++]=code.charAt(k);
680             }
681             charref[o++]=';';
682             writer.write(charref, 0, o);
683         }
684     }
685
686     /**
687     * Handle a comment.
688     */

689
690     public void comment (CharSequence JavaDoc chars, int locationId, int properties) throws XPathException
691     {
692         if (empty) {
693             openDocument();
694         }
695         try {
696             if (openStartTag) {
697                 closeStartTag();
698             }
699             writer.write("<!--");
700             writer.write(chars.toString());
701             writer.write("-->");
702         } catch (java.io.IOException JavaDoc err) {
703             throw new DynamicError(err);
704         }
705     }
706
707     public static void main(String JavaDoc[] params) throws Exception JavaDoc {
708         StreamResult JavaDoc iStreamResult = new StreamResult JavaDoc(new CharArrayWriter JavaDoc());
709         XMLEmitter iResult = new XMLEmitter();
710         iResult.setStreamResult(iStreamResult);
711
712         StreamSource JavaDoc iSource = new StreamSource JavaDoc(new File JavaDoc("c:\\temp\\test.xml"));
713
714         System.setProperty("javax.xml.transform.TransformerFactory",
715         "net.sf.saxon.TransformerFactoryImpl");
716         TransformerFactory JavaDoc iTfactory = TransformerFactory.newInstance();
717         Templates JavaDoc iTemplates = iTfactory.newTemplates(
718                 new StreamSource JavaDoc(new File JavaDoc("c:\\temp\\test.xsl")));
719         iTemplates.newTransformer().transform(iSource, iResult);
720
721     }
722
723
724 }
725
//
// The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
// you may not use this file except in compliance with the License. You may obtain a copy of the
// License at http://www.mozilla.org/MPL/
//
// Software distributed under the License is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the License for the specific language governing rights and limitations under the License.
//
// The Original Code is: all this file.
//
// The Initial Developer of the Original Code is Michael H. Kay.
//
// Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
//
// Contributor(s): none.
//
743
Popular Tags