XMLWriter


1   package org.jivesoftware.util;
2   
3   import org.dom4j.*;
4   import org.dom4j.io.OutputFormat;
5   import org.dom4j.tree.NamespaceStack;
6   import org.xml.sax.*;
7   import org.xml.sax.ext.LexicalHandler  ;
8   import org.xml.sax.helpers.XMLFilterImpl  ;
9   
10  import java.io.*;
11  import java.util.*;
12  
13  /**
14   * Replacement class of the original XMLWriter.java (version: 1.77) since the original is still
15   * using StringBuffer which is not fast. 
16   */
17  public class XMLWriter extends XMLFilterImpl   implements LexicalHandler   {
18  
19      private static final String   PAD_TEXT = " "; // written between text and non-text nodes when format.isPadText()
20  
        /** Property names that setProperty/getProperty treat as referring to the lexical handler. */
21      protected static final String  [] LEXICAL_HANDLER_NAMES = {
22          "http://xml.org/sax/properties/lexical-handler",
23          "http://xml.org/sax/handlers/LexicalHandler"
24      };
25  
        /** Format used by the constructors that are not given an explicit OutputFormat. */
26      protected static final OutputFormat DEFAULT_FORMAT = new OutputFormat();
27  
28      /** Whether entity references should be resolved when writing. */
29      private boolean resolveEntityRefs = true;
30  
31      /** Stores the type of the last node written so algorithms can refer to the
32        * previous node type */
33      protected int lastOutputNodeType;
34  
35      /** Whether xml:space="preserve" is currently in effect, i.e. whitespace must not be trimmed */
36      protected boolean preserve=false;
37  
38      /** The Writer that output is sent to */
39      protected Writer writer;
40  
41      /** The stack of namespaces written so far */
42      private NamespaceStack namespaceStack = new NamespaceStack();
43  
44      /** The format used by this writer */
45      private OutputFormat format;
46  
47      /** Whether text should be escaped before it is written */
48      private boolean escapeText = true;
49      /** The initial number of indentations (so you can print a whole
50          document indented, if you like) **/
51      private int indentLevel = 0;
52  
53      /** Buffer used when escaping strings */
54      private StringBuilder   buffer = new StringBuilder  ();
55  
56      /** Whether characters() has already written a chunk since the last element start/end or comment */
57      private boolean charactersAdded = false;
58      private char lastChar; // last character received by characters(); re-emitted before the next chunk when trimming if it was whitespace
59  
60      /** Whether the underlying writer is flushed after each public write(...) call and at endDocument() */
61      private boolean autoFlush;
62  
63      /** Lexical handler that lexical SAX events are forwarded to; may be null */
64      private LexicalHandler   lexicalHandler;
65  
66      /** Whether comments should appear inside DTD declarations - defaults to false */
67      private boolean showCommentsInDTDs;
68  
69      /** Is the writer currently inside a DTD definition? */
70      private boolean inDTD;
71  
72      /** Prefix-to-URI mappings collected by startPrefixMapping() for the next element written from SAX events */
73      private Map namespacesMap;
74  
75      /**
76       * The maximum character code that is written unescaped,
77       * such as 127 in US-ASCII (7 bit) or 255 in ISO-* (8 bit),
78       * or -1 to not escape any characters (other than the special XML characters like < > &).
         * A value of 0 means "not set yet"; getMaximumAllowedCharacter() then computes a default.
79       */
80      private int maximumAllowedCharacter;
81  
82      public XMLWriter(Writer writer) {
83          this( writer, DEFAULT_FORMAT );
84      }
85  
86      public XMLWriter(Writer writer, OutputFormat format) {
87          this.writer = writer;
88          this.format = format;
89          namespaceStack.push(Namespace.NO_NAMESPACE);
90      }
91  
92      public XMLWriter() {
93          this.format = DEFAULT_FORMAT;
94          this.writer = new BufferedWriter( new OutputStreamWriter( System.out ) );
95          this.autoFlush = true;
96          namespaceStack.push(Namespace.NO_NAMESPACE);
97      }
98  
99      public XMLWriter(OutputStream out) throws UnsupportedEncodingException {
100         this.format = DEFAULT_FORMAT;
101         this.writer = createWriter(out, format.getEncoding());
102         this.autoFlush = true;
103         namespaceStack.push(Namespace.NO_NAMESPACE);
104     }
105 
106     public XMLWriter(OutputStream out, OutputFormat format) throws UnsupportedEncodingException {
107         this.format = format;
108         this.writer = createWriter(out, format.getEncoding());
109         this.autoFlush = true;
110         namespaceStack.push(Namespace.NO_NAMESPACE);
111     }
112 
113     public XMLWriter(OutputFormat format) throws UnsupportedEncodingException {
114         this.format = format;
115         this.writer = createWriter( System.out, format.getEncoding() );
116         this.autoFlush = true;
117         namespaceStack.push(Namespace.NO_NAMESPACE);
118     }
119 
120     public void setWriter(Writer writer) {
121         this.writer = writer;
122         this.autoFlush = false;
123     }
124 
125     public void setOutputStream(OutputStream out) throws UnsupportedEncodingException {
126         this.writer = createWriter(out, format.getEncoding());
127         this.autoFlush = true;
128     }
129 
130     /**
131      * @return true if text thats output should be escaped.
132      * This is enabled by default. It could be disabled if
133      * the output format is textual, like in XSLT where we can have
134      * xml, html or text output.
135      */
136     public boolean isEscapeText() {
137         return escapeText;
138     }
139 
140     /**
141      * Sets whether text output should be escaped or not.
142      * This is enabled by default. It could be disabled if
143      * the output format is textual, like in XSLT where we can have
144      * xml, html or text output.
145      */
146     public void setEscapeText(boolean escapeText) {
147         this.escapeText = escapeText;
148     }
149 
150 
151     /** Set the initial indentation level.  This can be used to output
152       * a document (or, more likely, an element) starting at a given
153       * indent level, so it's not always flush against the left margin.
154       * Default: 0
155       *
156       * @param indentLevel the number of indents to start with
157       */
158     public void setIndentLevel(int indentLevel) {
159         this.indentLevel = indentLevel;
160     }
161 
162     /**
163      * Returns the maximum allowed character code that should be allowed
164      * unescaped which defaults to 127 in US-ASCII (7 bit) or
165      * 255 in ISO-* (8 bit).
166      */
167     public int getMaximumAllowedCharacter() {
168         if (maximumAllowedCharacter == 0) {
169             maximumAllowedCharacter = defaultMaximumAllowedCharacter();
170         }
171         return maximumAllowedCharacter;
172     }
173 
174     /**
175      * Sets the maximum allowed character code that should be allowed
176      * unescaped
177      * such as 127 in US-ASCII (7 bit) or 255 in ISO-* (8 bit)
178      * or -1 to not escape any characters (other than the special XML characters like < > &)
179      *
180      * If this is not explicitly set then it is defaulted from the encoding.
181      *
182      * @param maximumAllowedCharacter The maximumAllowedCharacter to set
183      */
184     public void setMaximumAllowedCharacter(int maximumAllowedCharacter) {
185         this.maximumAllowedCharacter = maximumAllowedCharacter;
186     }
187 
188     /** Flushes the underlying Writer */
189     public void flush() throws IOException {
190         writer.flush();
191     }
192 
193     /** Closes the underlying Writer */
194     public void close() throws IOException {
195         writer.close();
196     }
197 
198     /** Writes the new line text to the underlying Writer */
199     public void println() throws IOException {
200         writer.write( format.getLineSeparator() );
201     }
202 
203     /** Writes the given {@link org.dom4j.Attribute}.
204       *
205       * @param attribute <code>Attribute</code> to output.
206       */
207     public void write(Attribute attribute) throws IOException {
208         writeAttribute(attribute);
209 
210         if ( autoFlush ) {
211             flush();
212         }
213     }
214 
215 
216     /** <p>This will print the <code>Document</code> to the current Writer.</p>
217      *
218      * <p> Warning: using your own Writer may cause the writer's
219      * preferred character encoding to be ignored.  If you use
220      * encodings other than UTF8, we recommend using the method that
221      * takes an OutputStream instead.  </p>
222      *
223      * <p>Note: as with all Writers, you may need to flush() yours
224      * after this method returns.</p>
225      *
226      * @param doc <code>Document</code> to format.
227      * @throws IOException - if there's any problem writing.
228      **/
229     public void write(Document doc) throws IOException {
230         writeDeclaration();
231 
232         if (doc.getDocType() != null) {
233             indent();
234             writeDocType(doc.getDocType());
235         }
236 
237         for ( int i = 0, size = doc.nodeCount(); i < size; i++ ) {
238             Node node = doc.node(i);
239             writeNode( node );
240         }
241         writePrintln();
242 
243         if ( autoFlush ) {
244             flush();
245         }
246     }
247 
248     /** <p>Writes the <code>{@link org.dom4j.Element}</code>, including
249       * its <code>{@link Attribute}</code>s, and its value, and all
250       * its content (child nodes) to the current Writer.</p>
251       *
252       * @param element <code>Element</code> to output.
253       */
254     public void write(Element element) throws IOException {
255         writeElement(element);
256 
257         if ( autoFlush ) {
258             flush();
259         }
260     }
261 
262 
263     /** Writes the given {@link CDATA}.
264       *
265       * @param cdata <code>CDATA</code> to output.
266       */
267     public void write(CDATA cdata) throws IOException {
268         writeCDATA( cdata.getText() );
269 
270         if ( autoFlush ) {
271             flush();
272         }
273     }
274 
275     /** Writes the given {@link Comment}.
276       *
277       * @param comment <code>Comment</code> to output.
278       */
279     public void write(Comment comment) throws IOException {
280         writeComment( comment.getText() );
281 
282         if ( autoFlush ) {
283             flush();
284         }
285     }
286 
287     /** Writes the given {@link DocumentType}.
288       *
289       * @param docType <code>DocumentType</code> to output.
290       */
291     public void write(DocumentType docType) throws IOException {
292         writeDocType(docType);
293 
294         if ( autoFlush ) {
295             flush();
296         }
297     }
298 
299 
300     /** Writes the given {@link Entity}.
301       *
302       * @param entity <code>Entity</code> to output.
303       */
304     public void write(Entity entity) throws IOException {
305         writeEntity( entity );
306 
307         if ( autoFlush ) {
308             flush();
309         }
310     }
311 
312 
313     /** Writes the given {@link Namespace}.
314       *
315       * @param namespace <code>Namespace</code> to output.
316       */
317     public void write(Namespace namespace) throws IOException {
318         writeNamespace(namespace);
319 
320         if ( autoFlush ) {
321             flush();
322         }
323     }
324 
325     /** Writes the given {@link ProcessingInstruction}.
326       *
327       * @param processingInstruction <code>ProcessingInstruction</code> to output.
328       */
329     public void write(ProcessingInstruction processingInstruction) throws IOException {
330         writeProcessingInstruction(processingInstruction);
331 
332         if ( autoFlush ) {
333             flush();
334         }
335     }
336 
337     /** <p>Print out a {@link String}, Perfoms
338       * the necessary entity escaping and whitespace stripping.</p>
339       *
340       * @param text is the text to output
341       */
342     public void write(String   text) throws IOException {
343         writeString(text);
344 
345         if ( autoFlush ) {
346             flush();
347         }
348     }
349 
350     /** Writes the given {@link Text}.
351       *
352       * @param text <code>Text</code> to output.
353       */
354     public void write(Text text) throws IOException {
355         writeString(text.getText());
356 
357         if ( autoFlush ) {
358             flush();
359         }
360     }
361 
362     /** Writes the given {@link Node}.
363       *
364       * @param node <code>Node</code> to output.
365       */
366     public void write(Node node) throws IOException {
367         writeNode(node);
368 
369         if ( autoFlush ) {
370             flush();
371         }
372     }
373 
374     /** Writes the given object which should be a String, a Node or a List
375       * of Nodes.
376       *
377       * @param object is the object to output.
378       */
379     public void write(Object   object) throws IOException {
380         if (object instanceof Node) {
381             write((Node) object);
382         }
383         else if (object instanceof String  ) {
384             write((String  ) object);
385         }
386         else if (object instanceof List) {
387             List list = (List) object;
388             for ( int i = 0, size = list.size(); i < size; i++ ) {
389                 write( list.get(i) );
390             }
391         }
392         else if (object != null) {
393             throw new IOException( "Invalid object: " + object );
394         }
395     }
396 
397 
398     /** <p>Writes the opening tag of an {@link Element},
399       * including its {@link Attribute}s
400       * but without its content.</p>
401       *
402       * @param element <code>Element</code> to output.
403       */
404     public void writeOpen(Element element) throws IOException {
405         writer.write("<");
406         writer.write( element.getQualifiedName() );
407         writeAttributes(element);
408         writer.write(">");
409     }
410 
411     /** <p>Writes the closing tag of an {@link Element}</p>
412       *
413       * @param element <code>Element</code> to output.
414       */
415     public void writeClose(Element element) throws IOException {
416         writeClose( element.getQualifiedName() );
417     }
418 
419 
420     // XMLFilterImpl methods
421     //-------------------------------------------------------------------------
422     public void parse(InputSource source) throws IOException, SAXException {
423         installLexicalHandler();
424         super.parse(source);
425     }
426 
427 
428     public void setProperty(String   name, Object   value) throws SAXNotRecognizedException, SAXNotSupportedException {
429         for (int i = 0; i < LEXICAL_HANDLER_NAMES.length; i++) {
430             if (LEXICAL_HANDLER_NAMES[i].equals(name)) {
431                 setLexicalHandler((LexicalHandler  ) value);
432                 return;
433             }
434         }
435         super.setProperty(name, value);
436     }
437 
438     public Object   getProperty(String   name) throws SAXNotRecognizedException, SAXNotSupportedException {
439         for (int i = 0; i < LEXICAL_HANDLER_NAMES.length; i++) {
440             if (LEXICAL_HANDLER_NAMES[i].equals(name)) {
441                 return getLexicalHandler();
442             }
443         }
444         return super.getProperty(name);
445     }
446 
447     public void setLexicalHandler (LexicalHandler   handler) {
448         if (handler == null) {
449             throw new NullPointerException  ("Null lexical handler");
450         }
451         else {
452             this.lexicalHandler = handler;
453         }
454     }
455 
456     public LexicalHandler   getLexicalHandler(){
457         return lexicalHandler;
458     }
459 
460 
461     // ContentHandler interface
462     //-------------------------------------------------------------------------
463     public void setDocumentLocator(Locator locator) {
464         super.setDocumentLocator(locator);
465     }
466 
467     public void startDocument() throws SAXException {
468         try {
469             writeDeclaration();
470             super.startDocument();
471         }
472         catch (IOException e) {
473             handleException(e);
474         }
475     }
476 
477     public void endDocument() throws SAXException {
478         super.endDocument();
479 
480         if ( autoFlush ) {
481             try {
482                 flush();
483             } catch ( IOException e) {}
484         }
485     }
486 
487     public void startPrefixMapping(String   prefix, String   uri) throws SAXException {
488         if ( namespacesMap == null ) {
489             namespacesMap = new HashMap();
490         }
491         namespacesMap.put(prefix, uri);
492         super.startPrefixMapping(prefix, uri);
493     }
494 
495     public void endPrefixMapping(String   prefix) throws SAXException {
496         super.endPrefixMapping(prefix);
497     }
498 
499     public void startElement(String   namespaceURI, String   localName, String   qName, Attributes attributes) throws SAXException {
500         try {
501             charactersAdded = false;
502 
503             writePrintln();
504             indent();
505             writer.write("<");
506             writer.write(qName);
507             writeNamespaces();
508             writeAttributes( attributes );
509             writer.write(">");
510             ++indentLevel;
511             lastOutputNodeType = Node.ELEMENT_NODE;
512 
513             super.startElement( namespaceURI, localName, qName, attributes );
514         }
515         catch (IOException e) {
516             handleException(e);
517         }
518     }
519 
520     public void endElement(String   namespaceURI, String   localName, String   qName) throws SAXException {
521         try {
522             charactersAdded = false;
523             --indentLevel;
524             if ( lastOutputNodeType == Node.ELEMENT_NODE ) {
525                 writePrintln();
526                 indent();
527             }
528 
529             // XXXX: need to determine this using a stack and checking for
530             // content / children
531             boolean hadContent = true;
532             if ( hadContent ) {
533                 writeClose(qName);
534             }
535             else {
536                 writeEmptyElementClose(qName);
537             }
538             lastOutputNodeType = Node.ELEMENT_NODE;
539 
540             super.endElement( namespaceURI, localName, qName );
541         }
542         catch (IOException e) {
543             handleException(e);
544         }
545     }
546 
547     public void characters(char[] ch, int start, int length) throws SAXException {
548         if (ch == null || ch.length == 0 || length <= 0) {
549             return;
550         }
551 
552         try {
553             /*
554              * we can't use the writeString method here because it's possible
555              * we don't receive all characters at once and calling writeString
556              * would cause unwanted spaces to be added in between these chunks
557              * of character arrays.
558              */
559             String   string = new String  (ch, start, length);
560 
561             if (escapeText) {
562                 string = escapeElementEntities(string);
563             }
564 
565             if (format.isTrimText()) {
566                 if ((lastOutputNodeType == Node.TEXT_NODE) && !charactersAdded) {
567                     writer.write(" ");
568                 } else if (charactersAdded && Character.isWhitespace(lastChar)) {
569                     writer.write(lastChar);
570                 }
571 
572                 String   delim = "";
573                 StringTokenizer tokens = new StringTokenizer(string);
574                 while (tokens.hasMoreTokens()) {
575                     writer.write(delim);
576                     writer.write(tokens.nextToken());
577                     delim = " ";
578                 }
579             } else {
580                 writer.write(string);
581             }
582 
583             charactersAdded = true;
584             lastChar = ch[start + length - 1];
585             lastOutputNodeType = Node.TEXT_NODE;
586 
587             super.characters(ch, start, length);
588         }
589         catch (IOException e) {
590             handleException(e);
591         }
592     }
593 
594     public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
595         super.ignorableWhitespace(ch, start, length);
596     }
597 
598     public void processingInstruction(String   target, String   data) throws SAXException {
599         try {
600             indent();
601             writer.write("<?");
602             writer.write(target);
603             writer.write(" ");
604             writer.write(data);
605             writer.write("?>");
606             writePrintln();
607             lastOutputNodeType = Node.PROCESSING_INSTRUCTION_NODE;
608 
609             super.processingInstruction(target, data);
610         }
611         catch (IOException e) {
612             handleException(e);
613         }
614     }
615 
616 
617 
618     // DTDHandler interface
619     //-------------------------------------------------------------------------
620     public void notationDecl(String   name, String   publicID, String   systemID) throws SAXException {
621         super.notationDecl(name, publicID, systemID);
622     }
623 
624     public void unparsedEntityDecl(String   name, String   publicID, String   systemID, String   notationName) throws SAXException {
625         super.unparsedEntityDecl(name, publicID, systemID, notationName);
626     }
627 
628 
629     // LexicalHandler interface
630     //-------------------------------------------------------------------------
631     public void startDTD(String   name, String   publicID, String   systemID) throws SAXException {
632         inDTD = true;
633         try {
634             writeDocType(name, publicID, systemID);
635         }
636         catch (IOException e) {
637             handleException(e);
638         }
639 
640         if (lexicalHandler != null) {
641             lexicalHandler.startDTD(name, publicID, systemID);
642         }
643     }
644 
645     public void endDTD() throws SAXException {
646         inDTD = false;
647         if (lexicalHandler != null) {
648             lexicalHandler.endDTD();
649         }
650     }
651 
652     public void startCDATA() throws SAXException {
653         try {
654             writer.write( "<![CDATA[" );
655         }
656         catch (IOException e) {
657             handleException(e);
658         }
659 
660         if (lexicalHandler != null) {
661             lexicalHandler.startCDATA();
662         }
663     }
664 
665     public void endCDATA() throws SAXException {
666         try {
667             writer.write( "]]>" );
668         }
669         catch (IOException e) {
670             handleException(e);
671         }
672 
673         if (lexicalHandler != null) {
674             lexicalHandler.endCDATA();
675         }
676     }
677 
678     public void startEntity(String   name) throws SAXException {
679         try {
680             writeEntityRef(name);
681         }
682         catch (IOException e) {
683             handleException(e);
684         }
685 
686         if (lexicalHandler != null) {
687             lexicalHandler.startEntity(name);
688         }
689     }
690 
691     public void endEntity(String   name) throws SAXException {
692         if (lexicalHandler != null) {
693             lexicalHandler.endEntity(name);
694         }
695     }
696 
697     public void comment(char[] ch, int start, int length) throws SAXException {
698         if ( showCommentsInDTDs || ! inDTD ) {
699             try {
700                 charactersAdded = false;
701                 writeComment( new String  (ch, start, length) );
702             }
703             catch (IOException e) {
704                 handleException(e);
705             }
706         }
707 
708         if (lexicalHandler != null) {
709             lexicalHandler.comment(ch, start, length);
710         }
711     }
712 
713 
714 
715     // Implementation methods
716     //-------------------------------------------------------------------------
717     protected void writeElement(Element element) throws IOException {
718         int size = element.nodeCount();
719         String   qualifiedName = element.getQualifiedName();
720 
721         writePrintln();
722         indent();
723 
724         writer.write("<");
725         writer.write(qualifiedName);
726 
727         int previouslyDeclaredNamespaces = namespaceStack.size();
728         Namespace ns = element.getNamespace();
729         if (isNamespaceDeclaration( ns ) ) {
730             namespaceStack.push(ns);
731             writeNamespace(ns);
732         }
733 
734         // Print out additional namespace declarations
735         boolean textOnly = true;
736         for ( int i = 0; i < size; i++ ) {
737             Node node = element.node(i);
738             if ( node instanceof Namespace ) {
739                 Namespace additional = (Namespace) node;
740                 if (isNamespaceDeclaration( additional ) ) {
741                     namespaceStack.push(additional);
742                     writeNamespace(additional);
743                 }
744             }
745             else if ( node instanceof Element) {
746                 textOnly = false;
747             }
748             else if ( node instanceof Comment) {
749                 textOnly = false;
750             }
751         }
752 
753         writeAttributes(element);
754 
755         lastOutputNodeType = Node.ELEMENT_NODE;
756 
757         if ( size <= 0 ) {
758             writeEmptyElementClose(qualifiedName);
759         }
760         else {
761             writer.write(">");
762             if ( textOnly ) {
763                 // we have at least one text node so lets assume
764                 // that its non-empty
765                 writeElementContent(element);
766             }
767             else {
768                 // we know it's not null or empty from above
769                 ++indentLevel;
770 
771                 writeElementContent(element);
772 
773                 --indentLevel;
774 
775                 writePrintln();
776                 indent();
777             }
778             writer.write("</");
779             writer.write(qualifiedName);
780             writer.write(">");
781         }
782 
783         // remove declared namespaceStack from stack
784         while (namespaceStack.size() > previouslyDeclaredNamespaces) {
785             namespaceStack.pop();
786         }
787 
788         lastOutputNodeType = Node.ELEMENT_NODE;
789     }
790 
791     /**
792      * Determines if element is a special case of XML elements
793      * where it contains an xml:space attribute of "preserve".
794      * If it does, then retain whitespace.
795      */
796     protected final boolean isElementSpacePreserved(Element element) {
797       final Attribute attr = (Attribute)element.attribute("space");
798       boolean preserveFound=preserve; //default to global state
799       if (attr!=null) {
800         if ("xml".equals(attr.getNamespacePrefix()) &&
801             "preserve".equals(attr.getText())) {
802           preserveFound = true;
803         }
804         else {
805           preserveFound = false;
806         }
807       }
808       return preserveFound;
809     }
810     /** Outputs the content of the given element. If whitespace trimming is
811      * enabled then all adjacent text nodes are appended together before
812      * the whitespace trimming occurs to avoid problems with multiple
813      * text nodes being created due to text content that spans parser buffers
814      * in a SAX parser.
815      */
816     protected void writeElementContent(Element element) throws IOException {
817         boolean trim = format.isTrimText();
818         boolean oldPreserve=preserve;
819         if (trim) { //verify we have to before more expensive test
820           preserve=isElementSpacePreserved(element);
821           trim = !preserve;
822         }
823         if (trim) {
824             // concatenate adjacent text nodes together
825             // so that whitespace trimming works properly
826             Text lastTextNode = null;
827             StringBuilder   buffer = null;
828             boolean textOnly = true;
829             for ( int i = 0, size = element.nodeCount(); i < size; i++ ) {
830                 Node node = element.node(i);
831                 if ( node instanceof Text ) {
832                     if ( lastTextNode == null ) {
833                         lastTextNode = (Text) node;
834                     }
835                     else {
836                         if (buffer == null) {
837                             buffer = new StringBuilder  ( lastTextNode.getText() );
838                         }
839                       buffer.append( ((Text) node).getText() );
840                     }
841                 }
842                 else {
843                     if (!textOnly && format.isPadText()) {
844                         writer.write(PAD_TEXT);
845                     }
846 
847                     textOnly = false;
848 
849                     if ( lastTextNode != null ) {
850                         if ( buffer != null ) {
851                             writeString( buffer.toString() );
852                             buffer = null;
853                         }
854                         else {
855                             writeString( lastTextNode.getText() );
856                         }
857                         lastTextNode = null;
858 
859                         if (format.isPadText()) {
860                             writer.write(PAD_TEXT);
861                         }
862                     }
863                     writeNode(node);
864                 }
865             }
866             if ( lastTextNode != null ) {
867                 if (!textOnly && format.isPadText()) {
868                     writer.write(PAD_TEXT);
869                 }
870                 if ( buffer != null ) {
871                     writeString( buffer.toString() );
872                     buffer = null;
873                 }
874                 else {
875                     writeString( lastTextNode.getText() );
876                 }
877                 lastTextNode = null;
878             }
879         }
880         else {
881             Node lastTextNode = null;
882             for ( int i = 0, size = element.nodeCount(); i < size; i++ ) {
883                 Node node = element.node(i);
884                 if (node instanceof Text) {
885                     writeNode(node);
886                     lastTextNode = node;
887                 } else {
888                     if ((lastTextNode != null) && format.isPadText()) {
889                         writer.write(PAD_TEXT);
890                     }
891                     writeNode(node);
892                     if ((lastTextNode != null) && format.isPadText()) {
893                         writer.write(PAD_TEXT);
894                     }
895                     lastTextNode = null;
896                 }
897             }
898         }
899         preserve=oldPreserve;
900     }
901     protected void writeCDATA(String   text) throws IOException {
902         writer.write( "<![CDATA[" );
903         if (text != null) {
904             writer.write( text );
905         }
906         writer.write( "]]>" );
907 
908         lastOutputNodeType = Node.CDATA_SECTION_NODE;
909     }
910 
911     protected void writeDocType(DocumentType docType) throws IOException {
912         if (docType != null) {
913             docType.write( writer );
914             //writeDocType( docType.getElementName(), docType.getPublicID(), docType.getSystemID() );
915             writePrintln();
916         }
917     }
918 
919 
920     protected void writeNamespace(Namespace namespace) throws IOException {
921         if ( namespace != null ) {
922             writeNamespace(namespace.getPrefix(), namespace.getURI());
923         }
924     }
925 
926     /**
927      * Writes the SAX namepsaces
928      */
929     protected void writeNamespaces() throws IOException {
930         if ( namespacesMap != null ) {
931             for ( Iterator iter = namespacesMap.entrySet().iterator(); iter.hasNext(); ) {
932                 Map.Entry entry = (Map.Entry) iter.next();
933                 String   prefix = (String  ) entry.getKey();
934                 String   uri = (String  ) entry.getValue();
935                 writeNamespace(prefix, uri);
936             }
937             namespacesMap = null;
938         }
939     }
940 
941     /**
942      * Writes the SAX namepsaces
943      */
944     protected void writeNamespace(String   prefix, String   uri) throws IOException {
945         if ( prefix != null && prefix.length() > 0 ) {
946             writer.write(" xmlns:");
947             writer.write(prefix);
948             writer.write("=\"");
949         }
950         else {
951             writer.write(" xmlns=\"");
952         }
953         writer.write(uri);
954         writer.write("\"");
955     }
956 
957     protected void writeProcessingInstruction(ProcessingInstruction processingInstruction) throws IOException {
958         //indent();
959         writer.write( "<?" );
960         writer.write( processingInstruction.getName() );
961         writer.write( " " );
962         writer.write( processingInstruction.getText() );
963         writer.write( "?>" );
964         writePrintln();
965 
966         lastOutputNodeType = Node.PROCESSING_INSTRUCTION_NODE;
967     }
968 
969     protected void writeString(String   text) throws IOException {
970         if ( text != null && text.length() > 0 ) {
971             if ( escapeText ) {
972                 text = escapeElementEntities(text);
973             }
974 
975 //            if (format.isPadText()) {
976 //                if (lastOutputNodeType == Node.ELEMENT_NODE) {
977 //                    writer.write(PAD_TEXT);
978 //                }
979 //            }
980 
981             if (format.isTrimText()) {
982                 boolean first = true;
983                 StringTokenizer tokenizer = new StringTokenizer(text);
984                 while (tokenizer.hasMoreTokens()) {
985                     String   token = tokenizer.nextToken();
986                     if ( first ) {
987                         first = false;
988                         if ( lastOutputNodeType == Node.TEXT_NODE ) {
989                             writer.write(" ");
990                         }
991                     }
992                     else {
993                         writer.write(" ");
994                     }
995                     writer.write(token);
996                     lastOutputNodeType = Node.TEXT_NODE;
997                 }
998             }
999             else {
1000                lastOutputNodeType = Node.TEXT_NODE;
1001                writer.write(text);
1002            }
1003        }
1004    }
1005
1006    /**
1007     * This method is used to write out Nodes that contain text
1008     * and still allow for xml:space to be handled properly.
1009     *
1010     */
1011    protected void writeNodeText(Node node) throws IOException {
1012        String   text = node.getText();
1013        if (text != null && text.length() > 0) {
1014            if (escapeText) {
1015                text = escapeElementEntities(text);
1016            }
1017
1018            lastOutputNodeType = Node.TEXT_NODE;
1019            writer.write(text);
1020        }
1021    }
1022
1023    protected void writeNode(Node node) throws IOException {
1024        int nodeType = node.getNodeType();
1025        switch (nodeType) {
1026            case Node.ELEMENT_NODE:
1027                writeElement((Element) node);
1028                break;
1029            case Node.ATTRIBUTE_NODE:
1030                writeAttribute((Attribute) node);
1031                break;
1032            case Node.TEXT_NODE:
1033                writeNodeText(node);
1034                //write((Text) node);
1035                break;
1036            case Node.CDATA_SECTION_NODE:
1037                writeCDATA(node.getText());
1038                break;
1039            case Node.ENTITY_REFERENCE_NODE:
1040                writeEntity((Entity) node);
1041                break;
1042            case Node.PROCESSING_INSTRUCTION_NODE:
1043                writeProcessingInstruction((ProcessingInstruction) node);
1044                break;
1045            case Node.COMMENT_NODE:
1046                writeComment(node.getText());
1047                break;
1048            case Node.DOCUMENT_NODE:
1049                write((Document) node);
1050                break;
1051            case Node.DOCUMENT_TYPE_NODE:
1052                writeDocType((DocumentType) node);
1053                break;
1054            case Node.NAMESPACE_NODE:
1055                // Will be output with attributes
1056                //write((Namespace) node);
1057                break;
1058            default:
1059                throw new IOException( "Invalid node type: " + node );
1060        }
1061    }
1062
1063
1064
1065
1066    protected void installLexicalHandler() {
1067        XMLReader parent = getParent();
1068        if (parent == null) {
1069            throw new NullPointerException  ("No parent for filter");
1070        }
1071        // try to register for lexical events
1072        for (int i = 0; i < LEXICAL_HANDLER_NAMES.length; i++) {
1073            try {
1074                parent.setProperty(LEXICAL_HANDLER_NAMES[i], this);
1075                break;
1076            }
1077            catch (SAXNotRecognizedException ex) {
1078                // ignore
1079            }
1080            catch (SAXNotSupportedException ex) {
1081                // ignore
1082            }
1083        }
1084    }
1085
1086    protected void writeDocType(String   name, String   publicID, String   systemID) throws IOException {
1087        boolean hasPublic = false;
1088
1089        writer.write("<!DOCTYPE ");
1090        writer.write(name);
1091        if ((publicID != null) && (!publicID.equals(""))) {
1092            writer.write(" PUBLIC \"");
1093            writer.write(publicID);
1094            writer.write("\"");
1095            hasPublic = true;
1096        }
1097        if ((systemID != null) && (!systemID.equals(""))) {
1098            if (!hasPublic) {
1099                writer.write(" SYSTEM");
1100            }
1101            writer.write(" \"");
1102            writer.write(systemID);
1103            writer.write("\"");
1104        }
1105        writer.write(">");
1106        writePrintln();
1107    }
1108
1109    protected void writeEntity(Entity entity) throws IOException {
1110        if (!resolveEntityRefs()) {
1111            writeEntityRef( entity.getName() );
1112        } else {
1113            writer.write(entity.getText());
1114        }
1115    }
1116
1117    protected void writeEntityRef(String   name) throws IOException {
1118        writer.write( "&" );
1119        writer.write( name );
1120        writer.write( ";" );
1121
1122        lastOutputNodeType = Node.ENTITY_REFERENCE_NODE;
1123    }
1124
1125    protected void writeComment(String   text) throws IOException {
1126        if (format.isNewlines()) {
1127            println();
1128            indent();
1129        }
1130        writer.write( "<!--" );
1131        writer.write( text );
1132        writer.write( "-->" );
1133
1134        lastOutputNodeType = Node.COMMENT_NODE;
1135    }
1136
1137    /** Writes the attributes of the given element
1138      *
1139      */
1140    protected void writeAttributes( Element element ) throws IOException {
1141
1142        // I do not yet handle the case where the same prefix maps to
1143        // two different URIs. For attributes on the same element
1144        // this is illegal; but as yet we don't throw an exception
1145        // if someone tries to do this
1146        for ( int i = 0, size = element.attributeCount(); i < size; i++ ) {
1147            Attribute attribute = element.attribute(i);
1148            Namespace ns = attribute.getNamespace();
1149            if (ns != null && ns != Namespace.NO_NAMESPACE && ns != Namespace.XML_NAMESPACE) {
1150                String   prefix = ns.getPrefix();
1151                String   uri = namespaceStack.getURI(prefix);
1152                if (!ns.getURI().equals(uri)) { // output a new namespace declaration
1153                    writeNamespace(ns);
1154                    namespaceStack.push(ns);
1155                }
1156            }
1157
1158            // If the attribute is a namespace declaration, check if we have already
1159            // written that declaration elsewhere (if that's the case, it must be
1160            // in the namespace stack
1161            String   attName = attribute.getName();
1162            if (attName.startsWith("xmlns:")) {
1163                String   prefix = attName.substring(6);
1164                if (namespaceStack.getNamespaceForPrefix(prefix) == null) {
1165                    String   uri = attribute.getValue();
1166                    namespaceStack.push(prefix, uri);
1167                    writeNamespace(prefix, uri);
1168                }
1169            } else if (attName.equals("xmlns")) {
1170                if (namespaceStack.getDefaultNamespace() == null) {
1171                    String   uri = attribute.getValue();
1172                    namespaceStack.push(null, uri);
1173                    writeNamespace(null, uri);
1174                }
1175            } else {
1176                char quote = format.getAttributeQuoteCharacter();
1177                writer.write(" ");
1178                writer.write(attribute.getQualifiedName());
1179                writer.write("=");
1180                writer.write(quote);
1181                writeEscapeAttributeEntities(attribute.getValue());
1182                writer.write(quote);
1183            }
1184        }
1185    }
1186
1187    protected void writeAttribute(Attribute attribute) throws IOException {
1188        writer.write(" ");
1189        writer.write(attribute.getQualifiedName());
1190        writer.write("=");
1191
1192        char quote = format.getAttributeQuoteCharacter();
1193        writer.write(quote);
1194
1195        writeEscapeAttributeEntities(attribute.getValue());
1196
1197        writer.write(quote);
1198        lastOutputNodeType = Node.ATTRIBUTE_NODE;
1199    }
1200
1201    protected void writeAttributes(Attributes attributes) throws IOException {
1202        for (int i = 0, size = attributes.getLength(); i < size; i++) {
1203            writeAttribute( attributes, i );
1204        }
1205    }
1206
1207    protected void writeAttribute(Attributes attributes, int index) throws IOException {
1208        char quote = format.getAttributeQuoteCharacter();
1209        writer.write(" ");
1210        writer.write(attributes.getQName(index));
1211        writer.write("=");
1212        writer.write(quote);
1213        writeEscapeAttributeEntities(attributes.getValue(index));
1214        writer.write(quote);
1215    }
1216
1217
1218
1219    protected void indent() throws IOException {
1220        String   indent = format.getIndent();
1221        if ( indent != null && indent.length() > 0 ) {
1222            for ( int i = 0; i < indentLevel; i++ ) {
1223                writer.write(indent);
1224            }
1225        }
1226    }
1227
1228    /**
1229     * <p>
1230     * This will print a new line only if the newlines flag was set to true
1231     * </p>
1232     */
1233    protected void writePrintln() throws IOException  {
1234        if (format.isNewlines()) {
1235            writer.write( format.getLineSeparator() );
1236        }
1237    }
1238
1239    /**
1240     * Get an OutputStreamWriter, use preferred encoding.
1241     */
1242    protected Writer createWriter(OutputStream outStream, String   encoding) throws UnsupportedEncodingException {
1243        return new BufferedWriter(
1244            new OutputStreamWriter( outStream, encoding )
1245        );
1246    }
1247
1248    /**
1249     * <p>
1250     * This will write the declaration to the given Writer.
1251     *   Assumes XML version 1.0 since we don't directly know.
1252     * </p>
1253     */
1254    protected void writeDeclaration() throws IOException {
1255        String   encoding = format.getEncoding();
1256
1257        // Only print of declaration is not suppressed
1258        if (! format.isSuppressDeclaration()) {
1259            // Assume 1.0 version
1260            if (encoding.equals("UTF8")) {
1261                writer.write("<?xml version=\"1.0\"");
1262                if (!format.isOmitEncoding()) {
1263                    writer.write(" encoding=\"UTF-8\"");
1264                }
1265                writer.write("?>");
1266            } else {
1267                writer.write("<?xml version=\"1.0\"");
1268                if (! format.isOmitEncoding()) {
1269                    writer.write(" encoding=\"" + encoding + "\"");
1270                }
1271                writer.write("?>");
1272            }
1273            if (format.isNewLineAfterDeclaration()) {
1274                println();
1275            }
1276        }
1277    }
1278
1279    protected void writeClose(String   qualifiedName) throws IOException {
1280        writer.write("</");
1281        writer.write(qualifiedName);
1282        writer.write(">");
1283    }
1284
1285    protected void writeEmptyElementClose(String   qualifiedName) throws IOException {
1286        // Simply close up
1287        if (! format.isExpandEmptyElements()) {
1288            writer.write("/>");
1289        } else {
1290            writer.write("></");
1291            writer.write(qualifiedName);
1292            writer.write(">");
1293        }
1294    }
1295
1296    protected boolean isExpandEmptyElements() {
1297        return format.isExpandEmptyElements();
1298    }
1299
1300
1301    /** This will take the pre-defined entities in XML 1.0 and
1302      * convert their character representation to the appropriate
1303      * entity reference, suitable for XML attributes.
1304      */
1305    protected String   escapeElementEntities(String   text) {
1306        char[] block = null;
1307        int i, last = 0, size = text.length();
1308        for ( i = 0; i < size; i++ ) {
1309            String   entity = null;
1310            char c = text.charAt(i);
1311            switch( c ) {
1312                case '<' :
1313                    entity = "&lt;";
1314                    break;
1315                case '>' :
1316                    entity = "&gt;";
1317                    break;
1318                case '&' :
1319                    entity = "&amp;";
1320                    break;
1321                case '\t': case '\n': case '\r':
1322                    // don't encode standard whitespace characters
1323                    if (preserve) {
1324                      entity=String.valueOf(c);
1325                    }
1326                    break;
1327                default:
1328                    if (c < 32 || shouldEncodeChar(c)) {
1329                        entity = "&#" + (int) c + ";";
1330                    }
1331                    break;
1332            }
1333            if (entity != null) {
1334                if ( block == null ) {
1335                    block = text.toCharArray();
1336                }
1337                buffer.append(block, last, i - last);
1338                buffer.append(entity);
1339                last = i + 1;
1340            }
1341        }
1342        if ( last == 0 ) {
1343            return text;
1344        }
1345        if ( last < size ) {
1346            if ( block == null ) {
1347                block = text.toCharArray();
1348            }
1349            buffer.append(block, last, i - last);
1350        }
1351        String   answer = buffer.toString();
1352        buffer.setLength(0);
1353        return answer;
1354    }
1355
1356
1357    protected void writeEscapeAttributeEntities(String   text) throws IOException {
1358        if ( text != null ) {
1359            String   escapedText = escapeAttributeEntities( text );
1360            writer.write( escapedText );
1361        }
1362    }
1363    /** This will take the pre-defined entities in XML 1.0 and
1364      * convert their character representation to the appropriate
1365      * entity reference, suitable for XML attributes.
1366      */
1367    protected String   escapeAttributeEntities(String   text) {
1368        char quote = format.getAttributeQuoteCharacter();
1369
1370        char[] block = null;
1371        int i, last = 0, size = text.length();
1372        for ( i = 0; i < size; i++ ) {
1373            String   entity = null;
1374            char c = text.charAt(i);
1375            switch( c ) {
1376                case '<' :
1377                    entity = "&lt;";
1378                    break;
1379                case '>' :
1380                    entity = "&gt;";
1381                    break;
1382                case '\'' :
1383                    if (quote == '\'') {
1384                        entity = "&apos;";
1385                    }
1386                    break;
1387                case '\"' :
1388                    if (quote == '\"') {
1389                        entity = "&quot;";
1390                    }
1391                    break;
1392                case '&' :
1393                    entity = "&amp;";
1394                    break;
1395                case '\t': case '\n': case '\r':
1396                    // don't encode standard whitespace characters
1397                    break;
1398                default:
1399                    if (c < 32 || shouldEncodeChar(c)) {
1400                        entity = "&#" + (int) c + ";";
1401                    }
1402                    break;
1403            }
1404            if (entity != null) {
1405                if ( block == null ) {
1406                    block = text.toCharArray();
1407                }
1408                buffer.append(block, last, i - last);
1409                buffer.append(entity);
1410                last = i + 1;
1411            }
1412        }
1413        if ( last == 0 ) {
1414            return text;
1415        }
1416        if ( last < size ) {
1417            if ( block == null ) {
1418                block = text.toCharArray();
1419            }
1420            buffer.append(block, last, i - last);
1421        }
1422        String   answer = buffer.toString();
1423        buffer.setLength(0);
1424        return answer;
1425    }
1426
1427    /**
1428     * Should the given character be escaped. This depends on the
1429     * encoding of the document.
1430     *
1431     * @return boolean
1432     */
1433    protected boolean shouldEncodeChar(char c) {
1434        int max = getMaximumAllowedCharacter();
1435        return max > 0 && c > max;
1436    }
1437
1438    /**
1439     * Returns the maximum allowed character code that should be allowed
1440     * unescaped which defaults to 127 in US-ASCII (7 bit) or
1441     * 255 in ISO-* (8 bit).
1442     */
1443    protected int defaultMaximumAllowedCharacter() {
1444        String   encoding = format.getEncoding();
1445        if (encoding != null) {
1446            if (encoding.equals("US-ASCII")) {
1447                return 127;
1448            }
1449        }
1450        // no encoding for things like ISO-*, UTF-8 or UTF-16
1451        return -1;
1452    }
1453
1454    protected boolean isNamespaceDeclaration( Namespace ns ) {
1455        if (ns != null && ns != Namespace.XML_NAMESPACE) {
1456            String   uri = ns.getURI();
1457            if ( uri != null ) {
1458                if ( ! namespaceStack.contains( ns ) ) {
1459                    return true;
1460
1461                }
1462            }
1463        }
1464        return false;
1465
1466    }
1467
1468    protected void handleException(IOException e) throws SAXException {
1469        throw new SAXException(e);
1470    }
1471
1472    //Laramie Crocker 4/8/2002 10:38AM
1473    /** Lets subclasses get at the current format object, so they can call setTrimText, setNewLines, etc.
1474      * Put in to support the HTMLWriter, in the way
1475      *  that it pushes the current newline/trim state onto a stack and overrides
1476      *  the state within preformatted tags.
1477      */
1478    protected OutputFormat getOutputFormat() {
1479        return format;
1480    }
1481
1482    public boolean resolveEntityRefs() {
1483        return resolveEntityRefs;
1484    }
1485
1486    public void setResolveEntityRefs(boolean resolve) {
1487        this.resolveEntityRefs = resolve;
1488    }
1489}
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags