DocBookPrinter


1   /* ====================================================================
2    * The Jcorporate Apache Style Software License, Version 1.2 05-07-2002
3    *
4    * Copyright (c) 1995-2002 Jcorporate Ltd. All rights reserved.
5    *
6    * Redistribution and use in source and binary forms, with or without
7    * modification, are permitted provided that the following conditions
8    * are met:
9    *
10   * 1. Redistributions of source code must retain the above copyright
11   *    notice, this list of conditions and the following disclaimer.
12   *
13   * 2. Redistributions in binary form must reproduce the above copyright
14   *    notice, this list of conditions and the following disclaimer in
15   *    the documentation and/or other materials provided with the
16   *    distribution.
17   *
18   * 3. The end-user documentation included with the redistribution,
19   *    if any, must include the following acknowledgment:
20   *       "This product includes software developed by Jcorporate Ltd.
21   *        (http://www.jcorporate.com/)."
22   *    Alternately, this acknowledgment may appear in the software itself,
23   *    if and wherever such third-party acknowledgments normally appear.
24   *
25   * 4. "Jcorporate" and product names such as "Expresso" must
26   *    not be used to endorse or promote products derived from this
27   *    software without prior written permission. For written permission,
28   *    please contact info@jcorporate.com.
29   *
30   * 5. Products derived from this software may not be called "Expresso",
31   *    or other Jcorporate product names; nor may "Expresso" or other
32   *    Jcorporate product names appear in their name, without prior
33   *    written permission of Jcorporate Ltd.
34   *
35   * 6. No product derived from this software may compete in the same
36   *    market space, i.e. framework, without prior written permission
37   *    of Jcorporate Ltd. For written permission, please contact
38   *    partners@jcorporate.com.
39   *
40   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
41   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
42   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
43   * DISCLAIMED.  IN NO EVENT SHALL JCORPORATE LTD OR ITS CONTRIBUTORS
44   * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
45   * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
46   * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
47   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
48   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
49   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
50   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51   * SUCH DAMAGE.
52   * ====================================================================
53   *
54   * This software consists of voluntary contributions made by many
55   * individuals on behalf of the Jcorporate Ltd. Contributions back
56   * to the project(s) are encouraged when you make modifications.
57   * Please send them to support@jcorporate.com. For more information
58   * on Jcorporate Ltd. and its products, please see
59   * <http://www.jcorporate.com/>.
60   *
61   * Portions of this software are based upon other open source
62   * products and are subject to their respective licenses.
63   */
64  
65  package com.jcorporate.expresso.ext.report;
66  
67  
68  import com.jcorporate.expresso.core.misc.StringUtil;
69  import com.jcorporate.expresso.kernel.util.FastStringBuffer;
70  import org.w3c.dom.Attr  ;
71  import org.w3c.dom.DocumentType  ;
72  import org.w3c.dom.Entity  ;
73  import org.w3c.dom.NamedNodeMap  ;
74  import org.w3c.dom.Node  ;
75  import org.w3c.dom.NodeList  ;
76  import org.xml.sax.SAXException  ;
77  import org.xml.sax.SAXParseException  ;
78  
79  import java.io.IOException  ;
80  import java.io.StringWriter  ;
81  import java.io.Writer  ;
82  import java.util.Arrays  ;
83  import java.util.HashSet  ;
84  import java.util.Set  ;
85  
86  
87  /**
88   * A (DOM) parser that understands the docbook schema and prints it nicely.
89   *
90   * @author David Lloyd
91   */
92  
93  public class DocBookPrinter extends XMLPrinter {
94  
    /**
     * The maximum character count (including indentation) allowed for single line text.
     * printFormatted compares the indentation width plus the text length against this
     * value, and formatParaText uses it as the wrap threshold.
     */
    protected int _maxFormatLen = 70;
    /**
     * Set of inline tag names. Filled in by the constructor from getInlineTags()
     * and consulted by isInlineTag(String).
     */
    protected Set   _inlineTagSet = null;
103 
104     public DocBookPrinter() {
105         setIndent('\t', 1);
106 
107         setOmitXmlDecl(true);
108 
109         setWriter(SYSTEM_OUT);
110 
111         _inlineTagSet = new HashSet  (Arrays.asList(getInlineTags()));
112     }
113 
114     /**
115      * Return the given node as a string.  If any error occurs
116      * in processing (likely an io exception from outputDocument()),
117      * null will be returned.
118      */
119     public static String   nodeToString(Node   node) {
120         StringWriter   writer = new StringWriter  ();
121         try {
122             nodeToString(node, writer);
123             return writer.getBuffer().toString();
124         } catch (IOException   e) {
125             return null;
126         }
127     }
128 
129     /**
130      * Print the given node to a writer.
131      */
132     public static void nodeToString(Node   node, Writer   writer) throws IOException   {
133         DocBookPrinter printer = new DocBookPrinter();
134         printer.setWriter(writer);
135         printer.outputDocument(node);
136     }
137 
    /**
     * Override to provide tag relative intelligence.
     * <p>
     * Recursively prints the given node, dispatching on its DOM node type.
     * Elements are laid out according to whether they are inline tags,
     * pre-formatted tags or paragraph tags (see isInlineTag, isPreTag and
     * isParaTag). A null node prints nothing. The output stream is flushed
     * at the end of every call, including recursive calls and calls with a
     * null node.
     *
     * @param node The node to print; may be null.
     * @throws IOException if writing to or flushing the output fails.
     */
    protected void printTree(Node   node) throws IOException   {
        int nodeType = -1;

        if (node != null) {
            nodeType = node.getNodeType();
            switch (nodeType) {
                // Document: just print every child in order.
                case Node.DOCUMENT_NODE:
                    {
                        NodeList   nodes = node.getChildNodes();

                        if (nodes != null) {
                            for (int i = 0; i < nodes.getLength(); i++) {
                                printTree(nodes.item(i));
                            }
                        }

                        break;
                    }

                case Node.ELEMENT_NODE:
                    {
                        String   name = node.getNodeName();

                        // Inline tags are written in place; all other tags start at the
                        // current indentation.
                        boolean inlineTag = isInlineTag(name);
                        if (inlineTag) {
                            print("<" + name);
                        } else {
                            print("<" + name, getIndent(this._column));
                        }

                        NamedNodeMap   attributes = node.getAttributes();
                        for (int i = 0; i < attributes.getLength(); i++) {
                            Attr   current = (Attr  ) attributes.item(i);

                            /*
                               With some DOM implementations the default value shows up in addition to a
                               specified value so you get duplicate attributes.  We will only write
                               specified attributes.
                            */
                            if (current.getSpecified() == true) {
                                // Attribute values are always written inside single quotes.
                                this.print(
                                        " " + current.getNodeName() + "='" + toXML(current.getNodeValue(), true) + "'");
                            }

                        }//for attrs

                        if (!node.hasChildNodes()) {
                            // Close opening tag, because no children
                            this.println(" />");
                        } else {
                            // Close opening tag normally to account for children
                            this.print(">");

                            NodeList   children = node.getChildNodes();

                            // If a child is a text node, we don't want to print carriage returns that get picked
                            //  up as string text
                            // hasChildElements becomes true as soon as one child is something other
                            // than text, CDATA or an inline element.
                            boolean hasChildElements = false;
                            if (children != null) {
                                int len = children.getLength();
                                for (int i = 0; i < len; i++) {
                                    if (children.item(i).getNodeType() != Node.TEXT_NODE
                                            && children.item(i).getNodeType() != Node.CDATA_SECTION_NODE
                                            && (children.item(i).getNodeType() != Node.ELEMENT_NODE ||
                                            !isInlineTag(children.item(i)))) {
                                        hasChildElements = true;
                                        break;
                                    }
                                }
                            }

                            // If non-text node as child, we can print enter
                            if (!inlineTag && hasChildElements) {
                                this.println("");
                            }

                            // Children are printed one indentation level deeper.
                            this._column++;

                            boolean preTag = isPreTag(name);
                            boolean paraTag = isParaTag(name);

                            boolean isSpecialTag = preTag || paraTag;

                            // Accumulates inline content (text, comments, inline elements) so
                            // that it can be laid out as one unit by printFormatted.
                            FastStringBuffer fsb = new FastStringBuffer(1024);

                            for (int i = 0; i < children.getLength(); i++) {
                                if (inlineTag) {
                                    // Inside an inline tag every child is rendered to a string by
                                    // the static nodeToString (a separate printer) and buffered.
                                    fsb.append(nodeToString(children.item(i)));
                                } else if (!isSpecialTag || children.item(i).getNodeType() != Node.TEXT_NODE) {
                                    // Either an ordinary tag, or a non-text child of a pre/para tag.
                                    Node   child = children.item(i);

                                    if (child.getNodeType() == Node.COMMENT_NODE || isInlineTag(child)) {
                                        fsb.append(nodeToString(child));
                                    } else {
                                        // A block-level child: flush buffered inline content first,
                                        // then recurse.
                                        if (fsb.length() > 0) {
                                            if (printFormatted(fsb.toString())) {
                                                hasChildElements = true;
                                            } else {
                                                println("");
                                            }
                                            fsb.clear();
                                        }
                                        printTree(child);
                                    }
                                } else if (paraTag) {
                                    // Text directly inside a paragraph tag is escaped and buffered.
                                    String   value = toXML(children.item(i).getNodeValue(), false);
                                    fsb.append(value);
                                } else if (preTag) {
                                    // Non-blank text directly inside a pre-formatted tag is written
                                    // trimmed inside a CDATA section; blank text is dropped.
                                    if (children.item(i).getNodeValue() != null && children.item(i).getNodeValue().trim().length() > 0) {
                                        if (fsb.length() > 0) {
                                            if (printFormatted(fsb.toString())) {
                                                hasChildElements = true;
                                            }
                                            fsb.clear();
                                        }

                                        print("<![CDATA[");
                                        print(children.item(i).getNodeValue().trim());
                                        print("]]>");
                                        hasChildElements = true;
                                    }
                                }
                            }

                            // Flush whatever inline content is still buffered.
                            if (printFormatted(fsb.toString())) {
                                hasChildElements = true;
                            }
                            fsb.setLength(0);

                            this._column--;

                            // Write closing tag.  Once again for text nodes treat differently
                            if (hasChildElements && !inlineTag && !preTag) {
                                // Block content was printed: closing tag goes on its own indented line.
                                this.println("</" + name + ">", getIndent(this._column));
                            } else if (!inlineTag) {
                                this.println("</" + name + ">");
                            } else {
                                // Inline tags are closed without a trailing line break.
                                print("</" + name + ">");
                            }
                        }

                        break;
                    }

                // Free-standing text: printed trimmed and escaped; blank text is skipped.
                case Node.TEXT_NODE:
                    {
                        String   nodeValue = node.getNodeValue().trim();
                        if (!nodeValue.equals("")) {
                            // Normalize string
                            this.print(toXML(nodeValue, false));
                        }
                        break;
                    }

                // CDATA: non-blank content is re-emitted as CDATA after convertNewline.
                case Node.CDATA_SECTION_NODE:
                    {
                        if (node.getNodeValue() != null && node.getNodeValue().trim().length() > 0) {
                            this.print("<![CDATA[" /*, getIndent( this._column ) */);
                            this.print(convertNewline(node.getNodeValue()));
                            this.print("]]>");
                        }
                        break;
                    }

                case Node.PROCESSING_INSTRUCTION_NODE:
                    {
                        // NOTE(review): when _omitXmlDecl is true, only an instruction named
                        // exactly "xml" is printed and every other instruction is dropped -
                        // confirm this is the intended meaning of the flag.
                        if (node.getNodeName() != null) {
                            if (!_omitXmlDecl && (false == node.getNodeName().startsWith("xml")) && (false == node.getNodeName().startsWith(
                                    "xsl"))) {
                                // This should NOT be correct, but Xerces seems to have a bug - bt 4/2001
                                this.println("<?xml " + node.getNodeName() + "=\"" + node.getNodeValue() + "\"?>");
                            } else {
                                // This should be the normal behaviour
                                if (!_omitXmlDecl || "xml".equals(node.getNodeName())) {
                                    this.println("<?" + node.getNodeName() + " " + node.getNodeValue() + " ?>");
                                }
                            }
                        }
                        break;
                    }

                // Entity reference: written as &name; followed by a line break.
                case Node.ENTITY_REFERENCE_NODE:
                    {
                        this.println("&" + node.getNodeName() + ";");
                        break;
                    }

                // Comment: written verbatim, with no line break added.
                case Node.COMMENT_NODE:
                    {
                        print("<!--" + node.getNodeValue() + "-->");
                        break;
                    }

                case Node.DOCUMENT_TYPE_NODE:
                    {
                        DocumentType   docType = (DocumentType  ) node;

                        // Note: below is since DOM 2
                        //  Print either SYSTEM '...'  or  PUBLIC '...' '...'
                        this.print("<!DOCTYPE " + docType.getName());
                        if (docType.getPublicId() != null) {
                            this.print(" PUBLIC ");
                        } else if (docType.getSystemId() != null) {
                            this.print(" SYSTEM ");
                        }
                        // There may not even be a public or system, that's OK

                        if (docType.getPublicId() != null) {

                            this.print("\"" + docType.getPublicId() + "\" ");
                        }
                        if (docType.getSystemId() != null) {
                            this.print("\"" + docType.getSystemId() + "\" ");
                        }

                        // Also print any entities that were defined, such as [<!ENTITY lt "<" >]
                        // NOTE(review): each entity is written inside its own [ ... ] pair -
                        // confirm this is acceptable when more than one entity is declared.
                        NamedNodeMap   nodes = docType.getEntities();

                        for (int i = 0; i < nodes.getLength(); i++) {
                            this.println("");
                            Entity   entity = (Entity  ) nodes.item(i);
                            this.print(" [<!ENTITY " + entity.getNodeName() + " ");

                            // Entity should have a child node that is its value
                            NodeList   children = entity.getChildNodes();
                            if (children != null && children.getLength() > 0) {
                                this.print("\"" + XMLPrinter.nodeToString(children.item(0)) + "\">]");
                            } else {
                                this.print("\"" + entity.getNodeValue() + "\">]");
                            }
                        }

                        // End the doctype entry
                        this.println("");
                        this.println(">");

                        break;
                    }
            }

        }

        // Runs on every call, even when node is null or its type is not handled above.
        this._out.flush();
    }
385 
386     /**
387      * @return True if the tag name is a docbook pre-formatted tag.
388      */
389     protected boolean isPreTag(String   tagName) {
390         return "programlisting".equals(tagName) || "programlistingco".equals(tagName) ||
391                 "screen".equals(tagName) || "screenco".equals(tagName) ||
392                 "literallayout".equals(tagName);
393     }
394 
395     /**
396      * @return True if the node is a docbook pre-formatted tag.
397      */
398     protected boolean isPreTag(Node   node) {
399         return node != null &&
400                 node.getNodeType() == Node.ELEMENT_NODE &&
401                 isPreTag(node.getNodeName());
402     }
403 
404     /**
405      * @return True if the tag name is a docbook paragraph tag.
406      */
407     protected boolean isParaTag(String   tagName) {
408         return "para".equals(tagName);
409     }
410 
411     /**
412      * @return True if the node is a docbook paragraph tag.
413      */
414     protected boolean isParaTag(Node   node) {
415         return node != null &&
416                 node.getNodeType() == Node.ELEMENT_NODE &&
417                 isParaTag(node.getNodeName());
418     }
419 
420     /**
421      * @return True if the node is a docbook inline tag.
422      */
423     protected boolean isInlineTag(Node   node) {
424         return node != null &&
425                 node.getNodeType() == Node.ELEMENT_NODE &&
426                 (isInlineTag(node.getNodeName()) || isPreTag(node.getParentNode()));
427     }
428 
429     /**
430      * @return True if the tag name is a docbook inline tag.
431      */
432     protected boolean isInlineTag(String   tagName) {
433         return _inlineTagSet.contains(tagName);
434     }
435 
436     /**
437      * @return The list of inline tags.
438      */
439     protected String  [] getInlineTags() {
440         return new String  []{
441             "emphasis",
442             "inlinegraphic",
443             "link",
444             "olink",
445             "ulink",
446             "phrase",
447             "sgmltag",
448             "subscript",
449             "superscript",
450             "symbol",
451             "trademark",
452             "wordasword",
453             "xref",
454         };
455     }
456 
457     /**
458      * @return The virtual area occupied by a indentation level.
459      */
460     protected int getIndentLength(int col) {
461         return _indentLength * col;
462     }
463 
464     /**
465      * Print the text formatted for a tag that does not place significance on
466      * multiple whitespace.  This will layout the text nicely for viewing, short
467      * text is placed inline while long text is formatted into a block. The
468      * threshhold is defined by _maxFormatLen.
469      * An example is docbook's &lt;para&gt;.
470      *
471      * @param value The text to print.
472      */
473     protected boolean printFormatted(String   value) throws IOException   {
474         if (value == null || value.length() == 0) {
475             return false;
476         }
477         value = formatParaText(value.trim());
478         int len = getIndentLength(_column) + value.length();
479         if (len > _maxFormatLen) {
480             println("");
481             println(value);
482             return true;
483         } else {
484             print(value);
485         }
486         return false;
487     }
488 
489     /**
490      * Print the text formatted for a tag that does not place significance on
491      * multiple whitespace.  This will layout the text into a block good for viewing.
492      * An example is docbook's &lt;para&gt;.
493      *
494      * @param text The text to print.
495      */
496     protected String   formatParaText(String   text) {
497         FastStringBuffer fsb = new FastStringBuffer(1024);
498 
499         //convert MSDOS crlf to a single space
500         text = StringUtil.replace(text, "\r\n", " ");
501         //convert lf to a single space now that crlf will not duplicate this
502         text = StringUtil.replace(text, "\n", " ");
503         //convert tab to a space
504         text = StringUtil.replace(text, "\t", " ");
505 
506         //get rid of duplicate spaces
507         int textlen = text.length();
508         do {
509             int oldlen = textlen;
510             text = StringUtil.replace(text, "  ", " "); //replace two spaces with one
511             textlen = text.length();
512             if (oldlen == textlen) {
513                 break;
514             }
515         } while (true);
516 
517         int charCount = 0;
518 
519         for (int i = 0; i < textlen; i++) {
520             char c = text.charAt(i);
521 
522             if (charCount >= _maxFormatLen) {
523                 if (Character.isWhitespace(c)) // break the line here
524                 {
525                     fsb.append(_newline);
526                 } else {
527                     fsb.append(c);
528 
529                     int j = i + 1;
530                     for (; j < textlen; j++) {
531                         c = text.charAt(j);
532                         if (Character.isWhitespace(c)) { // break the line here
533                             fsb.append(_newline);
534                             break;
535                         } else {
536                             fsb.append(c);
537                         }
538                     }
539                     i = j;
540                 }
541                 charCount = 0;
542             } else {
543                 fsb.append(c);
544                 charCount++;
545             }
546         }
547         return fsb.toString();
548     }
549 
550 
551     public static int run(String  [] args) {
552         org.apache.log4j.BasicConfigurator.configure();
553 
554         XMLPrinter printer = new DocBookPrinter();
555 
556         return run(args, printer);
557     }
558 
559     public static void main(String  [] args) {
560         System.exit(run(args));
561     }
562     
563     
564     //
565     // ErrorHandler methods
566     //
567     /**
568      * Issue a warning on parsing errors
569      *
570      * @param ex A Sax Parse Exception event
571      */
572     public void warning(SAXParseException   ex) {
573         log.warn(getLocationString(ex) + ": " + ex.getMessage());
574     }
575 
576     /**
577      * Issue an error
578      *
579      * @param ex A Sax Parse Exception event
580      */
581     public void error(SAXParseException   ex) {
582         log.error(getLocationString(ex) + ": " + ex.getMessage());
583     }
584 
585     /**
586      * Fatal error. Used Internally for parsing only
587      *
588      * @param ex A Sax Parse Exception event
589      * @throws SAXException after logging the Parsing Exception
590      */
591     public void fatalError(SAXParseException   ex)
592             throws SAXException   {
593         log.error(getLocationString(ex) + ": " + ex.getMessage());
594         throw ex;
595     }
596 
597     /**
598      * Returns a string of the location. Used Internally For Parsing Only
599      *
600      * @param ex A Sax Parse Exception event
601      * @return java.lang.String
602      */
603     private String   getLocationString(SAXParseException   ex) {
604         FastStringBuffer str = new FastStringBuffer(128);
605         String   systemId = ex.getSystemId();
606 
607         if (systemId != null) {
608             int index = systemId.lastIndexOf('/');
609 
610             if (index != -1) {
611                 systemId = systemId.substring(index + 1);
612             }
613 
614             str.append(systemId);
615         }
616 
617         str.append(':');
618         str.append(ex.getLineNumber());
619         str.append(':');
620         str.append(ex.getColumnNumber());
621 
622         return str.toString();
623     } // getLocationString(SAXParseException):String
624 }
625
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags