BaseDOMFormatter


/*
 * Enhydra Java Application Server Project
 *
 * The contents of this file are subject to the Enhydra Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License on
 * the Enhydra web site ( http://www.enhydra.org/ ).
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific terms governing rights and limitations
 * under the License.
 *
 * The Initial Developer of the Enhydra Application Server is Lutris
 * Technologies, Inc. The Enhydra Application Server and portions created
 * by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
 * All Rights Reserved.
 *
 * Contributor(s):
 *
 * $Id: BaseDOMFormatter.java,v 1.6 2005/01/26 08:29:24 jkjome Exp $
 */
23  
24  package org.enhydra.xml.io;
25  
26  import java.io.IOException  ;
27  import java.io.StringWriter  ;
28  import java.io.Writer  ;
29  
30  import org.enhydra.xml.dom.DOMAccess;
31  import org.enhydra.xml.dom.DOMError;
32  import org.enhydra.xml.dom.DOMOps;
33  import org.enhydra.xml.dom.DOMTraversal;
34  import org.enhydra.xml.xmlc.XMLObject;
35  import org.enhydra.xml.xmlc.XMLObjectLink;
36  import org.w3c.dom.Attr  ;
37  import org.w3c.dom.Comment  ;
38  import org.w3c.dom.Document  ;
39  import org.w3c.dom.DocumentType  ;
40  import org.w3c.dom.Element  ;
41  import org.w3c.dom.Entity  ;
42  import org.w3c.dom.EntityReference  ;
43  import org.w3c.dom.Node  ;
44  import org.w3c.dom.Notation  ;
45  import org.w3c.dom.Text  ;
46  import org.w3c.dom.html.HTMLDocument;
47  
48  
49  /**
50   * Base class with common functionally XML and HTML DOM formatting
51   * functionality.
52   */
53  abstract class BaseDOMFormatter implements Formatter, DOMTraversal.Handler {
54      // N.B. A lot of fields are made final to aid in optimization.
55  
56      /**
57       * Maximum character value in an entity quick-check table.
58       */
59      protected static final int MAX_ENTITY_QUICK_CHECK_CHAR = 0x7f;
60  
61      /**
62       * Quote character to use for attribute values.
63       */
64      protected static final char ATTR_QUOTE_CHAR = '"';
65  
66      /**
67       * Entity reference for attribute value quote character.
68       */
69      protected static final String   ATTR_QUOTE_CHAR_ENTITY_REF = "#34";
70  
71      /**
72       * The output options.
73       */
74      protected final OutputOptions fOptions;
75  
76      /**
77       * Output writer.
78       */
79      protected Writer   fOut;
80  
81      /**
82       * DOM traverser.
83       */
84      protected final DOMTraversal fTraverser;
85  
86      /**
87       * Should character entity references be omitted from attribute values?
88       */
89      private final boolean fOmitAttributeCharEntityRefs;
90  
91      /**
92       * Are we doing pre-formatting?  
93       */
94      protected final boolean fPreFormatMode;
95  
96      /**
97       * Should pre-formatted text be written.  This is only enabled if the
98       * document implements PreFormattedTextDocument and the preformatted
99       * encoding matches the output encoding.  This option only controls
100      * text nodes that are not children of attributes.
101      */
102     protected final boolean fUsePreFormattedText;
103 
104     /**
105      * Should pre-formatted text be use for descendents of attribute
106      * nodes.  A seperate flag is required to support the
107      * omitAttributeCharEntityRefs option.
108      */
109     protected final boolean fUsePreFormattedAttrText;
110 
111     /**
112      * Should pre-formatted Elements be written.  This is the same as
113      * fUsePreFormattedText, unless a URLRewriter is present, it which
114      * cases its false, since we need to check the attributes for URLs.
115      */
116     protected final boolean fUsePreFormattedElements;
117 
118     /**
119      * Count of preformatted text nodes that were written.
120      */
121     private int fPreFormattedTextCount;
122 
123     /**
124      * Count of text nodes that were written not using preformatted text.
125      */
126     private int fDynamicFormattedTextCount;
127 
128     /**
129      * Count of preformatted element open tags that were written.
130      */
131     protected int fPreFormattedElementCount;
132 
133     /**
134      * Count of element open tags that were written not using preformatted
135      * text.
136      */
137     protected int fDynamicFormattedElementCount;
138 
139     /**
140      * Are we currently outputting an attribute or its children.
141      */
142     private boolean fProcessingAttr;
143 
144     /**
145      * Are we pretty-printing?
146      */
147     protected final boolean fPrettyPrinting;
148     
149     /**
150      * Pretty-printing indent size.
151      */
152     private final int fIndentSize;
153 
154     /**
155      * The encoding for the current document.
156      */
157     private final String   fEncoding;
158 
159     /**
160      * The maximum value of an unicode character in the document's encoding.
161      */
162     private final int fMaxCharacterValue;
163 
164     /**
165      * The character set object for the encoding.
166      */
167     private final CharacterSet fCharSet;
168 
169     /**
170      * Object to do URL value rewriting, or null if URL rewriting is
171      * not being done.
172      */
173     private final URLRewriter fURLRewriter;
174 
175     /**
176      * Document being formatted.
177      */
178     protected final Document   fDocument;
179     
180     /**
181      * DocumentType from document.
182      */
183     protected final DocumentType   fDocType;
184     
185     /**
186      * Public id to use for DOCTYPE, or null if none.
187      */
188     protected final String   fPublicId;
189 
190     /**
191      * System id to use for DOCTYPE, or null if none.
192      */
193     protected final String   fSystemId;
194 
195     /**
196      * If the document implements DocumentInfo, this is set.  It is
197      * used by URL rewriting.
198      */
199     private final DocumentInfo fDocInfo;
200 
201     /**
202      * Entity quick-check table, indexed by character value, indicating if the
203      * character must be represented as an character entity reference.
204      */
205     private final boolean[] fEntityQuickCheck;
206 
207     /**
208      * Platform line separator.
209      */
210     private static final String   fNewLine;
211 
212     /**
213      * Static string that is used as a quick way to get a certain indent
214      * level.  Indent strings are generated by taking substrings of this
215      * string.  This string is grown as needed.
216      */
217     private static String   fIndentSource
218         = "                                                               ";
219 
220     /**
221      * Static initializer.
222      */
223     static {
224         fNewLine = System.getProperty("line.separator");
225         if (fNewLine == null) {
226             throw new XMLIOError("System property line.separator not found");
227         }
228     }
229 
230     /**
231      * Check if preformatted text might be used for this document.
232      * 
233      * @return The output options that were used for preformatting or null
234      *  if preformatted text can be used for some reason.
235      */
236     private static OutputOptions checkUsePreformatting(Document   doc,
237                                                        String   defaultEncoding,
238                                                        CharacterSet charSet) {
239         // Document must implement PreFormattedTextDocument
240         if (!(doc instanceof PreFormattedTextDocument)) {
241             return null;
242         }
243         
244         // OutputOptions used for preformatting must be available
245         OutputOptions pfOptions = ((PreFormattedTextDocument)doc).getPreFormatOutputOptions();
246         if (pfOptions == null) {
247             return null;
248         }
249 
250         // Make sure that the two encodings are compatible in terms of which
251         // characters are converted to character entity references.
252         String   preFormatEncoding = pfOptions.getMIMEEncoding();
253         if (preFormatEncoding == null) {
254             preFormatEncoding = defaultEncoding;
255         }
256         Encodings encodings = Encodings.getEncodings();
257         CharacterSet preFormatCharSet
258             = encodings.getCharacterSet(preFormatEncoding);
259         if (!charSet.isCompatible(preFormatCharSet)) {
260             return null;  // not compatible
261         }
262         return pfOptions;
263     }
264 
265     /**
266      * If preformatting is enabled, determine if text nodes should 
267      * use preformatted text.
268      */
269     private static boolean checkUsePreformattedText(OutputOptions options,
270                                                     OutputOptions pfOptions) {
271         // Only pretty-printing must be the same
272         return (pfOptions.getIndentSize() == options.getIndentSize())
273             && (pfOptions.getPrettyPrinting() == options.getPrettyPrinting());
274     }
275 
276     /**
277      * If preformatting is enabled, determine if attribute value text nodes
278      * should use preformatted text.
279      */
280     private static boolean checkUsePreformattedAttrText(OutputOptions options,
281                                                         OutputOptions pfOptions) {
282         return (pfOptions.getOmitAttributeCharEntityRefs() == options.getOmitAttributeCharEntityRefs());
283     }
284 
285     /**
286      * If preformatting is enabled, determine if preformatted element open
287      * tags should be used.
288      */
289     private static boolean checkUsePreformattedElements(OutputOptions options,
290                                                         OutputOptions pfOptions,
291                                                         URLRewriter urlRewriter) {
292         return (pfOptions.getDropHtmlSpanIds() == options.getDropHtmlSpanIds())
293             && (pfOptions.getOmitAttributeCharEntityRefs() == options.getOmitAttributeCharEntityRefs())
294             && (urlRewriter == null);
295     }
296 
297     /**
298      * Get the DocumentInfo object, if available.
299      */
300     private static DocumentInfo findDocumentInfo(Document   document) {
301         if (document instanceof DocumentInfo) {
302             return (DocumentInfo)document;
303         } else if (document instanceof XMLObjectLink) {
304             // Go back to the XMLObject if possible.
305             return (DocumentInfo)((XMLObjectLink)document).getXMLObject();
306         } else if (document instanceof HTMLDocument) {
307             return new DocumentInfo () {
308             public boolean isURLAttribute(Element   element,
309                           String   attrName) {
310             return org.enhydra.xml.xmlc.dom.HTMLDomFactoryMethods.isURLAttribute(element, attrName);
311             }
312 
313         };
314         } else {
315         return null;
316     }
317     }
318 
319     /**
320      * Get the encoding to use. If one is not explictly specified in
321      * output options, see if one can be obtained from the document.
322      * If that fails, use the default for this type of document.
323      */
324     private static String   getEncoding(Document   document,
325                                       OutputOptions outputOptions,
326                                       String   defaultEncoding) {
327         String   encoding = outputOptions.getMIMEEncoding();
328         if (encoding == null) {
329             // Try getting from the document
330             if (document instanceof XMLObject) {
331                 encoding = ((XMLObject)document).getEncoding();
332             } else if (document instanceof XMLObjectLink) {
333                 encoding = ((XMLObjectLink)document).getXMLObject().getEncoding();
334             }
335             if (encoding == null) {
336                 // ok, must use default for XML or HTML
337                 encoding = defaultEncoding;
338             }
339         }
340         return encoding;
341     }
342 
343     /**
344      * Constructor.
345      *
346      * @param node Any node of the document that this formatter will be
347      *  associated with.  This can also be an XMLC Document object (XMLObject).
348      * @param options The output options.
349      * @param defaultEncoding The default encoding for this format.
350      * @param forPreFormatting Is this going to be used for preformatting?
351      * @param entityQuickCheck Document-type specific table that provides
352      *  a quick check of the need to encode that character as a character
353      *  entity reference.  This table MUST include the double-quote character,
354      *  as it it used to quote attribute values.
355      */
356     protected BaseDOMFormatter(Node   node,
357                                OutputOptions outputOptions,
358                                boolean forPreFormatting,
359                                String   defaultEncoding,
360                                boolean[] entityQuickCheck) {
361         fDocument = DOMOps.getDocument(node);
362         fDocType = DOMAccess.accessDocumentType(fDocument);
363         fOptions = outputOptions;
364         fTraverser = DOMTraversal.getTraverser(this, 0, node);
365 
366         // Get DOCTYPE information (and overrides from OutputOptions)
367         String   publicId = fOptions.getPublicId();
368         String   systemId = fOptions.getSystemId();
369         if (fDocType != null) {
370             if (fDocType.getPublicId() != null) {
371                 publicId = fDocType.getPublicId();
372             }
373             if (fDocType.getSystemId() != null) {
374                 systemId = fDocType.getSystemId();
375             }
376         }
377         fPublicId = publicId;
378         fSystemId = systemId;
379 
380         // Initialize the encoding information.
381         fEncoding = getEncoding(fDocument, outputOptions, defaultEncoding);
382         Encodings encodings = Encodings.getEncodings();
383         fMaxCharacterValue = encodings.getMaxCharacterValue(fEncoding);
384         fCharSet = encodings.getCharacterSet(fEncoding);
385 
386         // Setup URL rewriting, which needs DocumentInfo
387         fDocInfo = findDocumentInfo(fDocument);
388         fURLRewriter = (fDocInfo != null) ? fOptions.getURLRewriter() : null;
389 
390         // Various options from output options.
391         fIndentSize = fOptions.getIndentSize();
392         fPrettyPrinting = (fIndentSize > 0) && fOptions.getPrettyPrinting();
393         fOmitAttributeCharEntityRefs = fOptions.getOmitAttributeCharEntityRefs();
394 
395         // Initialize preformatted text options.  This is all rather tricky.
396         OutputOptions pfOptions
397             = checkUsePreformatting(fDocument, defaultEncoding, fCharSet);
398 
399         if ((pfOptions != null) && (!forPreFormatting)) {
400             fUsePreFormattedText = checkUsePreformattedText(fOptions, pfOptions);
401             fUsePreFormattedAttrText = checkUsePreformattedAttrText(fOptions, pfOptions);
402             fUsePreFormattedElements = checkUsePreformattedElements(fOptions, pfOptions,
403                                                                     fURLRewriter);
404         } else {
405             fUsePreFormattedText = false;
406             fUsePreFormattedAttrText = false;
407             fUsePreFormattedElements = false;
408         }
409         fPreFormatMode = forPreFormatting;
410 
411         // Entity handling
412         fEntityQuickCheck = entityQuickCheck;
413         if (!fOptions.getUseAposEntity())
414             fEntityQuickCheck['\''] = false;
415 
416         // initialize for pre-formatting.
417         if (forPreFormatting) {
418             fOut = new StringWriter  (4096); // bigger than default
419         }
420     }
421 
422     /**
423      * @see Formatter#getMIMEEncoding
424      */
425     public final String   getMIMEEncoding() {
426         return fEncoding;
427     }
428 
429     /**
430      * @see Formatter#usedPreFormattedText
431      */
432     public boolean usedPreFormattedText() {
433         return fUsePreFormattedText;
434     }
435 
436     /**
437      * Get the count of preformatted text nodes that were written.
438      */
439     public final int getPreFormattedTextCount() {
440         return fPreFormattedTextCount;
441     }
442 
443     /**
444      * Get the count of text nodes that were written not using
445      * preformatted text.
446      */
447     public final int getDynamicFormattedTextCount() {
448         return fDynamicFormattedTextCount;
449     }
450 
451     /**
452      * @see Formatter#usedPreFormattedElements
453      */
454     public final boolean usedPreFormattedElements() {
455         return fUsePreFormattedElements;
456     }
457 
458     /**
459      * Get the count of preformatted element open tags that were written.
460      */
461     public final int getPreFormattedElementCount() {
462         return fPreFormattedElementCount;
463     }
464 
465     /**
466      * Get the count of element open tags that were written not using
467      * preformatted text.
468      */
469     public final int getDynamicFormattedElementCount() {
470         return fDynamicFormattedElementCount;
471     }
472 
473     /**
474      * Write a newline
475      */
476     protected final void writeln() throws IOException   {
477         fOut.write(fNewLine);
478     }
479 
480     /**
481      * Grow indentation source string to at least the specified size
482      * if needed.
483      */
484     private static void ensureIndentSource(int numChars) {
485         // Grow as needed; coded to avoid sync
486         while (fIndentSource.length() < numChars) {
487             fIndentSource += fIndentSource;
488         }
489     }
490 
491     /**
492      * Print indentation to the current level.
493      */
494     protected final void printIndent() throws IOException   {
495         if (fPrettyPrinting) {
496             int indent = (fTraverser.getDepth() - 1) * fIndentSize;
497             ensureIndentSource(indent);
498             fOut.write(fIndentSource, 0, indent);
499         } // end of if ()
500     }
501 
502     /**
503      * Get a character entity name for a character.
504      * This is the slow-path, so its ok this is an abstract method call.
505      * @return The character entity name, or null if this character doesn't
506      *  have one.
507      */
508     abstract protected String   getCharacterEntity(char textChar);
509 
510     /**
511      * Output a character, possibly substituting a character entity reference
512      * or a numeric entity. This is the slow-path method.
513      */
514     private void writeCharacter(char textChar) throws IOException   {
515         // Determine the entity to substitute; always do quote, even if substitute disabled,
516         // as this is only implemented for attribute values.
517         String   entity;
518         if (fProcessingAttr && fOmitAttributeCharEntityRefs) {
519             entity = (textChar == ATTR_QUOTE_CHAR) ? ATTR_QUOTE_CHAR_ENTITY_REF : null;
520         } else {
521             entity = getCharacterEntity(textChar);
522         }
523 
524         // FIXME: need to review impact of isValid call on performance.
525         // this use to be a simple compare.
526         if (entity != null) {
527             fOut.write('&');
528             fOut.write(entity);
529             fOut.write(';');
530         } else if (!fCharSet.isValid(textChar)) {
531             fOut.write("&#");
532             fOut.write(Integer.toString(textChar));
533             fOut.write(';');
534         } else {
535             fOut.write(textChar);
536         }
537     }
538 
539     /**
540      * Write a text string, encoding document type-specific character entities.
541      * This is an expensive procedure and has been carefully hand optimized.
542      */
543     protected final void writeText(String   text) throws IOException   {
544         if (text == null) return;
545         int len = text.length();
546         char ch;
547 
548         // FIXME: here we make a nasty assumption about all characters
549         // less than fMaxCharacterValue && MAX_ENTITY_QUICK_CHECK_CHAR 
550         // are valid. This is probably ok.  The fMaxCharacterValue check
551         // is needed for 7-bit encodings.
552         for (int idx = 0; idx < len; idx++) {
553             ch = text.charAt(idx);
554             if ((ch <= fMaxCharacterValue)
555                 && (ch <= MAX_ENTITY_QUICK_CHECK_CHAR)
556                 && (!fEntityQuickCheck[ch])) {
557                 fOut.write(ch);  // Fast path.
558             } else {
559                 writeCharacter(ch);
560             }
561         }
562     }
563 
564     /**
565      * Handler called for Entity nodes; should never be called.
566      * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleEntity
567      */
568     public final void handleEntity(Entity   entity) {
569         throw new XMLIOError("Unexpected call to handleEntity");
570     }
571 
572     /**
573      * Handler called for Notation nodes; should never be called.
574      * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleNotation
575      */
576     public final void handleNotation(Notation   notation) {
577         throw new XMLIOError("Unexpected call to handleNotation");
578     }
579 
580     /**
581      * Handler called for EntityReference nodes.
582      * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleEntityReference
583      */
584     public final void handleEntityReference(EntityReference   entityRef) throws IOException   {
585         fOut.write('&');
586         fOut.write(entityRef.getNodeName());
587         fOut.write(';');
588     }
589 
590     /**
591      * Handler called for Comment nodes.
592      * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleComment
593      */
594     public final void handleComment(Comment   comment) throws IOException   {
595         fOut.write("<!--");
596         fOut.write(comment.getData());
597         fOut.write("-->");
598     }
599 
600     /**
601      * Handler called for Text nodes.
602      * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleText
603      */
604     public void handleText(Text   text) throws IOException   {
605         String   preformattedData = null;
606         if (fProcessingAttr) {
607             if (fUsePreFormattedAttrText && (text instanceof PreFormattedText)) {
608                 preformattedData = ((PreFormattedText)text).getPreFormattedText();
609             }
610         } else {
611             if (fUsePreFormattedText && (text instanceof PreFormattedText)) {
612                 preformattedData = ((PreFormattedText)text).getPreFormattedText();
613             }
614         }
615 
616         if (preformattedData != null) {
617             // Have preformatted data that can be used.
618             fOut.write(preformattedData);
619             fPreFormattedTextCount++;
620         } else {
621             // No preformatted text.
622             writeText(text.getData());
623             fDynamicFormattedTextCount++;
624         }
625     }
626 
627     /**
628      * Write an attribute value.  Convert characters to character entity
629      * references as needed.
630      */
631     protected final void writeAttributeValue(Attr   attr) throws IOException   {
632         fProcessingAttr = true;
633 
634         fOut.write('=');
635         fOut.write(ATTR_QUOTE_CHAR);
636 
637         // Are we doing URL rewritting and is this a URL attr?
638         if ((fURLRewriter != null)
639             && fDocInfo.isURLAttribute(attr.getOwnerElement(),
640                                        attr.getName())) {
641             String   value = fURLRewriter.rewriteURL(attr.getValue());
642             writeText(value);
643         } else {
644             fTraverser.processChildren(attr);
645         }
646         fOut.write(ATTR_QUOTE_CHAR);
647 
648         // Don't need to reset on error, as its reset at the beginning
649         // of each write.
650         fProcessingAttr = false;
651     }
652 
653     /**
654      * Method to write an open tag, including attributes.  Children
655      * are not processed.  This is normally called by the derived class
656      * handleElement method, but its hear to allow for use by preformatter.
657      */
658     abstract protected void writeOpenTag(Element   element, 
659                                          String   tagName,
660                                          boolean hasChildren) throws IOException  ;
661 
662     /**
663      * Preformat an element.
664      */
665     private String   preFormatElement(Element   element) throws IOException   {
666         writeOpenTag(element, element.getTagName(), element.hasChildNodes());
667         return ((StringWriter  )fOut).getBuffer().toString();
668     }
669 
670     /**
671      * Recursively determine if a node is a child of an attribute.
672      */
673     private boolean isAttributeChild(Node   node) {
674         if (node == null) {
675             return false;
676         }
677         switch (node.getNodeType()) {
678         case Node.ELEMENT_NODE:
679             return false;
680         case Node.ATTRIBUTE_NODE:
681             return true;
682         default:
683             return isAttributeChild(node.getParentNode());
684         }
685     }
686 
687     /**
688      * Preformat a text node.  The preformatted text is returned even if
689      * its the same.
690      */
691     private String   preFormatText(Text   text) throws IOException   {
692         fProcessingAttr = isAttributeChild(text);
693         try {
694             handleText(text);
695         } finally {
696             fProcessingAttr = false;
697         }
698         return ((StringWriter  )fOut).getBuffer().toString();
699     }
700 
701     /**
702      * @see Formatter#preFormatNode
703      */
704     public final String   preFormatNode(Node   node) {
705         fProcessingAttr = false;
706         try {
707             ((StringWriter  )fOut).getBuffer().setLength(0); // Clear buffer
708             
709             switch (node.getNodeType()) {
710             case Node.ELEMENT_NODE:
711                 return preFormatElement((Element  )node);
712             case Node.TEXT_NODE:
713                 return preFormatText((Text  )node);
714             }
715             return null;
716         } catch (IOException   except) {
717             // Should never happen.
718             throw new XMLIOError(except);
719         }
720     }
721 
722     /**
723      * Format a Node and children to the specified writer.
724      * @see Formatter#write
725      */
726     public final void write(Node   node,
727                             Writer   writer) throws IOException   {
728         try {
729             fOut = writer;
730             fProcessingAttr = false;
731             fTraverser.traverse(node);
732 //csc_040604_1 - this really causes problems if you are using the DOMWriter repeatedly, to write chunks of the document at a
733 //               time, rather than the whole thing at once. By putting a writeln() here, it ends up splitting lines wherever
734 //               the chunk finishes, and if that's in between spans surrounding text nodes, it can adversely effect the output
735 //               that gets displayed. Consequently, we really need to remove this writeln. What this means is that there will
736 //               no longer be a final CR/LF at the end of reports, but I don't think that will actually cause any problems
737 //csc_040604_1            writeln();
738         } catch (DOMError error) {
739             // Rethrow IOExceptions
740             Throwable   cause = error.getCause();
741             if (cause instanceof IOException  ) {
742                 throw (IOException  )cause;
743             } else {
744                 throw error;
745             }
746         }
747     }
748 }
749
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags