KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jdom > output > Format


1 /*--
2
3  $Id: Format.java,v 1.10 2004/09/07 06:37:20 jhunter Exp $
4
5  Copyright (C) 2000-2004 Jason Hunter & Brett McLaughlin.
6  All rights reserved.
7
8  Redistribution and use in source and binary forms, with or without
9  modification, are permitted provided that the following conditions
10  are met:
11
12  1. Redistributions of source code must retain the above copyright
13     notice, this list of conditions, and the following disclaimer.
14
15  2. Redistributions in binary form must reproduce the above copyright
16     notice, this list of conditions, and the disclaimer that follows
17     these conditions in the documentation and/or other materials
18     provided with the distribution.
19
20  3. The name "JDOM" must not be used to endorse or promote products
21     derived from this software without prior written permission. For
22     written permission, please contact <request_AT_jdom_DOT_org>.
23
24  4. Products derived from this software may not be called "JDOM", nor
25     may "JDOM" appear in their name, without prior written permission
26     from the JDOM Project Management <request_AT_jdom_DOT_org>.
27
28  In addition, we request (but do not require) that you include in the
29  end-user documentation provided with the redistribution and/or in the
30  software itself an acknowledgement equivalent to the following:
31      "This product includes software developed by the
32       JDOM Project (http://www.jdom.org/)."
33  Alternatively, the acknowledgment may be graphical using the logos
34  available at http://www.jdom.org/images/logos.
35
36  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
40  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  SUCH DAMAGE.
48
49  This software consists of voluntary contributions made by many
50  individuals on behalf of the JDOM Project and was originally
51  created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
52  Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
53  on the JDOM Project, please see <http://www.jdom.org/>.
54
55  */

56
57 package org.jdom.output;
58
import java.lang.reflect.Method;
60
61 /**
62  * Class to encapsulate XMLOutputter format options.
63  * Typical users can use the standard format configurations obtained by
64  * {@link #getRawFormat} (no whitespace changes),
65  * {@link #getPrettyFormat} (whitespace beautification), and
66  * {@link #getCompactFormat} (whitespace normalization).
67  * <p>
68  * Several modes are available to effect the way textual content is printed.
69  * See the documentation for {@link TextMode} for details.
70  *
71  * @version $Revision: 1.10 $, $Date: 2004/09/07 06:37:20 $
72  * @author Jason Hunter
73  */

74 public class Format implements Cloneable JavaDoc {
75
76     private static final String JavaDoc CVS_ID =
77             "@(#) $RCSfile: Format.java,v $ $Revision: 1.10 $ $Date: 2004/09/07 06:37:20 $ $Name: $";
78
79     /**
80      * Returns a new Format object that performs no whitespace changes, uses
81      * the UTF-8 encoding, doesn't expand empty elements, includes the
82      * declaration and encoding, and uses the default entity escape strategy.
83      * Tweaks can be made to the returned Format instance without affecting
84      * other instances.
85
86      * @return a Format with no whitespace changes
87      */

88     public static Format getRawFormat() {
89         return new Format();
90     }
91
92     /**
93      * Returns a new Format object that performs whitespace beautification with
94      * 2-space indents, uses the UTF-8 encoding, doesn't expand empty elements,
95      * includes the declaration and encoding, and uses the default entity
96      * escape strategy.
97      * Tweaks can be made to the returned Format instance without affecting
98      * other instances.
99      *
100      * @return a Format with whitespace beautification
101      */

102     public static Format getPrettyFormat() {
103         Format f = new Format();
104         f.setIndent(STANDARD_INDENT);
105         f.setTextMode(TextMode.TRIM);
106         return f;
107     }
108
109     /**
110      * Returns a new Format object that performs whitespace normalization, uses
111      * the UTF-8 encoding, doesn't expand empty elements, includes the
112      * declaration and encoding, and uses the default entity escape strategy.
113      * Tweaks can be made to the returned Format instance without affecting
114      * other instances.
115      *
116      * @return a Format with whitespace normalization
117      */

118     public static Format getCompactFormat() {
119         Format f = new Format();
120         f.setTextMode(TextMode.NORMALIZE);
121         return f;
122     }
123
124     /** standard value to indent by, if we are indenting */
125     private static final String JavaDoc STANDARD_INDENT = " ";
126
127     /** standard string with which to end a line */
128     private static final String JavaDoc STANDARD_LINE_SEPARATOR = "\r\n";
129
130     /** standard encoding */
131     private static final String JavaDoc STANDARD_ENCODING = "UTF-8";
132
133
134     /** The default indent is no spaces (as original document) */
135     String JavaDoc indent = null;
136
137     /** New line separator */
138     String JavaDoc lineSeparator = STANDARD_LINE_SEPARATOR;
139
140     /** The encoding format */
141     String JavaDoc encoding = STANDARD_ENCODING;
142
143     /** Whether or not to output the XML declaration
144      * - default is <code>false</code> */

145     boolean omitDeclaration = false;
146
147     /** Whether or not to output the encoding in the XML declaration
148      * - default is <code>false</code> */

149     boolean omitEncoding = false;
150
151     /** Whether or not to expand empty elements to
152      * &lt;tagName&gt;&lt;/tagName&gt; - default is <code>false</code> */

153     boolean expandEmptyElements = false;
154
155     /** Whether TrAX output escaping disabling/enabling PIs are ignored
156       * or processed - default is <code>false</code> */

157     boolean ignoreTrAXEscapingPIs = false;
158
159     /** text handling mode */
160     TextMode mode = TextMode.PRESERVE;
161
162     /** entity escape logic */
163     EscapeStrategy escapeStrategy = new DefaultEscapeStrategy(encoding);
164
165     /**
166      * Creates a new Format instance with default (raw) behavior.
167      */

168     private Format() { }
169
170     /**
171      * Sets the {@link EscapeStrategy} to use for character escaping.
172      *
173      * @param strategy the EscapeStrategy to use
174      * @return a pointer to this Format for chaining
175      */

176     public Format setEscapeStrategy(EscapeStrategy strategy) {
177         escapeStrategy = strategy;
178         return this;
179     }
180
181     /**
182      * Returns the current escape strategy
183      *
184      * @return the current escape strategy
185      */

186     public EscapeStrategy getEscapeStrategy() {
187         return escapeStrategy;
188     }
189
190     /**
191      * This will set the newline separator (<code>lineSeparator</code>).
192      * The default is <code>\r\n</code>. Note that if the "newlines"
193      * property is false, this value is irrelevant. To make it output
194      * the system default line ending string, call
195      * <code>setLineSeparator(System.getProperty("line.separator"))</code>
196      *
197      * <p>
198      * To output "UNIX-style" documents, call
199      * <code>setLineSeparator("\n")</code>. To output "Mac-style"
200      * documents, call <code>setLineSeparator("\r")</code>. DOS-style
201      * documents use CR-LF ("\r\n"), which is the default.
202      * </p>
203      *
204      * <p>
205      * Note that this only applies to newlines generated by the
206      * outputter. If you parse an XML document that contains newlines
207      * embedded inside a text node, and you do not set TextMode.NORMALIZE,
208      * then the newlines will be output
209      * verbatim, as "\n" which is how parsers normalize them.
210      * </p>
211      *
212      * @see #setTextMode
213      *
214      * @param separator <code>String</code> line separator to use.
215      * @return a pointer to this Format for chaining
216      */

217     public Format setLineSeparator(String JavaDoc separator) {
218         this.lineSeparator = separator;
219         return this;
220     }
221
222     /**
223      * Returns the current line separator.
224      *
225      * @return the current line separator
226      */

227     public String JavaDoc getLineSeparator() {
228         return lineSeparator;
229     }
230
231     /**
232      * This will set whether the XML declaration
233      * (<code>&lt;&#063;xml version="1&#046;0"
234      * encoding="UTF-8"&#063;&gt;</code>)
235      * includes the encoding of the document. It is common to omit
236      * this in uses such as WML and other wireless device protocols.
237      *
238      * @param omitEncoding <code>boolean</code> indicating whether or not
239      * the XML declaration should indicate the document encoding.
240      * @return a pointer to this Format for chaining
241      */

242     public Format setOmitEncoding(boolean omitEncoding) {
243         this.omitEncoding = omitEncoding;
244         return this;
245     }
246
247     /**
248      * Returns whether the XML declaration encoding will be omitted.
249      *
250      * @return whether the XML declaration encoding will be omitted
251      */

252     public boolean getOmitEncoding() {
253         return omitEncoding;
254     }
255
256     /**
257      * This will set whether the XML declaration
258      * (<code>&lt;&#063;xml version="1&#046;0"&#063;gt;</code>)
259      * will be omitted or not. It is common to omit this in uses such
260      * as SOAP and XML-RPC calls.
261      *
262      * @param omitDeclaration <code>boolean</code> indicating whether or not
263      * the XML declaration should be omitted.
264      * @return a pointer to this Format for chaining
265      */

266     public Format setOmitDeclaration(boolean omitDeclaration) {
267         this.omitDeclaration = omitDeclaration;
268         return this;
269     }
270
271     /**
272      * Returns whether the XML declaration will be omitted.
273      *
274      * @return whether the XML declaration will be omitted
275      */

276     public boolean getOmitDeclaration() {
277         return omitDeclaration;
278     }
279
280     /**
281      * This will set whether empty elements are expanded from
282      * <code>&lt;tagName/&gt;</code> to
283      * <code>&lt;tagName&gt;&lt;/tagName&gt;</code>.
284      *
285      * @param expandEmptyElements <code>boolean</code> indicating whether or not
286      * empty elements should be expanded.
287      * @return a pointer to this Format for chaining
288      */

289     public Format setExpandEmptyElements(boolean expandEmptyElements) {
290         this.expandEmptyElements = expandEmptyElements;
291         return this;
292     }
293
294     /**
295      * Returns whether empty elements are expanded.
296      *
297      * @return whether empty elements are expanded
298      */

299     public boolean getExpandEmptyElements() {
300         return expandEmptyElements;
301     }
302
303     /**
304      * This will set whether JAXP TrAX processing instructions for
305      * disabling/enabling output escaping are ignored. Disabling
306      * output escaping allows using XML text as element content and
307      * outputing it verbatim, i&#46;e&#46; as element children would be.
308      * <p>
309      * When processed, these processing instructions are removed from
310      * the generated XML text and control whether the element text
311      * content is output verbatim or with escaping of the pre-defined
312      * entities in XML 1.0. The text to be output verbatim shall be
313      * surrounded by the
314      * <code>&lt;?javax.xml.transform.disable-output-escaping ?&gt;</code>
315      * and <code>&lt;?javax.xml.transform.enable-output-escaping ?&gt;</code>
316      * PIs.</p>
317      * <p>
318      * When ignored, the processing instructions are present in the
319      * generated XML text and the pre-defined entities in XML 1.0 are
320      * escaped.
321      * <p>
322      * Default: <code>false</code>.</p>
323      *
324      * @param ignoreTrAXEscapingPIs <code>boolean</code> indicating
325      * whether or not TrAX ouput escaping PIs are ignored.
326      *
327      * @see javax.xml.transform.Result#PI_ENABLE_OUTPUT_ESCAPING
328      * @see javax.xml.transform.Result#PI_DISABLE_OUTPUT_ESCAPING
329      */

330     public void setIgnoreTrAXEscapingPIs(boolean ignoreTrAXEscapingPIs) {
331         this.ignoreTrAXEscapingPIs = ignoreTrAXEscapingPIs;
332     }
333
334     /**
335      * Returns whether JAXP TrAX processing instructions for
336      * disabling/enabling output escaping are ignored.
337      *
338      * @return whether or not TrAX ouput escaping PIs are ignored.
339      */

340     public boolean getIgnoreTrAXEscapingPIs() {
341         return ignoreTrAXEscapingPIs;
342     }
343
344     /**
345      * This sets the text output style. Options are available as static
346      * {@link TextMode} instances. The default is {@link TextMode#PRESERVE}.
347      *
348      * @return a pointer to this Format for chaining
349      */

350     public Format setTextMode(Format.TextMode mode) {
351         this.mode = mode;
352         return this;
353     }
354
355     /**
356      * Returns the current text output style.
357      *
358      * @return the current text output style
359      */

360     public Format.TextMode getTextMode() {
361         return mode;
362     }
363
364     /**
365      * This will set the indent <code>String</code> to use; this
366      * is usually a <code>String</code> of empty spaces. If you pass
367      * null, or the empty string (""), then no indentation will
368      * happen. Default: none (null)
369      *
370      * @param indent <code>String</code> to use for indentation.
371      * @return a pointer to this Format for chaining
372      */

373     public Format setIndent(String JavaDoc indent) {
374         // if passed the empty string, change it to null, for marginal
375
// performance gains later (can compare to null first instead
376
// of calling equals())
377
if ("".equals(indent)) {
378             indent = null;
379         }
380         this.indent = indent;
381         return this;
382     }
383
384     /**
385      * Returns the indent string in use.
386      *
387      * @return the indent string in use
388      */

389     public String JavaDoc getIndent() {
390         return indent;
391     }
392
393     /**
394      * Sets the output encoding. The name should be an accepted XML
395      * encoding.
396      *
397      * @param encoding the encoding format. Use XML-style names like
398      * "UTF-8" or "ISO-8859-1" or "US-ASCII"
399      * @return a pointer to this Format for chaining
400      */

401     public Format setEncoding(String JavaDoc encoding) {
402         this.encoding = encoding;
403         escapeStrategy = new DefaultEscapeStrategy(encoding);
404         return this;
405     }
406
407     /**
408      * Returns the configured output encoding.
409      *
410      * @return the output encoding
411      */

412     public String JavaDoc getEncoding() {
413         return encoding;
414     }
415
416     protected Object JavaDoc clone() {
417         Format format = null;
418
419         try {
420             format = (Format) super.clone();
421         }
422         catch (CloneNotSupportedException JavaDoc ce) {
423         }
424
425         return format;
426     }
427
428
429     /**
430      * Handle common charsets quickly and easily. Use reflection
431      * to query the JDK 1.4 CharsetEncoder class for unknown charsets.
432      * If JDK 1.4 isn't around, default to no special encoding.
433      */

434     class DefaultEscapeStrategy implements EscapeStrategy {
435         private int bits;
436         Object JavaDoc encoder;
437         Method JavaDoc canEncode;
438
439         public DefaultEscapeStrategy(String JavaDoc encoding) {
440             if ("UTF-8".equalsIgnoreCase(encoding) ||
441                     "UTF-16".equalsIgnoreCase(encoding)) {
442                 bits = 16;
443             }
444             else if ("ISO-8859-1".equalsIgnoreCase(encoding) ||
445                     "Latin1".equalsIgnoreCase(encoding)) {
446                 bits = 8;
447             }
448             else if ("US-ASCII".equalsIgnoreCase(encoding) ||
449                     "ASCII".equalsIgnoreCase(encoding)) {
450                 bits = 7;
451             }
452             else {
453                 bits = 0;
454                 //encoder = Charset.forName(encoding).newEncoder();
455
try {
456                     Class JavaDoc charsetClass = Class.forName("java.nio.charset.Charset");
457                     Class JavaDoc encoderClass = Class.forName("java.nio.charset.CharsetEncoder");
458                     Method JavaDoc forName = charsetClass.getMethod("forName", new Class JavaDoc[]{String JavaDoc.class});
459                     Object JavaDoc charsetObj = forName.invoke(null, new Object JavaDoc[]{encoding});
460                     Method JavaDoc newEncoder = charsetClass.getMethod("newEncoder", null);
461                     encoder = newEncoder.invoke(charsetObj, null);
462                     canEncode = encoderClass.getMethod("canEncode", new Class JavaDoc[]{char.class});
463                 }
464                 catch (Exception JavaDoc ignored) {
465                 }
466             }
467         }
468
469         public boolean shouldEscape(char ch) {
470             if (bits == 16) {
471                 return false;
472             }
473             if (bits == 8) {
474                 if ((int) ch > 255)
475                     return true;
476                 else
477                     return false;
478             }
479             if (bits == 7) {
480                 if ((int) ch > 127)
481                     return true;
482                 else
483                     return false;
484             }
485             else {
486                 if (canEncode != null && encoder != null) {
487                     try {
488                         Boolean JavaDoc val = (Boolean JavaDoc) canEncode.invoke(encoder, new Object JavaDoc[]{new Character JavaDoc(ch)});
489                         return !val.booleanValue();
490                     }
491                     catch (Exception JavaDoc ignored) {
492                     }
493                 }
494                 // Return false if we don't know. This risks not escaping
495
// things which should be escaped, but also means people won't
496
// start getting loads of unnecessary escapes.
497
return false;
498             }
499         }
500     }
501
502
503     /**
504      * Class to signify how text should be handled on output. The following
505      * table provides details.
506      *
507      * <table>
508      * <tr>
509      * <th align="left">
510      * Text Mode
511      * </th>
512      * <th>
513      * Resulting behavior.
514      * </th>
515      * </tr>
516      *
517      * <tr valign="top">
518      * <td>
519      * <i>PRESERVE (Default)</i>
520      * </td>
521      * <td>
522      * All content is printed in the format it was created, no whitespace
523      * or line separators are are added or removed.
524      * </td>
525      * </tr>
526      *
527      * <tr valign="top">
528      * <td>
529      * TRIM_FULL_WHITE
530      * </td>
531      * <td>
532      * Content between tags consisting of all whitespace is not printed.
533      * If the content contains even one non-whitespace character, it is
534      * printed verbatim, whitespace and all.
535      * </td>
536      * </tr>
537      *
538      * <tr valign="top">
539      * <td>
540      * TRIM
541      * </td>
542      * <td>
543      * Same as TrimAllWhite, plus leading/trailing whitespace are
544      * trimmed.
545      * </td>
546      * </tr>
547      *
548      * <tr valign="top">
549      * <td>
550      * NORMALIZE
551      * </td>
552      * <td>
553      * Same as TextTrim, plus addition interior whitespace is compressed
554      * to a single space.
555      * </td>
556      * </tr>
557      * </table>
558      *
559      * In most cases textual content is aligned with the surrounding tags
560      * (after the appropriate text mode is applied). In the case where the only
561      * content between the start and end tags is textual, the start tag, text,
562      * and end tag are all printed on the same line. If the document being
563      * output already has whitespace, it's wise to turn on TRIM mode so the
564      * pre-existing whitespace can be trimmed before adding new whitespace.
565      * <p>
566      * When a element has a xml:space attribute with the value of "preserve",
567      * all formating is turned off and reverts back to the default until the
568      * element and its contents have been printed. If a nested element contains
569      * another xml:space with the value "default" formatting is turned back on
570      * for the child element and then off for the remainder of the parent
571      * element.
572      */

573     public static class TextMode {
574         /**
575          * Mode for literal text preservation.
576          */

577         public static final TextMode PRESERVE = new TextMode("PRESERVE");
578
579         /**
580          * Mode for text trimming (left and right trim).
581          */

582         public static final TextMode TRIM = new TextMode("TRIM");
583
584         /**
585          * Mode for text normalization (left and right trim plus internal
586          * whitespace is normalized to a single space.
587          * @see org.jdom.Element#getTextNormalize
588          */

589         public static final TextMode NORMALIZE = new TextMode("NORMALIZE");
590
591         /**
592          * Mode for text trimming of content consisting of nothing but
593          * whitespace but otherwise not changing output.
594          */

595         public static final TextMode TRIM_FULL_WHITE =
596                 new TextMode("TRIM_FULL_WHITE");
597
598         private final String JavaDoc name;
599
600         private TextMode(String JavaDoc name) {
601             this.name = name;
602         }
603
604         public String JavaDoc toString() {
605             return name;
606         }
607     }
608 }
609
Popular Tags