KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > sourceforge > groboutils > util > xml > v1 > XMLUtil


1 /*
2  * @(#)XmlUtil.java
3  *
4  * Copyright (C) 2001,,2003 2002 Matt Albrecht
5  * groboclown@users.sourceforge.net
6  * http://groboutils.sourceforge.net
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice shall be included in
16  * all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24  * DEALINGS IN THE SOFTWARE.
25  */

26
27 package net.sourceforge.groboutils.util.xml.v1;
28
29
/**
 * A utility to aid in various XML activities; currently it converts Java
 * strings into text that is safe to embed in XML content or attribute
 * values.
 *
 * @author Matt Albrecht <a href="mailto:groboclown@users.sourceforge.net">groboclown@users.sourceforge.net</a>
 * @since May 21, 2001
 * @version $Date: 2003/11/23 21:28:47 $
 */
public class XMLUtil
{
    /** Shared instance handed out by {@link #getInstance()}. */
    protected static XMLUtil s_instance = new XMLUtil();

    // XML 1.0 production [2]:
    //   Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] |
    //            [#xE000-#xFFFD] | [#x10000-#x10FFFF]
    //
    // Only characters within [LOWER_RANGE, UPPER_RANGE], plus the three
    // whitespace characters below, are candidates for literal output.
    // Everything else is written as a numeric character reference.
    private static final char LOWER_RANGE = 0x20;
    private static final char UPPER_RANGE = 0x7f;

    private static final char VALID_CHAR_1 = 0x9;
    private static final char VALID_CHAR_2 = 0xA;
    private static final char VALID_CHAR_3 = 0xD;

    // UTF-16 surrogate blocks. A high surrogate immediately followed by a
    // low surrogate encodes one code point in [#x10000-#x10FFFF].
    private static final char HIGH_SURROGATE_MIN = 0xD800;
    private static final char HIGH_SURROGATE_MAX = 0xDBFF;
    private static final char LOW_SURROGATE_MIN = 0xDC00;
    private static final char LOW_SURROGATE_MAX = 0xDFFF;
    private static final int SUPPLEMENTARY_MIN = 0x10000;


    /**
     * Subclasses may construct instances; everyone else should go through
     * {@link #getInstance()}.
     */
    protected XMLUtil()
    {
        // do nothing
    }


    /**
     * Returns the shared utility instance.
     *
     * @return the current value of the shared instance field.
     */
    public static XMLUtil getInstance()
    {
        return s_instance;
    }

    //------------------------------------------


    /**
     * Convert a standard Java String into an XML string.  It transforms
     * out-of-range characters (&lt;, &gt;, &amp;, ", ', and non-standard
     * character values) into XML formatted values.  Since it does correctly
     * escape the quote characters, this may be used for both attribute values
     * as well as standard text.
     *
     * @param javaStr the Java string to be transformed into XML text.  If
     *      the string is <tt>null</tt>, then <tt>null</tt> is returned.
     * @return the XML version of <tt>javaStr</tt>.
     * @see #utf2xml( String, StringBuffer )
     */
    public String utf2xml( String javaStr )
    {
        if (javaStr == null)
        {
            return null;
        }
        StringBuffer sb = new StringBuffer();
        utf2xml( javaStr, sb );
        return sb.toString();
    }


    /**
     * Convert a standard Java String into an XML string.  It transforms
     * out-of-range characters (&lt;, &gt;, &amp;, ", ', and non-standard
     * character values) into XML formatted values.  Since it does correctly
     * escape the quote characters, this may be used for both attribute values
     * as well as standard text.
     * <P>
     * Escaping rules applied to each character:
     * <UL>
     * <LI>a high surrogate directly followed by a low surrogate is combined
     *     into a single code point and written as one decimal character
     *     reference (<tt>&amp;#N;</tt>);</LI>
     * <LI>any other character below 0x20 (except tab, line feed and carriage
     *     return) or above 0x7f is written as a decimal character reference
     *     of its UTF-16 value.  Note that this includes control characters
     *     and unpaired surrogates, which the <tt>Char</tt> production quoted
     *     below does not allow; they are still emitted so that output for
     *     such input is unchanged from earlier versions;</LI>
     * <LI>&lt;, &gt;, ", ' and &amp; are written as their named entities;</LI>
     * <LI>everything else is appended as-is.</LI>
     * </UL>
     * <P>
     * From <a href="http://www.w3.org/TR/2000/REC-xml-20001006">
     * the XML recommendation</a>:
     * <PRE>
     * [Definition: A parsed entity contains text, a sequence of characters,
     * which may represent markup or character data.]
     * [Definition: A character is an atomic unit of text as specified by
     * ISO/IEC 10646 [ISO/IEC 10646] (see also [ISO/IEC 10646-2000]).
     * Legal characters are tab, carriage return, line feed, and the legal
     * characters of Unicode and ISO/IEC 10646. The versions of these standards
     * cited in A.1 Normative References were current at the time this document
     * was prepared. New characters may be added to these standards by
     * amendments or new editions. Consequently, XML processors must accept
     * any character in the range specified for Char. The use of
     * "compatibility characters", as defined in section 6.8 of
     * [Unicode] (see also D21 in section 3.6 of [Unicode3]), is discouraged.]
     *
     * Character Range
     * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] |
     *              [#xE000-#xFFFD] | [#x10000-#x10FFFF]
     *      // any Unicode character, excluding the surrogate blocks,
     *         FFFE, and FFFF. //
     *
     * The mechanism for encoding character code points into bit patterns may
     * vary from entity to entity. All XML processors must accept the UTF-8
     * and UTF-16 encodings of 10646; the mechanisms for signaling which of
     * the two is in use, or for bringing other encodings into play, are
     * discussed later, in 4.3.3 Character Encoding in Entities.
     *
     * ...
     *
     * The ampersand character (&amp;) and the left angle bracket (&lt;)
     * may appear in their literal form only when used as markup delimiters, or
     * within a comment, a processing instruction, or a CDATA section. If they
     * are needed elsewhere, they must be escaped using either numeric
     * character references or the strings "&amp;amp;" and "&amp;lt;"
     * respectively. The right angle bracket (&gt;) may be represented using
     * the string "&amp;gt;", and must, for compatibility, be escaped using
     * "&amp;gt;" or a character reference when it appears in the string
     * "]]&gt;" in content, when that string is not marking the end of a CDATA
     * section.
     * To allow attribute values to contain both single and double quotes, the
     * apostrophe or single-quote character (&apos;) may be represented as
     * "&amp;apos;", and the double-quote character (&quot;) as "&amp;quot;".
     * </PRE>
     *
     * @param javaStr the Java string to be transformed into XML text.  If
     *      it is <tt>null</tt>, then the text "null" is appended to the
     *      output buffer.
     * @param output the StringBuffer to send the transformed XML into.
     * @throws IllegalArgumentException if <tt>output</tt> is <tt>null</tt>.
     */
    public void utf2xml( String javaStr, StringBuffer output )
    {
        if (output == null)
        {
            throw new IllegalArgumentException("No null StringBuffer");
        }
        if (javaStr == null)
        {
            // the string "null" does not have any out-of-range characters,
            // so it can be appended without scanning it.
            output.append("null");
            return;
        }
        int len = javaStr.length();
        // Ensure that the output string buffer has enough space.
        // Doubling the input length is a heuristic, not a hard bound.
        output.ensureCapacity( output.length() + (len * 2) );

        // for efficiency, directly access the array.
        char buf[] = javaStr.toCharArray();
        for (int pos = 0; pos < len; ++pos)
        {
            char c = buf[pos];
            if (isHighSurrogate( c ) && pos + 1 < len &&
                isLowSurrogate( buf[pos + 1] ))
            {
                // A surrogate pair is one supplementary character; emit a
                // single reference to the real code point rather than two
                // references to the (illegal) surrogate values.
                int codePoint = ((c - HIGH_SURROGATE_MIN) << 10) +
                    (buf[pos + 1] - LOW_SURROGATE_MIN) + SUPPLEMENTARY_MIN;
                appendNumericReference( codePoint, output );
                ++pos; // the low surrogate has been consumed
            }
            else if (needsNumericReference( c ))
            {
                appendNumericReference( c, output );
            }
            else
            {
                String entity = namedEntityFor( c );
                if (entity != null)
                {
                    output.append( entity );
                }
                else
                {
                    // append the character as-is
                    output.append( c );
                }
            }
        }
    }


    /** Tests whether <tt>c</tt> lies in the UTF-16 high-surrogate block. */
    private static boolean isHighSurrogate( char c )
    {
        return c >= HIGH_SURROGATE_MIN && c <= HIGH_SURROGATE_MAX;
    }


    /** Tests whether <tt>c</tt> lies in the UTF-16 low-surrogate block. */
    private static boolean isLowSurrogate( char c )
    {
        return c >= LOW_SURROGATE_MIN && c <= LOW_SURROGATE_MAX;
    }


    /**
     * Tests whether <tt>c</tt> falls outside the literal output range, i.e.
     * below 0x20 (other than tab, line feed, carriage return) or above 0x7f.
     */
    private static boolean needsNumericReference( char c )
    {
        if (c > UPPER_RANGE)
        {
            return true;
        }
        return c < LOWER_RANGE &&
            c != VALID_CHAR_1 && c != VALID_CHAR_2 && c != VALID_CHAR_3;
    }


    /** Appends <tt>&amp;#N;</tt> for the given value, in decimal. */
    private static void appendNumericReference( int value,
            StringBuffer output )
    {
        output.append( "&#" );
        output.append( Integer.toString( value ) );
        output.append( ';' );
    }


    /**
     * Returns the named entity for one of the five markup-significant
     * characters, or <tt>null</tt> if <tt>c</tt> is not one of them.
     */
    private static String namedEntityFor( char c )
    {
        switch (c)
        {
            case '<':
                return "&lt;";
            case '>':
                return "&gt;";
            case '"':
                return "&quot;";
            case '\'':
                return "&apos;";
            case '&':
                return "&amp;";
            default:
                return null;
        }
    }
}
222
223
Popular Tags