KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > commons > betwixt > XMLUtils


/*
 * Copyright 2001-2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

16 package org.apache.commons.betwixt;
/**
 * <p><code>XMLUtils</code> contains basic utility methods for XML.</p>
 *
 * <p>The code for {@link #isWellFormedXMLName} is based on code in
 * <code>org.apache.xerces.util.XMLChar</code>
 * in <a href='http://xml.apache.org/xerces2-j/index.html'>Apache Xerces</a>.
 * The authors of this class are credited below.</p>
 *
 * @author Glenn Marcy, IBM
 * @author Andy Clark, IBM
 * @author Eric Ye, IBM
 * @author Arnaud Le Hors, IBM
 * @author Rahul Srivastava, Sun Microsystems Inc.
 *
 * @author Robert Burrell Donkin
 * @since 0.5
 */
public class XMLUtils {

    // Constants
    //-------------------------------------------------------------------------

    /** Escaped <code>&lt;</code> entity */
    public static final String LESS_THAN_ENTITY = "&lt;";
    /** Escaped <code>&gt;</code> entity */
    public static final String GREATER_THAN_ENTITY = "&gt;";
    /** Escaped <code>&amp;</code> entity */
    public static final String AMPERSAND_ENTITY = "&amp;";
    /** Escaped <code>'</code> entity */
    public static final String APOSTROPHE_ENTITY = "&apos;";
    /** Escaped <code>"</code> entity */
    public static final String QUOTE_ENTITY = "&quot;";

    // Used by isWellFormedXMLName

    /** Name start character mask. */
    private static final int MASK_NAME_START = 0x01;
    /** Name character mask. */
    private static final int MASK_NAME = 0x02;

    // Class attributes
    //-------------------------------------------------------------------------

    /**
     * Character flags: one entry per UTF-16 code unit, holding a bitwise OR of
     * {@link #MASK_NAME_START} and {@link #MASK_NAME}.
     */
    private static final byte[] CHARS = new byte[1 << 16];

    //
    // Static initialization
    //

    static {

        //
        // [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
        //                  CombiningChar | Extender
        //

        int[] nameChar = {
            0x002D, 0x002E, // '-' and '.'
        };

        //
        // [5] Name ::= (Letter | '_' | ':') (NameChar)*
        //

        int[] nameStartChar = {
            0x003A, 0x005F, // ':' and '_'
        };

        //
        // [84] Letter ::= BaseChar | Ideographic
        //

        // Range tables hold inclusive (first, last) pairs.
        int[] letterRange = {
            // BaseChar
            0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6,
            0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E,
            0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217,
            0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1,
            0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C,
            0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4,
            0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5,
            0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA,
            0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7,
            0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6,
            0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990,
            0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD,
            0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10,
            0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36,
            0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B,
            0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3,
            0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28,
            0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D,
            0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95,
            0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA,
            0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10,
            0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61,
            0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3,
            0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10,
            0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E,
            0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88,
            0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB,
            0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47,
            0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103,
            0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155,
            0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF,
            0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9,
            0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D,
            0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC,
            0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB,
            0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B,
            0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C,
            0xAC00, 0xD7A3,
            // Ideographic
            0x3021, 0x3029, 0x4E00, 0x9FA5,
        };
        int[] letterChar = {
            // BaseChar
            0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5,
            0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C,
            0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0,
            0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E,
            0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E,
            0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B,
            0x1F5D, 0x1FBE, 0x2126, 0x212E,
            // Ideographic
            0x3007,
        };

        //
        // [87] CombiningChar ::= ...
        //

        int[] combiningCharRange = {
            0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1,
            0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652,
            0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8,
            0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954,
            0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8,
            0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48,
            0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5,
            0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43,
            0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83,
            0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03,
            0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56,
            0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD,
            0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48,
            0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9,
            0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84,
            0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7,
            0x20D0, 0x20DC, 0x302A, 0x302F,
        };

        int[] combiningCharChar = {
            0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF,
            0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7,
            0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F,
            0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A,
        };

        //
        // [88] Digit ::= ...
        //

        int[] digitRange = {
            0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
            0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
            0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
            0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29,
        };

        //
        // [89] Extender ::= ...
        //

        int[] extenderRange = {
            0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE,
        };

        int[] extenderChar = {
            0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005,
        };

        //
        // Initialize
        //

        // set name start characters (every name start character is also a name character)
        flagChars(nameStartChar, MASK_NAME_START | MASK_NAME);
        flagRanges(letterRange, MASK_NAME_START | MASK_NAME);
        flagChars(letterChar, MASK_NAME_START | MASK_NAME);

        // set name characters
        flagChars(nameChar, MASK_NAME);
        flagRanges(digitRange, MASK_NAME);
        flagRanges(combiningCharRange, MASK_NAME);
        flagChars(combiningCharChar, MASK_NAME);
        flagRanges(extenderRange, MASK_NAME);
        flagChars(extenderChar, MASK_NAME);
    }

    /** ORs <code>mask</code> into the flags of every character in the inclusive (first, last) pairs. */
    private static void flagRanges(int[] ranges, int mask) {
        for (int i = 0; i < ranges.length; i += 2) {
            for (int c = ranges[i]; c <= ranges[i + 1]; c++) {
                CHARS[c] |= mask;
            }
        }
    }

    /** ORs <code>mask</code> into the flags of each listed character. */
    private static void flagChars(int[] chars, int mask) {
        for (int i = 0; i < chars.length; i++) {
            CHARS[chars[i]] |= mask;
        }
    }

    // Constructor
    //-------------------------------------------------------------------------

    /**
     * <p>Constructor for use by tools that required <code>JavaBean</code> instances.</p>
     *
     * <p>This constructor is public <strong>only</strong>
     * to permit tools that require a JavaBean instance to operate.
     * <code>XMLUtils</code> instances should <strong>not</strong> be constructed in standard
     * programming. Instead, the class methods should be called directly.</p>
     */
    public XMLUtils() {}

    // Class methods
    //-------------------------------------------------------------------------

    /**
     * <p>Escape the <code>toString</code> of the given object.
     * For use as body text.</p>
     *
     * <p>The characters <code>&lt;</code>, <code>&gt;</code> and <code>&amp;</code>
     * are replaced by their entities; all other characters are copied unchanged.</p>
     *
     * @param value escape <code>value.toString()</code>
     * @return text with escaped delimiters
     * @throws NullPointerException if <code>value</code> or its
     * <code>toString()</code> is null
     */
    public static final String escapeBodyValue(Object value) {
        return escape(value, false);
    }

    /**
     * <p>Escape the <code>toString</code> of the given object.
     * For use in an attribute value.</p>
     *
     * <p>In addition to the characters escaped by {@link #escapeBodyValue},
     * the apostrophe and the double quote are replaced by their entities.</p>
     *
     * @param value escape <code>value.toString()</code>
     * @return text with characters restricted (for use in attributes) escaped
     * @throws NullPointerException if <code>value</code> or its
     * <code>toString()</code> is null
     */
    public static final String escapeAttributeValue(Object value) {
        return escape(value, true);
    }

    /**
     * Copies <code>value.toString()</code> into a new buffer in a single pass,
     * substituting an entity for each restricted character.
     */
    private static String escape(Object value, boolean inAttribute) {
        String text = value.toString();
        StringBuffer buffer = new StringBuffer(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            String entity = entityFor(c, inAttribute);
            if (entity == null) {
                buffer.append(c);
            } else {
                buffer.append(entity);
            }
        }
        return buffer.toString();
    }

    /**
     * Returns the entity replacing <code>c</code>, or null when <code>c</code>
     * is written as is. Quote characters are only replaced in attribute values.
     */
    private static String entityFor(char c, boolean inAttribute) {
        switch (c) {
            case '<':
                return LESS_THAN_ENTITY;
            case '>':
                return GREATER_THAN_ENTITY;
            case '&':
                return AMPERSAND_ENTITY;
            case '\'':
                return inAttribute ? APOSTROPHE_ENTITY : null;
            case '\"':
                return inAttribute ? QUOTE_ENTITY : null;
            default:
                return null;
        }
    }

    /**
     * Escapes the given content suitable for insertion within a
     * <code>CDATA</code> sequence.
     * Within a <code>CDATA</code> section, only the <code>CDEnd</code>
     * string ']]>' is recognized as markup.
     * See {@link #escapeCDATAContent(StringBuffer)} for the substitution made.
     *
     * @param content the body content whose character data should
     * be escaped in a way appropriate for use within a <code>CDATA</code>
     * section of xml.
     * @return escaped character data, not null
     * @throws NullPointerException if <code>content</code> is null
     */
    public static final String escapeCDATAContent(String content) {
        StringBuffer buffer = new StringBuffer(content);
        escapeCDATAContent(buffer);
        return buffer.toString();
    }

    /**
     * Escapes, in place, the given content suitable for insertion within a
     * <code>CDATA</code> sequence.
     * Within a <code>CDATA</code> section, only the <code>CDEnd</code>
     * string ']]>' is recognized as markup. The <code>&gt;</code> of every
     * such occurrence is replaced by {@link #GREATER_THAN_ENTITY}.
     *
     * <p>NOTE(review): entity references are not expanded inside a
     * <code>CDATA</code> section, so a parser reading the result back sees the
     * literal entity text rather than <code>&gt;</code>. The substitution is
     * kept as is because callers may depend on this output.</p>
     *
     * @param bufferedContent the body content within a buffer
     * whose character data should
     * be escaped in a way appropriate for use within a <code>CDATA</code>
     * section of xml. The buffer is modified directly.
     * @throws NullPointerException if <code>bufferedContent</code> is null
     */
    public static final void escapeCDATAContent(StringBuffer bufferedContent) {
        // start at 2: a match needs two preceding ']' characters
        for (int i = 2; i < bufferedContent.length(); i++) {
            if (bufferedContent.charAt(i) == '>'
                    && bufferedContent.charAt(i - 1) == ']'
                    && bufferedContent.charAt(i - 2) == ']') {
                bufferedContent.replace(i, i + 1, GREATER_THAN_ENTITY);
                // step over the inserted entity; the loop increment moves past its last character
                i += GREATER_THAN_ENTITY.length() - 1;
            }
        }
    }

    /**
     * <p>Is this string a well formed xml name?</p>
     *
     * <p>Only certain characters are allowed in well formed element and attribute
     * names in xml. For example, white space is not allowed in a name.</p>
     *
     * <p>The code for this method is based on code in
     * <code>org.apache.xerces.util.XMLChar</code>
     * in <a href='http://xml.apache.org/xerces2-j/index.html'>Apache Xerces</a>.
     * The authors of this class are credited at the top of this class.</p>
     *
     * @param name the <code>String</code> to be checked for use as an xml attribute
     * or element name. Returns false if <code>name</code> is null or empty
     * @return true if this string would be a well-formed name
     */
    public static boolean isWellFormedXMLName(String name) {
        if (name == null || name.length() == 0) {
            return false;
        }

        if (!isNameStartChar(name.charAt(0))) {
            return false;
        }

        for (int i = 1; i < name.length(); i++) {
            if (!isNameChar(name.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if the specified character is a valid name
     * character as defined by the XML 1.0 specification.
     * Values outside the range <code>0x0000</code> to <code>0xFFFF</code>
     * (including negative values) are never name characters.
     *
     * @param c The character to check.
     * @return true if this is an XML name character
     */
    public static boolean isNameChar(int c) {
        return c >= 0 && c < 0x10000 && (CHARS[c] & MASK_NAME) != 0;
    }

    /**
     * Returns true if the specified character is a valid name start
     * character as defined in the XML 1.0 specification.
     * Values outside the range <code>0x0000</code> to <code>0xFFFF</code>
     * (including negative values) are never name start characters.
     *
     * @param c The character to check.
     * @return true if this is an XML name start character
     */
    public static boolean isNameStartChar(int c) {
        return c >= 0 && c < 0x10000 && (CHARS[c] & MASK_NAME_START) != 0;
    }
}
436
Popular Tags