KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > sf > saxon > om > XMLChar


1 package net.sf.saxon.om;
2
3 // Copied without change from Xerces module org.apache.xerces.util.XMLChar, version 2.5.0
4

5 /*
6  * The Apache Software License, Version 1.1
7  *
8  *
9  * Copyright (c) 1999-2002 The Apache Software Foundation. All rights
10  * reserved.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  *
16  * 1. Redistributions of source code must retain the above copyright
17  * notice, this list of conditions and the following disclaimer.
18  *
19  * 2. Redistributions in binary form must reproduce the above copyright
20  * notice, this list of conditions and the following disclaimer in
21  * the documentation and/or other materials provided with the
22  * distribution.
23  *
24  * 3. The end-user documentation included with the redistribution,
25  * if any, must include the following acknowledgment:
26  * "This product includes software developed by the
27  * Apache Software Foundation (http://www.apache.org/)."
28  * Alternately, this acknowledgment may appear in the software itself,
29  * if and wherever such third-party acknowledgments normally appear.
30  *
31  * 4. The names "Xerces" and "Apache Software Foundation" must
32  * not be used to endorse or promote products derived from this
33  * software without prior written permission. For written
34  * permission, please contact apache@apache.org.
35  *
36  * 5. Products derived from this software may not be called "Apache",
37  * nor may "Apache" appear in their name, without prior written
38  * permission of the Apache Software Foundation.
39  *
40  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
41  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
42  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
43  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
44  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
46  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
47  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
48  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
49  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
50  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51  * SUCH DAMAGE.
52  * ====================================================================
53  *
54  * This software consists of voluntary contributions made by many
55  * individuals on behalf of the Apache Software Foundation and was
56  * originally based on software copyright (c) 1999, International
57  * Business Machines, Inc., http://www.apache.org. For more
58  * information on the Apache Software Foundation, please see
59  * <http://www.apache.org/>.
60  */

61
/**
 * This class defines the basic XML character properties. It can be
 * used to verify that a character is a valid XML character or whether
 * the character is a space, name start, name, NCName or Pubid
 * character.
 * <p>
 * The per-character flag table is private to this class; callers query
 * it through the static convenience methods, which combine a bounds
 * check with one of the public <code>MASK_*</code> flags. Code points
 * outside the range 0x0000-0xFFFF are never looked up in the table:
 * supplemental code points (0x10000-0x10FFFF) are treated as valid
 * content characters and as nothing else, and any other value
 * (including negative values) has no property at all.
 *
 * @author Glenn Marcy, IBM
 * @author Andy Clark, IBM
 * @author Eric Ye, IBM
 * @author Arnaud Le Hors, IBM
 * @author Rahul Srivastava, Sun Microsystems Inc.
 *
 * @version $Id: XMLChar.java,v 1.2 2004/03/09 10:19:09 mhkay Exp $
 */
public class XMLChar {

    // TODO: Xerces-J 2.6.2 has a new version of this module which claims to
    // improve class loading performance.

    //
    // Constants
    //

    /** Character flags, one byte of MASK_* bits per UTF-16 code unit. */
    private static final byte[] CHARS = new byte[1 << 16];

    /** Valid character mask. */
    public static final int MASK_VALID = 0x01;

    /** Space character mask. */
    public static final int MASK_SPACE = 0x02;

    /** Name start character mask. */
    public static final int MASK_NAME_START = 0x04;

    /** Name character mask. */
    public static final int MASK_NAME = 0x08;

    /** Pubid character mask. */
    public static final int MASK_PUBID = 0x10;

    /**
     * Content character mask. Special characters are those that can
     * be considered the start of markup, such as '&lt;' and '&amp;'.
     * The various newline characters are considered special as well.
     * All other valid XML characters can be considered content.
     * <p>
     * This is an optimization for the inner loop of character scanning.
     */
    public static final int MASK_CONTENT = 0x20;

    /** NCName start character mask. */
    public static final int MASK_NCNAME_START = 0x40;

    /** NCName character mask. */
    public static final int MASK_NCNAME = 0x80;

    /** Not instantiable: this class only offers static members. */
    private XMLChar() {
    }

    //
    // Static initialization
    //

    static {

        //
        // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] |
        //              [#xE000-#xFFFD] | [#x10000-#x10FFFF]
        //

        int[] charRange = {
            0x0009, 0x000A, 0x000D, 0x000D, 0x0020, 0xD7FF, 0xE000, 0xFFFD,
        };

        //
        // [3] S ::= (#x20 | #x9 | #xD | #xA)+
        //

        int[] spaceChar = {
            0x0020, 0x0009, 0x000D, 0x000A,
        };

        //
        // [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
        //                  CombiningChar | Extender
        //

        int[] nameChar = {
            0x002D, 0x002E, // '-' and '.'
        };

        //
        // [5] Name ::= (Letter | '_' | ':') (NameChar)*
        //

        int[] nameStartChar = {
            0x003A, 0x005F, // ':' and '_'
        };

        //
        // [13] PubidChar ::= #x20 | 0xD | 0xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
        //

        int[] pubidChar = {
            0x000A, 0x000D, 0x0020, 0x0021, 0x0023, 0x0024, 0x0025, 0x003D,
            0x005F
        };

        int[] pubidRange = {
            0x0027, 0x003B, 0x003F, 0x005A, 0x0061, 0x007A
        };

        //
        // [84] Letter ::= BaseChar | Ideographic
        //

        int[] letterRange = {
            // BaseChar
            0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6,
            0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E,
            0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217,
            0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1,
            0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C,
            0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4,
            0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5,
            0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA,
            0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7,
            0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6,
            0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990,
            0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD,
            0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10,
            0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36,
            0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B,
            0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3,
            0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28,
            0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D,
            0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95,
            0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA,
            0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10,
            0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61,
            0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3,
            0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10,
            0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E,
            0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88,
            0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB,
            0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47,
            0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103,
            0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155,
            0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF,
            0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9,
            0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D,
            0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC,
            0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB,
            0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B,
            0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C,
            0xAC00, 0xD7A3,
            // Ideographic
            0x3021, 0x3029, 0x4E00, 0x9FA5,
        };
        int[] letterChar = {
            // BaseChar
            0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5,
            0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C,
            0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0,
            0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E,
            0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E,
            0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B,
            0x1F5D, 0x1FBE, 0x2126, 0x212E,
            // Ideographic
            0x3007,
        };

        //
        // [87] CombiningChar ::= ...
        //

        int[] combiningCharRange = {
            0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1,
            0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652,
            0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8,
            0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954,
            0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8,
            0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48,
            0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5,
            0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43,
            0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83,
            0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03,
            0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56,
            0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD,
            0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48,
            0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9,
            0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84,
            0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7,
            0x20D0, 0x20DC, 0x302A, 0x302F,
        };

        int[] combiningCharChar = {
            0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF,
            0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7,
            0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F,
            0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A,
        };

        //
        // [88] Digit ::= ...
        //

        int[] digitRange = {
            0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
            0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
            0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
            0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29,
        };

        //
        // [89] Extender ::= ...
        //

        int[] extenderRange = {
            0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE,
        };

        int[] extenderChar = {
            0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005,
        };

        //
        // SpecialChar ::= '<', '&', '\n', '\r', ']'
        //

        int[] specialChar = {
            '<', '&', '\n', '\r', ']',
        };

        //
        // Initialize
        //

        final int nameStartFlags =
            MASK_NAME_START | MASK_NAME | MASK_NCNAME_START | MASK_NCNAME;
        final int nameFlags = MASK_NAME | MASK_NCNAME;

        // set valid characters; every valid character starts out as content
        setRanges(charRange, MASK_VALID | MASK_CONTENT);

        // remove special characters from the content set
        clearChars(specialChar, MASK_CONTENT);

        // set space characters
        setChars(spaceChar, MASK_SPACE);

        // set name start characters
        setChars(nameStartChar, nameStartFlags);
        setRanges(letterRange, nameStartFlags);
        setChars(letterChar, nameStartFlags);

        // set name characters
        setChars(nameChar, nameFlags);
        setRanges(digitRange, nameFlags);
        setRanges(combiningCharRange, nameFlags);
        setChars(combiningCharChar, nameFlags);
        setRanges(extenderRange, nameFlags);
        setChars(extenderChar, nameFlags);

        // remove ':' from allowable MASK_NCNAME_START and MASK_NCNAME chars
        CHARS[':'] &= ~(MASK_NCNAME_START | MASK_NCNAME);

        // set Pubid characters
        setChars(pubidChar, MASK_PUBID);
        setRanges(pubidRange, MASK_PUBID);

    } // <clinit>()

    //
    // Private table helpers
    //

    /** ORs <code>mask</code> into every entry of each inclusive (low, high) pair in <code>ranges</code>. */
    private static void setRanges(int[] ranges, int mask) {
        for (int i = 0; i < ranges.length; i += 2) {
            for (int c = ranges[i]; c <= ranges[i + 1]; c++) {
                CHARS[c] |= mask;
            }
        }
    }

    /** ORs <code>mask</code> into the entry of each character listed in <code>chars</code>. */
    private static void setChars(int[] chars, int mask) {
        for (int i = 0; i < chars.length; i++) {
            CHARS[chars[i]] |= mask;
        }
    }

    /** Clears the <code>mask</code> bits in the entry of each character listed in <code>chars</code>. */
    private static void clearChars(int[] chars, int mask) {
        for (int i = 0; i < chars.length; i++) {
            CHARS[chars[i]] &= ~mask;
        }
    }

    /**
     * Tests a flag for a code point, returning false (instead of indexing
     * outside the table) for negative values and values above 0xFFFF.
     */
    private static boolean hasFlag(int c, int mask) {
        return c >= 0 && c < 0x10000 && (CHARS[c] & mask) != 0;
    }

    /** Returns true for the characters allowed after the first one in an encoding name: [A-Za-z0-9._-]. */
    private static boolean isEncodingNameChar(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
            || (c >= '0' && c <= '9') || c == '.' || c == '_' || c == '-';
    }

    //
    // Public static methods
    //

    /**
     * Returns true if the specified character is a supplemental character.
     *
     * @param c The character to check.
     * @return true if c lies in the range 0x10000-0x10FFFF
     */
    public static boolean isSupplemental(int c) {
        return (c >= 0x10000 && c <= 0x10FFFF);
    }

    /**
     * Returns the supplemental character corresponding to the given
     * surrogates. The arguments are not checked to be surrogates.
     *
     * @param h The high surrogate.
     * @param l The low surrogate.
     * @return the combined code point
     */
    public static int supplemental(char h, char l) {
        return (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000;
    }

    /**
     * Returns the high surrogate of a supplemental character
     *
     * @param c The supplemental character to "split".
     * @return the high (leading) surrogate for c
     */
    public static char highSurrogate(int c) {
        return (char) (((c - 0x00010000) >> 10) + 0xD800);
    }

    /**
     * Returns the low surrogate of a supplemental character
     *
     * @param c The supplemental character to "split".
     * @return the low (trailing) surrogate for c
     */
    public static char lowSurrogate(int c) {
        return (char) (((c - 0x00010000) & 0x3FF) + 0xDC00);
    }

    /**
     * Return whether a given char (codepoint) is a surrogate (high or low)
     *
     * @param c The character to check.
     * @return true if c lies in the range 0xD800-0xDFFF
     */
    public static boolean isSurrogate(int c) {
        // A range test rather than a 16-bit mask: the argument is an int, and
        // a mask would also match supplemental code points such as 0x1D800.
        return 0xD800 <= c && c <= 0xDFFF;
    }

    /**
     * Returns whether the given character is a high surrogate
     *
     * @param c The character to check.
     * @return true if c lies in the range 0xD800-0xDBFF
     */
    public static boolean isHighSurrogate(int c) {
        return 0xD800 <= c && c <= 0xDBFF;
    }

    /**
     * Returns whether the given character is a low surrogate
     *
     * @param c The character to check.
     * @return true if c lies in the range 0xDC00-0xDFFF
     */
    public static boolean isLowSurrogate(int c) {
        return 0xDC00 <= c && c <= 0xDFFF;
    }

    /**
     * Returns true if the specified character is valid. This method
     * also accepts the supplemental character range from 0x10000 to
     * 0x10FFFF.
     *
     * @param c The character to check.
     * @return true if c is a valid XML character
     */
    public static boolean isValid(int c) {
        return hasFlag(c, MASK_VALID) || isSupplemental(c);
    } // isValid(int):boolean

    /**
     * Returns true if the specified character is invalid.
     *
     * @param c The character to check.
     * @return the negation of {@link #isValid(int)}
     */
    public static boolean isInvalid(int c) {
        return !isValid(c);
    } // isInvalid(int):boolean

    /**
     * Returns true if the specified character can be considered content.
     * Supplemental characters (0x10000-0x10FFFF) count as content.
     *
     * @param c The character to check.
     * @return true if c is a content character
     */
    public static boolean isContent(int c) {
        return hasFlag(c, MASK_CONTENT) || isSupplemental(c);
    } // isContent(int):boolean

    /**
     * Returns true if the specified character can be considered markup.
     * Markup characters include '&lt;', '&amp;', and '%'.
     *
     * @param c The character to check.
     * @return true if c is one of the three markup characters
     */
    public static boolean isMarkup(int c) {
        return c == '<' || c == '&' || c == '%';
    } // isMarkup(int):boolean

    /**
     * Returns true if the specified character is a space character
     * as defined by production [3] in the XML 1.0 specification.
     *
     * @param c The character to check.
     * @return true if c is a space character
     */
    public static boolean isSpace(int c) {
        return hasFlag(c, MASK_SPACE);
    } // isSpace(int):boolean

    /**
     * Returns true if the specified character is a valid name start
     * character as defined by production [5] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     * @return true if c may start a Name
     */
    public static boolean isNameStart(int c) {
        return hasFlag(c, MASK_NAME_START);
    } // isNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid name
     * character as defined by production [4] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     * @return true if c is a NameChar
     */
    public static boolean isName(int c) {
        return hasFlag(c, MASK_NAME);
    } // isName(int):boolean

    /**
     * Returns true if the specified character is a valid NCName start
     * character as defined by production [4] in Namespaces in XML
     * recommendation.
     *
     * @param c The character to check.
     * @return true if c may start an NCName
     */
    public static boolean isNCNameStart(int c) {
        return hasFlag(c, MASK_NCNAME_START);
    } // isNCNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid NCName
     * character as defined by production [5] in Namespaces in XML
     * recommendation.
     *
     * @param c The character to check.
     * @return true if c is an NCNameChar
     */
    public static boolean isNCName(int c) {
        return hasFlag(c, MASK_NCNAME);
    } // isNCName(int):boolean

    /**
     * Returns true if the specified character is a valid Pubid
     * character as defined by production [13] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     * @return true if c is a PubidChar
     */
    public static boolean isPubid(int c) {
        return hasFlag(c, MASK_PUBID);
    } // isPubid(int):boolean

    /*
     * [5] Name ::= (Letter | '_' | ':') (NameChar)*
     */

    /**
     * Check to see if a string is a valid Name according to [5]
     * in the XML 1.0 Recommendation
     *
     * @param name string to check
     * @return true if name is a valid Name; false for the empty string
     * @throws NullPointerException if name is null
     */
    public static boolean isValidName(String name) {
        if (name.length() == 0) {
            return false;
        }
        if (!isNameStart(name.charAt(0))) {
            return false;
        }
        for (int i = 1; i < name.length(); i++) {
            if (!isName(name.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidName(String):boolean

    /*
     * from the namespace rec
     * [4] NCName ::= (Letter | '_') (NCNameChar)*
     */

    /**
     * Check to see if a string is a valid NCName according to [4]
     * from the XML Namespaces 1.0 Recommendation
     *
     * @param ncName string to check
     * @return true if ncName is a valid NCName; false for the empty string
     * @throws NullPointerException if ncName is null
     */
    public static boolean isValidNCName(CharSequence ncName) {
        if (ncName.length() == 0) {
            return false;
        }
        if (!isNCNameStart(ncName.charAt(0))) {
            return false;
        }
        for (int i = 1; i < ncName.length(); i++) {
            if (!isNCName(ncName.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidNCName(CharSequence):boolean

    /*
     * [7] Nmtoken ::= (NameChar)+
     */

    /**
     * Check to see if a string is a valid Nmtoken according to [7]
     * in the XML 1.0 Recommendation
     *
     * @param nmtoken string to check
     * @return true if nmtoken is a valid Nmtoken; false for the empty string
     * @throws NullPointerException if nmtoken is null
     */
    public static boolean isValidNmtoken(String nmtoken) {
        if (nmtoken.length() == 0) {
            return false;
        }
        for (int i = 0; i < nmtoken.length(); i++) {
            if (!isName(nmtoken.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidNmtoken(String):boolean

    // encodings

    /**
     * Returns true if the encoding name is a valid IANA encoding.
     * This method does not verify that there is a decoder available
     * for this encoding, only that the characters are valid for an
     * IANA encoding name: an ASCII letter followed by any number of
     * ASCII letters, digits, '.', '_' or '-'.
     *
     * @param ianaEncoding The IANA encoding name.
     * @return true if the name is well formed; false for null or empty input
     */
    public static boolean isValidIANAEncoding(String ianaEncoding) {
        if (ianaEncoding == null || ianaEncoding.length() == 0) {
            return false;
        }
        char first = ianaEncoding.charAt(0);
        if (!((first >= 'A' && first <= 'Z') || (first >= 'a' && first <= 'z'))) {
            return false;
        }
        for (int i = 1; i < ianaEncoding.length(); i++) {
            if (!isEncodingNameChar(ianaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidIANAEncoding(String):boolean

    /**
     * Returns true if the encoding name is a valid Java encoding.
     * This method does not verify that there is a decoder available
     * for this encoding, only that the characters are valid for a
     * Java encoding name: one or more ASCII letters, digits, '.', '_'
     * or '-'. Unlike an IANA name, a Java name may start with any of
     * those characters (for example <code>8859_1</code>).
     *
     * @param javaEncoding The Java encoding name.
     * @return true if the name is well formed; false for null or empty input
     */
    public static boolean isValidJavaEncoding(String javaEncoding) {
        if (javaEncoding == null || javaEncoding.length() == 0) {
            return false;
        }
        // Start at index 0 so the first character is validated as well.
        for (int i = 0; i < javaEncoding.length(); i++) {
            if (!isEncodingNameChar(javaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidJavaEncoding(String):boolean

} // class XMLChar
701
Popular Tags