KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > xml > utils > XMLChar


/*
 * Copyright 1999-2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * $Id$
 */

19
20 package org.apache.xml.utils;
21
/**
 * This class defines the basic XML character properties. The data
 * in this class can be used to verify that a character is a valid
 * XML character or if the character is a space, name start, or name
 * character.
 * <p>
 * A series of convenience methods are supplied to ease the burden
 * of the developer. Because inlining the checks can improve per
 * character performance, the tables of character properties are
 * public. Using the character as an index into the <code>CHARS</code>
 * array and applying the appropriate mask flag (e.g.
 * <code>MASK_VALID</code>), yields the same results as calling the
 * convenience methods. There is one exception: check the comments
 * for the <code>isValid</code> method for details.
 * <p>
 * All convenience methods taking an <code>int</code> return
 * <code>false</code> for negative values instead of indexing the table,
 * and all string validation methods return <code>false</code> for
 * <code>null</code>.
 *
 * @author Glenn Marcy, IBM
 * @author Andy Clark, IBM
 * @author Eric Ye, IBM
 * @author Arnaud Le Hors, IBM
 * @author Rahul Srivastava, Sun Microsystems Inc.
 *
 * @version $Id: XMLChar.java,v 1.7 2002/01/29 01:15:18 lehors Exp $
 */
public class XMLChar {

    //
    // Constants
    //

    /** Character flags, indexed by UTF-16 code unit (0x0000 - 0xFFFF). */
    public static final byte[] CHARS = new byte[1 << 16];

    /** Valid character mask. */
    public static final int MASK_VALID = 0x01;

    /** Space character mask. */
    public static final int MASK_SPACE = 0x02;

    /** Name start character mask. */
    public static final int MASK_NAME_START = 0x04;

    /** Name character mask. */
    public static final int MASK_NAME = 0x08;

    /** Pubid character mask. */
    public static final int MASK_PUBID = 0x10;

    /**
     * Content character mask. Special characters are those that can
     * be considered the start of markup, such as '&lt;' and '&amp;'.
     * The various newline characters are considered special as well.
     * All other valid XML characters can be considered content.
     * <p>
     * This is an optimization for the inner loop of character scanning.
     */
    public static final int MASK_CONTENT = 0x20;

    /** NCName start character mask. */
    public static final int MASK_NCNAME_START = 0x40;

    /** NCName character mask. */
    public static final int MASK_NCNAME = 0x80;

    //
    // Static initialization
    //

    static {

        //
        // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] |
        //              [#xE000-#xFFFD] | [#x10000-#x10FFFF]
        //
        // Stored as inclusive (low, high) pairs; the supplemental range does
        // not fit in the table and is handled by the convenience methods.

        int[] charRange = {
            0x0009, 0x000A, 0x000D, 0x000D, 0x0020, 0xD7FF, 0xE000, 0xFFFD,
        };

        //
        // [3] S ::= (#x20 | #x9 | #xD | #xA)+
        //

        int[] spaceChar = {
            0x0020, 0x0009, 0x000D, 0x000A,
        };

        //
        // [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
        //                  CombiningChar | Extender
        //

        int[] nameChar = {
            0x002D, 0x002E, // '-' and '.'
        };

        //
        // [5] Name ::= (Letter | '_' | ':') (NameChar)*
        //

        int[] nameStartChar = {
            0x003A, 0x005F, // ':' and '_'
        };

        //
        // [13] PubidChar ::= #x20 | 0xD | 0xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
        //

        int[] pubidChar = {
            0x000A, 0x000D, 0x0020, 0x0021, 0x0023, 0x0024, 0x0025, 0x003D,
            0x005F
        };

        int[] pubidRange = {
            0x0027, 0x003B, 0x003F, 0x005A, 0x0061, 0x007A
        };

        //
        // [84] Letter ::= BaseChar | Ideographic
        //

        int[] letterRange = {
            // BaseChar
            0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6,
            0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E,
            0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217,
            0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1,
            0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C,
            0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4,
            0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5,
            0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA,
            0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7,
            0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6,
            0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990,
            0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD,
            0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10,
            0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36,
            0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B,
            0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3,
            0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28,
            0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D,
            0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95,
            0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA,
            0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10,
            0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61,
            0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3,
            0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10,
            0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E,
            0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88,
            0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB,
            0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47,
            0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103,
            0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155,
            0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF,
            0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9,
            0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D,
            0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC,
            0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB,
            0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B,
            0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C,
            0xAC00, 0xD7A3,
            // Ideographic
            0x3021, 0x3029, 0x4E00, 0x9FA5,
        };
        int[] letterChar = {
            // BaseChar
            0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5,
            0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C,
            0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0,
            0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E,
            0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E,
            0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B,
            0x1F5D, 0x1FBE, 0x2126, 0x212E,
            // Ideographic
            0x3007,
        };

        //
        // [87] CombiningChar ::= ...
        //

        int[] combiningCharRange = {
            0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1,
            0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652,
            0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8,
            0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954,
            0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8,
            0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48,
            0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5,
            0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43,
            0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83,
            0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03,
            0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56,
            0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD,
            0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48,
            0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9,
            0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84,
            0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7,
            0x20D0, 0x20DC, 0x302A, 0x302F,
        };

        int[] combiningCharChar = {
            0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF,
            0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7,
            0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F,
            0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A,
        };

        //
        // [88] Digit ::= ...
        //

        int[] digitRange = {
            0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
            0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
            0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
            0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29,
        };

        //
        // [89] Extender ::= ...
        //

        int[] extenderRange = {
            0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE,
        };

        int[] extenderChar = {
            0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005,
        };

        //
        // SpecialChar ::= '<', '&', '\n', '\r', ']'
        //

        int[] specialChar = {
            '<', '&', '\n', '\r', ']',
        };

        //
        // Initialize
        //

        final int nameStartFlags =
            MASK_NAME_START | MASK_NAME | MASK_NCNAME_START | MASK_NCNAME;
        final int nameFlags = MASK_NAME | MASK_NCNAME;

        // set valid characters; every valid character starts out as content
        setRanges(charRange, MASK_VALID | MASK_CONTENT);

        // remove special characters (must run after the content flag is set)
        for (int i = 0; i < specialChar.length; i++) {
            CHARS[specialChar[i]] &= ~MASK_CONTENT;
        }

        // set space characters
        setChars(spaceChar, MASK_SPACE);

        // set name start characters
        setChars(nameStartChar, nameStartFlags);
        setRanges(letterRange, nameStartFlags);
        setChars(letterChar, nameStartFlags);

        // set name characters
        setChars(nameChar, nameFlags);
        setRanges(digitRange, nameFlags);
        setRanges(combiningCharRange, nameFlags);
        setChars(combiningCharChar, nameFlags);
        setRanges(extenderRange, nameFlags);
        setChars(extenderChar, nameFlags);

        // remove ':' from allowable MASK_NCNAME_START and MASK_NCNAME chars
        // (must run after the name start characters have been set)
        CHARS[':'] &= ~(MASK_NCNAME_START | MASK_NCNAME);

        // set Pubid characters
        setChars(pubidChar, MASK_PUBID);
        setRanges(pubidRange, MASK_PUBID);

    } // <clinit>()

    //
    // Private static helpers
    //

    /** ORs <code>flags</code> into the table entry of every listed character. */
    private static void setChars(int[] chars, int flags) {
        for (int i = 0; i < chars.length; i++) {
            CHARS[chars[i]] |= flags;
        }
    }

    /**
     * ORs <code>flags</code> into the table entry of every character covered
     * by the inclusive (low, high) pairs stored in <code>ranges</code>.
     */
    private static void setRanges(int[] ranges, int flags) {
        for (int i = 0; i < ranges.length; i += 2) {
            for (int j = ranges[i]; j <= ranges[i + 1]; j++) {
                CHARS[j] |= flags;
            }
        }
    }

    /**
     * Returns true if <code>c</code> lies inside the table and has the given
     * flag set. Values outside 0x0000-0xFFFF (including negative values)
     * never have a flag.
     */
    private static boolean hasFlag(int c, int mask) {
        return c >= 0 && c < 0x10000 && (CHARS[c] & mask) != 0;
    }

    /**
     * Returns true if every character of <code>s</code> from index
     * <code>from</code> onward has the given flag set.
     */
    private static boolean allHaveFlag(String s, int from, int mask) {
        final int length = s.length();
        for (int i = from; i < length; i++) {
            if (!hasFlag(s.charAt(i), mask)) {
                return false;
            }
        }
        return true;
    }

    /** Returns true if <code>c</code> is an ASCII letter (A-Z or a-z). */
    private static boolean isAsciiLetter(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    }

    /**
     * Returns true if <code>c</code> may appear inside an encoding name:
     * an ASCII letter or digit, '.', '_' or '-'.
     */
    private static boolean isEncodingNameChar(char c) {
        return isAsciiLetter(c) || (c >= '0' && c <= '9')
            || c == '.' || c == '_' || c == '-';
    }

    //
    // Public static methods
    //

    /**
     * Returns true if the specified character is a supplemental character.
     *
     * @param c The character to check.
     */
    public static boolean isSupplemental(int c) {
        return (c >= 0x10000 && c <= 0x10FFFF);
    }

    /**
     * Returns the supplemental character corresponding to the given
     * surrogates.
     *
     * @param h The high surrogate.
     * @param l The low surrogate.
     */
    public static int supplemental(char h, char l) {
        return (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000;
    }

    /**
     * Returns the high surrogate of a supplemental character
     *
     * @param c The supplemental character to "split".
     */
    public static char highSurrogate(int c) {
        return (char) (((c - 0x00010000) >> 10) + 0xD800);
    }

    /**
     * Returns the low surrogate of a supplemental character
     *
     * @param c The supplemental character to "split".
     */
    public static char lowSurrogate(int c) {
        return (char) (((c - 0x00010000) & 0x3FF) + 0xDC00);
    }

    /**
     * Returns whether the given character is a high surrogate
     *
     * @param c The character to check.
     */
    public static boolean isHighSurrogate(int c) {
        return (0xD800 <= c && c <= 0xDBFF);
    }

    /**
     * Returns whether the given character is a low surrogate
     *
     * @param c The character to check.
     */
    public static boolean isLowSurrogate(int c) {
        return (0xDC00 <= c && c <= 0xDFFF);
    }

    /**
     * Returns true if the specified character is valid. This method
     * also checks the surrogate character range from 0x10000 to 0x10FFFF.
     * <p>
     * If the program chooses to apply the mask directly to the
     * <code>CHARS</code> array, then they are responsible for checking
     * the surrogate character range.
     *
     * @param c The character to check. Negative values are not valid.
     */
    public static boolean isValid(int c) {
        return hasFlag(c, MASK_VALID) || isSupplemental(c);
    } // isValid(int):boolean

    /**
     * Returns true if the specified character is invalid.
     *
     * @param c The character to check.
     */
    public static boolean isInvalid(int c) {
        return !isValid(c);
    } // isInvalid(int):boolean

    /**
     * Returns true if the specified character can be considered content.
     * Supplemental characters (0x10000 to 0x10FFFF) are always content.
     *
     * @param c The character to check.
     */
    public static boolean isContent(int c) {
        return hasFlag(c, MASK_CONTENT) || isSupplemental(c);
    } // isContent(int):boolean

    /**
     * Returns true if the specified character can be considered markup.
     * Markup characters include '&lt;', '&amp;', and '%'.
     *
     * @param c The character to check.
     */
    public static boolean isMarkup(int c) {
        return c == '<' || c == '&' || c == '%';
    } // isMarkup(int):boolean

    /**
     * Returns true if the specified character is a space character
     * as defined by production [3] in the XML 1.0 specification.
     *
     * @param c The character to check.
     */
    public static boolean isSpace(int c) {
        return hasFlag(c, MASK_SPACE);
    } // isSpace(int):boolean

    /**
     * Returns true if the specified character is a valid name start
     * character as defined by production [5] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isNameStart(int c) {
        return hasFlag(c, MASK_NAME_START);
    } // isNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid name
     * character as defined by production [4] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isName(int c) {
        return hasFlag(c, MASK_NAME);
    } // isName(int):boolean

    /**
     * Returns true if the specified character is a valid NCName start
     * character as defined by production [4] in Namespaces in XML
     * recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isNCNameStart(int c) {
        return hasFlag(c, MASK_NCNAME_START);
    } // isNCNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid NCName
     * character as defined by production [5] in Namespaces in XML
     * recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isNCName(int c) {
        return hasFlag(c, MASK_NCNAME);
    } // isNCName(int):boolean

    /**
     * Returns true if the specified character is a valid Pubid
     * character as defined by production [13] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isPubid(int c) {
        return hasFlag(c, MASK_PUBID);
    } // isPubid(int):boolean

    /*
     * [5] Name ::= (Letter | '_' | ':') (NameChar)*
     */
    /**
     * Check to see if a string is a valid Name according to [5]
     * in the XML 1.0 Recommendation
     *
     * @param name string to check
     * @return true if name is a valid Name; false if it is not, or if it
     *         is <code>null</code> or empty
     */
    public static boolean isValidName(String name) {
        if (name == null || name.length() == 0) {
            return false;
        }
        return isNameStart(name.charAt(0)) && allHaveFlag(name, 1, MASK_NAME);
    } // isValidName(String):boolean

    /*
     * from the namespace rec
     * [4] NCName ::= (Letter | '_') (NCNameChar)*
     */
    /**
     * Check to see if a string is a valid NCName according to [4]
     * from the XML Namespaces 1.0 Recommendation
     *
     * @param ncName string to check
     * @return true if ncName is a valid NCName; false if it is not, or if
     *         it is <code>null</code> or empty
     */
    public static boolean isValidNCName(String ncName) {
        if (ncName == null || ncName.length() == 0) {
            return false;
        }
        return isNCNameStart(ncName.charAt(0))
            && allHaveFlag(ncName, 1, MASK_NCNAME);
    } // isValidNCName(String):boolean

    /*
     * [7] Nmtoken ::= (NameChar)+
     */
    /**
     * Check to see if a string is a valid Nmtoken according to [7]
     * in the XML 1.0 Recommendation
     *
     * @param nmtoken string to check
     * @return true if nmtoken is a valid Nmtoken; false if it is not, or
     *         if it is <code>null</code> or empty
     */
    public static boolean isValidNmtoken(String nmtoken) {
        if (nmtoken == null || nmtoken.length() == 0) {
            return false;
        }
        return allHaveFlag(nmtoken, 0, MASK_NAME);
    } // isValidNmtoken(String):boolean

    // encodings

    /**
     * Returns true if the encoding name is a valid IANA encoding.
     * This method does not verify that there is a decoder available
     * for this encoding, only that the characters are valid for an
     * IANA encoding name: an ASCII letter followed by any number of
     * ASCII letters, digits, '.', '_' or '-'.
     *
     * @param ianaEncoding The IANA encoding name.
     */
    public static boolean isValidIANAEncoding(String ianaEncoding) {
        if (ianaEncoding == null || ianaEncoding.length() == 0) {
            return false;
        }
        if (!isAsciiLetter(ianaEncoding.charAt(0))) {
            return false;
        }
        final int length = ianaEncoding.length();
        for (int i = 1; i < length; i++) {
            if (!isEncodingNameChar(ianaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidIANAEncoding(String):boolean

    /**
     * Returns true if the encoding name is a valid Java encoding.
     * This method does not verify that there is a decoder available
     * for this encoding, only that the characters are valid for a
     * Java encoding name: one or more ASCII letters, digits, '.', '_'
     * or '-'. Unlike IANA names, a Java name may start with any of
     * those characters (e.g. <code>8859_1</code>).
     *
     * @param javaEncoding The Java encoding name.
     */
    public static boolean isValidJavaEncoding(String javaEncoding) {
        if (javaEncoding == null || javaEncoding.length() == 0) {
            return false;
        }
        final int length = javaEncoding.length();
        // Start at index 0 so the first character is validated as well.
        for (int i = 0; i < length; i++) {
            if (!isEncodingNameChar(javaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidJavaEncoding(String):boolean

    /**
     * Simple check to determine if qname is legal. If it returns false
     * then <code>str</code> is illegal; if it returns true then
     * <code>str</code> is legal.
     * <p>
     * A legal qname is either a single NCName, or a prefix NCName and a
     * local-part NCName separated by exactly one ':'.
     *
     * @param str The qualified name to check.
     * @return true if str is a legal qname; false if it is not, or if it
     *         is <code>null</code> or empty
     */
    public static boolean isValidQName(String str) {
        if (str == null) {
            return false;
        }

        final int colon = str.indexOf(':');

        // A leading or trailing colon leaves an empty prefix or local part.
        // For the empty string both values are -1, so it is rejected here too.
        if (colon == 0 || colon == str.length() - 1) {
            return false;
        }

        if (colon > 0) {
            final String prefix = str.substring(0, colon);
            final String localPart = str.substring(colon + 1);
            // Any further ':' in localPart fails the NCName check.
            return isValidNCName(prefix) && isValidNCName(localPart);
        }
        return isValidNCName(str);
    } // isValidQName(String):boolean

} // class XMLChar
667
Popular Tags