KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sun > org > apache > xml > internal > utils > XML11Char


1 /*
2  * Copyright 1999-2005 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16
17 package com.sun.org.apache.xml.internal.utils;
18
import java.util.Arrays;
20
21
/**
 * THIS IS A COPY OF THE XERCES-2J CLASS com.sun.org.apache.xerces.internal.util.XML11Char
 *
 * This class defines the basic properties of characters in XML 1.1. The data
 * in this class can be used to verify that a character is a valid
 * XML 1.1 character or if the character is a space, name start, or name
 * character.
 * <p>
 * A series of convenience methods are supplied to ease the burden
 * of the developer. Using the character as an index into the <code>XML11CHARS</code>
 * array and applying the appropriate mask flag (e.g.
 * <code>MASK_XML11_VALID</code>), yields the same results as calling the
 * convenience methods for characters below 0x10000. Characters in the
 * supplementary range (0x10000 to 0x10FFFF) are not in the table and are
 * handled by explicit range checks inside each method.
 * <p>
 * All single-character methods accept any <code>int</code>; values outside
 * the range 0 to 0x10FFFF (including negative values) never match.
 *
 * @version $Id: XML11Char.java,v 1.1.4.1 2005/09/08 11:11:21 suresh_emailid Exp $
 */
public class XML11Char {

    //
    // Constants
    //

    /** Character flags for XML 1.1, indexed by UTF-16 code unit. */
    private static final byte XML11CHARS [] = new byte [1 << 16];

    /** XML 1.1 Valid character mask. */
    public static final int MASK_XML11_VALID = 0x01;

    /** XML 1.1 Space character mask. */
    public static final int MASK_XML11_SPACE = 0x02;

    /** XML 1.1 Name start character mask. */
    public static final int MASK_XML11_NAME_START = 0x04;

    /** XML 1.1 Name character mask. */
    public static final int MASK_XML11_NAME = 0x08;

    /** XML 1.1 control character mask */
    public static final int MASK_XML11_CONTROL = 0x10;

    /** XML 1.1 content for external entities (valid - "special" chars - control chars) */
    public static final int MASK_XML11_CONTENT = 0x20;

    /** XML namespaces 1.1 NCNameStart */
    public static final int MASK_XML11_NCNAME_START = 0x40;

    /** XML namespaces 1.1 NCName */
    public static final int MASK_XML11_NCNAME = 0x80;

    /** XML 1.1 content for internal entities (valid - "special" chars) */
    public static final int MASK_XML11_CONTENT_INTERNAL = MASK_XML11_CONTROL | MASK_XML11_CONTENT;

    //
    // Static initialization
    //

    static {

        // Initializing the Character Flag Array
        // Code generated by: XML11CharGenerator.
        //
        // Each stored byte is a combination of the MASK_XML11_* flags above.
        // Decoding of the values used below:
        //     1  = VALID
        //     3  = VALID | SPACE
        //    17  = VALID | CONTROL
        //    33  = VALID | CONTENT
        //    35  = VALID | SPACE | CONTENT
        //    45  = VALID | NAME_START | NAME | CONTENT
        //   -87  = VALID | NAME | CONTENT | NCNAME                      (0xA9)
        //   -19  = VALID | NAME_START | NAME | CONTENT
        //                | NCNAME_START | NCNAME                         (0xED)
        // Entries that are never assigned (0, 55296..57343, 65534, 65535)
        // keep the default value 0, i.e. no flag set.

        Arrays.fill(XML11CHARS, 1, 9, (byte) 17);
        XML11CHARS[9] = 35;
        XML11CHARS[10] = 3;
        Arrays.fill(XML11CHARS, 11, 13, (byte) 17);
        XML11CHARS[13] = 3;
        Arrays.fill(XML11CHARS, 14, 32, (byte) 17);
        XML11CHARS[32] = 35;
        Arrays.fill(XML11CHARS, 33, 38, (byte) 33);
        XML11CHARS[38] = 1;
        Arrays.fill(XML11CHARS, 39, 45, (byte) 33);
        Arrays.fill(XML11CHARS, 45, 47, (byte) -87);
        XML11CHARS[47] = 33;
        Arrays.fill(XML11CHARS, 48, 58, (byte) -87);
        XML11CHARS[58] = 45;
        XML11CHARS[59] = 33;
        XML11CHARS[60] = 1;
        Arrays.fill(XML11CHARS, 61, 65, (byte) 33);
        Arrays.fill(XML11CHARS, 65, 91, (byte) -19);
        Arrays.fill(XML11CHARS, 91, 93, (byte) 33);
        XML11CHARS[93] = 1;
        XML11CHARS[94] = 33;
        XML11CHARS[95] = -19;
        XML11CHARS[96] = 33;
        Arrays.fill(XML11CHARS, 97, 123, (byte) -19);
        Arrays.fill(XML11CHARS, 123, 127, (byte) 33);
        Arrays.fill(XML11CHARS, 127, 133, (byte) 17);
        XML11CHARS[133] = 35;
        Arrays.fill(XML11CHARS, 134, 160, (byte) 17);
        Arrays.fill(XML11CHARS, 160, 183, (byte) 33);
        XML11CHARS[183] = -87;
        Arrays.fill(XML11CHARS, 184, 192, (byte) 33);
        Arrays.fill(XML11CHARS, 192, 215, (byte) -19);
        XML11CHARS[215] = 33;
        Arrays.fill(XML11CHARS, 216, 247, (byte) -19);
        XML11CHARS[247] = 33;
        Arrays.fill(XML11CHARS, 248, 768, (byte) -19);
        Arrays.fill(XML11CHARS, 768, 880, (byte) -87);
        Arrays.fill(XML11CHARS, 880, 894, (byte) -19);
        XML11CHARS[894] = 33;
        Arrays.fill(XML11CHARS, 895, 8192, (byte) -19);
        Arrays.fill(XML11CHARS, 8192, 8204, (byte) 33);
        Arrays.fill(XML11CHARS, 8204, 8206, (byte) -19);
        Arrays.fill(XML11CHARS, 8206, 8232, (byte) 33);
        XML11CHARS[8232] = 35;
        Arrays.fill(XML11CHARS, 8233, 8255, (byte) 33);
        Arrays.fill(XML11CHARS, 8255, 8257, (byte) -87);
        Arrays.fill(XML11CHARS, 8257, 8304, (byte) 33);
        Arrays.fill(XML11CHARS, 8304, 8592, (byte) -19);
        Arrays.fill(XML11CHARS, 8592, 11264, (byte) 33);
        Arrays.fill(XML11CHARS, 11264, 12272, (byte) -19);
        Arrays.fill(XML11CHARS, 12272, 12289, (byte) 33);
        Arrays.fill(XML11CHARS, 12289, 55296, (byte) -19);
        Arrays.fill(XML11CHARS, 57344, 63744, (byte) 33);
        Arrays.fill(XML11CHARS, 63744, 64976, (byte) -19);
        Arrays.fill(XML11CHARS, 64976, 65008, (byte) 33);
        Arrays.fill(XML11CHARS, 65008, 65534, (byte) -19);

    } // <clinit>()

    //
    // Private helpers
    //

    /**
     * Returns true if <code>c</code> lies inside the flag table and at least
     * one bit of <code>mask</code> is set for it. Values outside the table
     * (negative, or 0x10000 and above) yield false instead of an
     * out-of-bounds access.
     */
    private static boolean hasFlag(int c, int mask) {
        return 0 <= c && c < 0x10000 && (XML11CHARS[c] & mask) != 0;
    }

    /** Returns true for code points in the supplementary range 0x10000 to 0x10FFFF. */
    private static boolean isSupplemental(int c) {
        return 0x10000 <= c && c <= 0x10FFFF;
    }

    /**
     * Returns true for supplementary code points accepted in names:
     * 0x10000 up to but excluding 0xF0000.
     */
    private static boolean isSupplementalNameChar(int c) {
        return 0x10000 <= c && c < 0xF0000;
    }

    /**
     * Determines how many UTF-16 code units at <code>index</code> form one
     * character that satisfies <code>mask</code>.
     *
     * @return 1 if the code unit itself carries the flag, 2 if the code unit
     *         and its successor form a surrogate pair encoding a supplementary
     *         name character, 0 if neither applies.
     */
    private static int matchLength(String s, int index, int mask) {
        final char unit = s.charAt(index);
        if (hasFlag(unit, mask)) {
            return 1;
        }
        final int next = index + 1;
        if (next < s.length() && isXML11NameHighSurrogate(unit)) {
            final char low = s.charAt(next);
            if (Character.isLowSurrogate(low)
                    && isSupplementalNameChar(Character.toCodePoint(unit, low))) {
                return 2;
            }
        }
        return 0;
    }

    /**
     * Shared scanner for Name, NCName and Nmtoken: the first character must
     * satisfy <code>firstMask</code>, every following character
     * <code>restMask</code>. The empty string never matches.
     */
    private static boolean matchesAll(String s, int firstMask, int restMask) {
        final int length = s.length();
        if (length == 0) {
            return false;
        }
        int mask = firstMask;
        int i = 0;
        while (i < length) {
            final int consumed = matchLength(s, i, mask);
            if (consumed == 0) {
                return false;
            }
            i += consumed;
            mask = restMask;
        }
        return true;
    }

    //
    // Public static methods
    //

    /**
     * Returns true if the specified character is a space character
     * as amended in the XML 1.1 specification.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Space(int c) {
        return hasFlag(c, MASK_XML11_SPACE);
    } // isXML11Space(int):boolean

    /**
     * Returns true if the specified character is valid. This method
     * also checks the surrogate character range from 0x10000 to 0x10FFFF.
     * <p>
     * If the program chooses to apply the mask directly to the
     * <code>XML11CHARS</code> array, then they are responsible for checking
     * the surrogate character range.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Valid(int c) {
        return hasFlag(c, MASK_XML11_VALID) || isSupplemental(c);
    } // isXML11Valid(int):boolean

    /**
     * Returns true if the specified character is invalid.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Invalid(int c) {
        return !isXML11Valid(c);
    } // isXML11Invalid(int):boolean

    /**
     * Returns true if the specified character is valid and permitted outside
     * of a character reference.
     * That is, this method will return false for the same set as
     * isXML11Valid, except it also reports false for "control characters".
     *
     * @param c The character to check.
     */
    public static boolean isXML11ValidLiteral(int c) {
        return (hasFlag(c, MASK_XML11_VALID) && !hasFlag(c, MASK_XML11_CONTROL))
            || isSupplemental(c);
    } // isXML11ValidLiteral(int):boolean

    /**
     * Returns true if the specified character can be considered
     * content in an external parsed entity.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Content(int c) {
        return hasFlag(c, MASK_XML11_CONTENT) || isSupplemental(c);
    } // isXML11Content(int):boolean

    /**
     * Returns true if the specified character can be considered
     * content in an internal parsed entity.
     *
     * @param c The character to check.
     */
    public static boolean isXML11InternalEntityContent(int c) {
        return hasFlag(c, MASK_XML11_CONTENT_INTERNAL) || isSupplemental(c);
    } // isXML11InternalEntityContent(int):boolean

    /**
     * Returns true if the specified character is a valid name start
     * character as defined by production [4] in the XML 1.1
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NameStart(int c) {
        return hasFlag(c, MASK_XML11_NAME_START) || isSupplementalNameChar(c);
    } // isXML11NameStart(int):boolean

    /**
     * Returns true if the specified character is a valid name
     * character as defined by production [4a] in the XML 1.1
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Name(int c) {
        return hasFlag(c, MASK_XML11_NAME) || isSupplementalNameChar(c);
    } // isXML11Name(int):boolean

    /**
     * Returns true if the specified character is a valid NCName start
     * character as defined by production [4] in Namespaces in XML
     * 1.1 recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NCNameStart(int c) {
        return hasFlag(c, MASK_XML11_NCNAME_START) || isSupplementalNameChar(c);
    } // isXML11NCNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid NCName
     * character as defined by production [5] in Namespaces in XML
     * 1.1 recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NCName(int c) {
        return hasFlag(c, MASK_XML11_NCNAME) || isSupplementalNameChar(c);
    } // isXML11NCName(int):boolean

    /**
     * Returns whether the given character is a valid
     * high surrogate for a name character. This includes
     * all high surrogates for characters [0x10000-0xEFFFF].
     * In other words everything excluding planes 15 and 16.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NameHighSurrogate(int c) {
        return (0xD800 <= c && c <= 0xDB7F);
    } // isXML11NameHighSurrogate(int):boolean

    /*
     * [5] Name ::= NameStartChar NameChar*
     */
    /**
     * Check to see if a string is a valid Name according to [5]
     * in the XML 1.1 Recommendation
     *
     * @param name string to check; must not be null
     * @return true if name is a valid Name
     */
    public static boolean isXML11ValidName(String name) {
        return matchesAll(name, MASK_XML11_NAME_START, MASK_XML11_NAME);
    } // isXML11ValidName(String):boolean

    /*
     * from the namespace 1.1 rec
     * [4] NCName ::= NCNameStartChar NCNameChar*
     */
    /**
     * Check to see if a string is a valid NCName according to [4]
     * from the XML Namespaces 1.1 Recommendation
     *
     * @param ncName string to check; must not be null
     * @return true if name is a valid NCName
     */
    public static boolean isXML11ValidNCName(String ncName) {
        return matchesAll(ncName, MASK_XML11_NCNAME_START, MASK_XML11_NCNAME);
    } // isXML11ValidNCName(String):boolean

    /*
     * [7] Nmtoken ::= (NameChar)+
     */
    /**
     * Check to see if a string is a valid Nmtoken according to [7]
     * in the XML 1.1 Recommendation
     *
     * @param nmtoken string to check; must not be null
     * @return true if nmtoken is a valid Nmtoken
     */
    public static boolean isXML11ValidNmtoken(String nmtoken) {
        // An Nmtoken has no special first character: every position is a NameChar.
        return matchesAll(nmtoken, MASK_XML11_NAME, MASK_XML11_NAME);
    } // isXML11ValidNmtoken(String):boolean

    /**
     * Simple check to determine if qname is legal. If it returns false
     * then <code>str</code> is illegal; if it returns true then
     * <code>str</code> is legal.
     * <p>
     * The string is split at the first colon; both the prefix and the local
     * part must be valid NCNames. Without a colon the whole string must be a
     * valid NCName.
     *
     * @param str string to check; must not be null
     * @return true if str is a valid QName
     */
    public static boolean isXML11ValidQName(String str) {

        final int colon = str.indexOf(':');

        // A leading or trailing colon leaves an empty prefix or local part.
        // For the empty string both colon and length - 1 are -1, so it is
        // rejected here as well.
        if (colon == 0 || colon == str.length() - 1) {
            return false;
        }

        if (colon > 0) {
            final String prefix = str.substring(0, colon);
            final String localPart = str.substring(colon + 1);
            return isXML11ValidNCName(prefix) && isXML11ValidNCName(localPart);
        }
        return isXML11ValidNCName(str);
    } // isXML11ValidQName(String):boolean

} // class XML11Char
430

431
Popular Tags