1 16 17 package org.apache.axis.utils; 18 19 40 public class XMLChar { 41 42 46 47 private static final byte[] CHARS = new byte[1 << 16]; 48 49 50 public static final int MASK_VALID = 0x01; 51 52 53 public static final int MASK_SPACE = 0x02; 54 55 56 public static final int MASK_NAME_START = 0x04; 57 58 59 public static final int MASK_NAME = 0x08; 60 61 62 public static final int MASK_PUBID = 0x10; 63 64 72 public static final int MASK_CONTENT = 0x20; 73 74 75 public static final int MASK_NCNAME_START = 0x40; 76 77 78 public static final int MASK_NCNAME = 0x80; 79 80 84 static { 85 86 91 int charRange[] = { 92 0x0009, 0x000A, 0x000D, 0x000D, 0x0020, 0xD7FF, 0xE000, 0xFFFD, 93 }; 94 95 99 int spaceChar[] = { 100 0x0020, 0x0009, 0x000D, 0x000A, 101 }; 102 103 108 int nameChar[] = { 109 0x002D, 0x002E, }; 111 112 116 int nameStartChar[] = { 117 0x003A, 0x005F, }; 119 120 124 int pubidChar[] = { 125 0x000A, 0x000D, 0x0020, 0x0021, 0x0023, 0x0024, 0x0025, 0x003D, 126 0x005F 127 }; 128 129 int pubidRange[] = { 130 0x0027, 0x003B, 0x003F, 0x005A, 0x0061, 0x007A 131 }; 132 133 137 int letterRange[] = { 138 0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6, 140 0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E, 141 0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217, 142 0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1, 143 0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C, 144 0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4, 145 0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5, 146 0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA, 147 0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7, 148 0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6, 149 0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990, 150 0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD, 151 0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10, 152 0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36, 153 0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B, 154 0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3, 155 0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28, 156 0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D, 157 0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95, 158 0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA, 159 0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10, 160 0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61, 161 0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3, 162 0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10, 163 0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E, 164 0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88, 165 0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB, 166 0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47, 167 0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103, 168 0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155, 169 0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF, 170 0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9, 171 0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D, 172 0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC, 173 0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB, 174 0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B, 175 0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C, 176 0xAC00, 0xD7A3, 177 0x3021, 0x3029, 0x4E00, 0x9FA5, 179 }; 180 int letterChar[] = { 181 0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5, 183 0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C, 184 0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0, 185 0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E, 186 0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E, 187 0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B, 188 0x1F5D, 0x1FBE, 0x2126, 0x212E, 189 0x3007, 191 }; 192 193 197 int combiningCharRange[] = { 198 0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1, 199 0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652, 200 0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8, 201 0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954, 202 0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8, 203 0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48, 204 0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5, 205 0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43, 206 0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83, 207 0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03, 208 0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56, 209 0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD, 210 0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48, 211 0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9, 212 0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84, 213 0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7, 214 0x20D0, 0x20DC, 0x302A, 0x302F, 215 }; 216 217 int combiningCharChar[] = { 218 0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF, 219 0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7, 220 0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F, 221 0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A, 222 }; 223 224 228 int digitRange[] = { 229 0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F, 230 0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F, 231 0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F, 232 0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29, 233 }; 234 235 239 int extenderRange[] = { 240 0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE, 241 }; 242 243 int extenderChar[] = { 244 0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005, 245 }; 246 247 251 int specialChar[] = { 252 '<', '&', '\n', '\r', ']', 253 }; 254 255 259 for (int i = 0; i < charRange.length; i += 2) { 261 for (int j = charRange[i]; j <= charRange[i + 1]; j++) { 262 CHARS[j] |= MASK_VALID | MASK_CONTENT; 263 } 264 } 265 266 for (int i = 0; i < specialChar.length; i++) { 268 CHARS[specialChar[i]] = (byte)(CHARS[specialChar[i]] & ~MASK_CONTENT); 269 } 270 271 for (int i = 0; i < spaceChar.length; i++) { 273 CHARS[spaceChar[i]] |= MASK_SPACE; 274 } 275 276 for (int i = 0; i < nameStartChar.length; i++) { 278 CHARS[nameStartChar[i]] |= MASK_NAME_START | MASK_NAME | 279 MASK_NCNAME_START | MASK_NCNAME; 280 } 281 for (int i = 0; i < letterRange.length; i += 2) { 282 for (int j = letterRange[i]; j <= letterRange[i + 1]; j++) { 283 CHARS[j] |= MASK_NAME_START | MASK_NAME | 284 MASK_NCNAME_START | MASK_NCNAME; 285 } 286 } 287 for (int i = 0; i < letterChar.length; i++) { 288 CHARS[letterChar[i]] |= MASK_NAME_START | MASK_NAME | 289 MASK_NCNAME_START | MASK_NCNAME; 290 } 291 292 for (int i = 0; i < nameChar.length; i++) { 294 CHARS[nameChar[i]] |= MASK_NAME | MASK_NCNAME; 295 } 296 for (int i = 0; i < digitRange.length; i += 2) { 297 for (int j = digitRange[i]; j <= digitRange[i + 1]; j++) { 298 CHARS[j] |= MASK_NAME | MASK_NCNAME; 299 } 300 } 301 for (int i = 0; i < combiningCharRange.length; i += 2) { 302 for (int j = combiningCharRange[i]; j <= combiningCharRange[i + 1]; j++) { 303 CHARS[j] |= MASK_NAME | MASK_NCNAME; 304 } 305 } 306 for (int i = 0; i < combiningCharChar.length; i++) { 307 CHARS[combiningCharChar[i]] |= MASK_NAME | MASK_NCNAME; 308 } 309 for (int i = 0; i < extenderRange.length; i += 2) { 310 for (int j = extenderRange[i]; j <= extenderRange[i + 1]; j++) { 311 CHARS[j] |= MASK_NAME | MASK_NCNAME; 312 } 313 } 314 for (int i = 0; i < extenderChar.length; i++) { 315 CHARS[extenderChar[i]] |= MASK_NAME | MASK_NCNAME; 316 } 317 318 CHARS[':'] &= ~(MASK_NCNAME_START | MASK_NCNAME); 320 321 for (int i = 0; i < pubidChar.length; i++) { 323 CHARS[pubidChar[i]] |= MASK_PUBID; 324 } 325 for (int i = 0; i < pubidRange.length; i += 2) { 326 for (int j = pubidRange[i]; j <= pubidRange[i + 1]; j++) { 327 CHARS[j] |= MASK_PUBID; 328 } 329 } 330 331 } 333 337 342 public static boolean isSupplemental(int c) { 343 return (c >= 0x10000 && c <= 0x10FFFF); 344 } 345 346 353 public static int supplemental(char h, char l) { 354 return (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000; 355 } 356 357 362 public static char highSurrogate(int c) { 363 return (char) (((c - 0x00010000) >> 10) + 0xD800); 364 } 365 366 371 public static char lowSurrogate(int c) { 372 return (char) (((c - 0x00010000) & 0x3FF) + 0xDC00); 373 } 374 375 380 public static boolean isHighSurrogate(int c) { 381 return (0xD800 <= c && c <= 0xDBFF); 382 } 383 384 389 public static boolean isLowSurrogate(int c) { 390 return (0xDC00 <= c && c <= 0xDFFF); 391 } 392 393 394 404 public static boolean isValid(int c) { 405 return (c < 0x10000 && (CHARS[c] & MASK_VALID) != 0) || 406 (0x10000 <= c && c <= 0x10FFFF); 407 } 409 414 public static boolean isInvalid(int c) { 415 return !isValid(c); 416 } 418 423 public static boolean isContent(int c) { 424 return (c < 0x10000 && (CHARS[c] & MASK_CONTENT) != 0) || 425 (0x10000 <= c && c <= 0x10FFFF); 426 } 428 434 public static boolean isMarkup(int c) { 435 return c == '<' || c == '&' || c == '%'; 436 } 438 444 public static boolean isSpace(int c) { 445 return c < 0x10000 && (CHARS[c] & MASK_SPACE) != 0; 446 } 448 454 public static boolean isXML11Space(int c) { 455 return (c < 0x10000 && (CHARS[c] & MASK_SPACE) != 0) || 456 c == 0x85 || c == 0x2028; 457 } 459 466 public static boolean isNameStart(int c) { 467 return c < 0x10000 && (CHARS[c] & MASK_NAME_START) != 0; 468 } 470 477 public static boolean isName(int c) { 478 return c < 0x10000 && (CHARS[c] & MASK_NAME) != 0; 479 } 481 488 public static boolean isNCNameStart(int c) { 489 return c < 0x10000 && (CHARS[c] & MASK_NCNAME_START) != 0; 490 } 492 499 public static boolean isNCName(int c) { 500 return c < 0x10000 && (CHARS[c] & MASK_NCNAME) != 0; 501 } 503 510 public static boolean isPubid(int c) { 511 return c < 0x10000 && (CHARS[c] & MASK_PUBID) != 0; 512 } 514 517 524 public static boolean isValidName(String name) { 525 if (name.length() == 0) 526 return false; 527 char ch = name.charAt(0); 528 if( isNameStart(ch) == false) 529 return false; 530 for (int i = 1; i < name.length(); i++ ) { 531 ch = name.charAt(i); 532 if( isName( ch ) == false ){ 533 return false; 534 } 535 } 536 return true; 537 } 539 540 544 551 public static boolean isValidNCName(String ncName) { 552 if (ncName.length() == 0) 553 return false; 554 char ch = ncName.charAt(0); 555 if( isNCNameStart(ch) == false) 556 return false; 557 for (int i = 1; i < ncName.length(); i++ ) { 558 ch = ncName.charAt(i); 559 if( isNCName( ch ) == false ){ 560 return false; 561 } 562 } 563 return true; 564 } 566 569 576 public static boolean isValidNmtoken(String nmtoken) { 577 if (nmtoken.length() == 0) 578 return false; 579 for (int i = 0; i < nmtoken.length(); i++ ) { 580 char ch = nmtoken.charAt(i); 581 if( ! isName( ch ) ){ 582 return false; 583 } 584 } 585 return true; 586 } 588 589 590 591 592 594 602 public static boolean isValidIANAEncoding(String ianaEncoding) { 603 if (ianaEncoding != null) { 604 int length = ianaEncoding.length(); 605 if (length > 0) { 606 char c = ianaEncoding.charAt(0); 607 if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 608 for (int i = 1; i < length; i++) { 609 c = ianaEncoding.charAt(i); 610 if ((c < 'A' || c > 'Z') && (c < 'a' || c > 'z') && 611 (c < '0' || c > '9') && c != '.' && c != '_' && 612 c != '-') { 613 return false; 614 } 615 } 616 return true; 617 } 618 } 619 } 620 return false; 621 } 623 631 public static boolean isValidJavaEncoding(String javaEncoding) { 632 if (javaEncoding != null) { 633 int length = javaEncoding.length(); 634 if (length > 0) { 635 for (int i = 1; i < length; i++) { 636 char c = javaEncoding.charAt(i); 637 if ((c < 'A' || c > 'Z') && (c < 'a' || c > 'z') && 638 (c < '0' || c > '9') && c != '.' && c != '_' && 639 c != '-') { 640 return false; 641 } 642 } 643 return true; 644 } 645 } 646 return false; 647 } 649 } | Popular Tags |