/**
 * Table-driven classification of characters for XML 1.0 processing.
 * <p>
 * A 64K-entry table holds one bit mask per UTF-16 code unit. The static
 * initializer fills the table once, and the {@code isXxx} predicates test
 * individual bits of the entry for the queried value. Values above
 * {@code 0xFFFF} are not in the table: {@link #isValid(int)} and
 * {@link #isContent(int)} accept them up to {@code 0x10FFFF}, every other
 * table-backed predicate rejects them. Negative values (for example the
 * {@code -1} end-of-stream marker) are rejected by all table-backed
 * predicates instead of indexing outside the table.
 */
public class XMLChar
{

   /** Bit masks for every UTF-16 code unit, indexed by the code unit value. */
   private static final byte[] CHARS = new byte[1 << 16];

   /** Mask bit: the character is a legal XML character. */
   public static final int MASK_VALID = 0x01;

   /** Mask bit: the character is XML white space. */
   public static final int MASK_SPACE = 0x02;

   /** Mask bit: the character may start an XML name. */
   public static final int MASK_NAME_START = 0x04;

   /** Mask bit: the character may appear inside an XML name. */
   public static final int MASK_NAME = 0x08;

   /** Mask bit: the character may appear in a public identifier. */
   public static final int MASK_PUBID = 0x10;

   /**
    * Mask bit: the character is valid and is not one of the characters that
    * need special handling in content ({@code '<'}, {@code '&'},
    * {@code '\n'}, {@code '\r'}, {@code ']'}).
    */
   public static final int MASK_CONTENT = 0x20;

   /** Mask bit: the character may start a namespace-aware name (NCName). */
   public static final int MASK_NCNAME_START = 0x40;

   /** Mask bit: the character may appear inside an NCName. */
   public static final int MASK_NCNAME = 0x80;

   static
   {
      // Valid character ranges (inclusive start/end pairs).
      final int[] charRange = {
         0x0009, 0x000A, 0x000D, 0x000D, 0x0020, 0xD7FF, 0xE000, 0xFFFD,
      };

      // White space characters.
      final int[] spaceChar = {
         0x0020, 0x0009, 0x000D, 0x000A,
      };

      // Name characters that are neither letters, digits, combining
      // characters nor extenders: '-' and '.'.
      final int[] nameChar = {
         0x002D, 0x002E,
      };

      // Name start characters that are not letters: ':' and '_'.
      final int[] nameStartChar = {
         0x003A, 0x005F,
      };

      // Public identifier characters: individual values, then ranges.
      final int[] pubidChar = {
         0x000A, 0x000D, 0x0020, 0x0021, 0x0023, 0x0024, 0x0025, 0x003D,
         0x005F
      };

      final int[] pubidRange = {
         0x0027, 0x003B, 0x003F, 0x005A, 0x0061, 0x007A
      };

      // Letters: inclusive ranges, then individual values.
      final int[] letterRange = {
         0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6,
         0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E,
         0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217,
         0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1,
         0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C,
         0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4,
         0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5,
         0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA,
         0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7,
         0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6,
         0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990,
         0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD,
         0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10,
         0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36,
         0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B,
         0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3,
         0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28,
         0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D,
         0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95,
         0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA,
         0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10,
         0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61,
         0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3,
         0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10,
         0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E,
         0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88,
         0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB,
         0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47,
         0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103,
         0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155,
         0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF,
         0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9,
         0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D,
         0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC,
         0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB,
         0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B,
         0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C,
         0xAC00, 0xD7A3,
         0x3021, 0x3029, 0x4E00, 0x9FA5,
      };

      final int[] letterChar = {
         0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5,
         0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C,
         0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0,
         0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E,
         0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E,
         0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B,
         0x1F5D, 0x1FBE, 0x2126, 0x212E,
         0x3007,
      };

      // Combining characters: inclusive ranges, then individual values.
      final int[] combiningCharRange = {
         0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1,
         0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652,
         0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8,
         0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954,
         0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8,
         0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48,
         0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5,
         0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43,
         0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83,
         0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03,
         0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56,
         0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD,
         0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48,
         0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9,
         0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84,
         0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7,
         0x20D0, 0x20DC, 0x302A, 0x302F,
      };

      final int[] combiningCharChar = {
         0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF,
         0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7,
         0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F,
         0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A,
      };

      // Digits (inclusive ranges).
      final int[] digitRange = {
         0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
         0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
         0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
         0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29,
      };

      // Extenders: inclusive ranges, then individual values.
      final int[] extenderRange = {
         0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE,
      };

      final int[] extenderChar = {
         0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005,
      };

      // Valid characters that must not carry the "content" bit.
      final int[] specialChar = {
         '<', '&', '\n', '\r', ']',
      };

      final int nameStartBits =
         MASK_NAME_START | MASK_NAME | MASK_NCNAME_START | MASK_NCNAME;
      final int nameBits = MASK_NAME | MASK_NCNAME;

      // Every valid character starts out as content; the special
      // characters then lose the content bit again.
      flagRanges(charRange, MASK_VALID | MASK_CONTENT);
      for (int i = 0; i < specialChar.length; i++)
      {
         CHARS[specialChar[i]] = (byte)(CHARS[specialChar[i]] & ~MASK_CONTENT);
      }

      flag(spaceChar, MASK_SPACE);

      // Name start characters (which are also name characters).
      flag(nameStartChar, nameStartBits);
      flagRanges(letterRange, nameStartBits);
      flag(letterChar, nameStartBits);

      // Characters allowed only after the first position of a name.
      flag(nameChar, nameBits);
      flagRanges(digitRange, nameBits);
      flagRanges(combiningCharRange, nameBits);
      flag(combiningCharChar, nameBits);
      flagRanges(extenderRange, nameBits);
      flag(extenderChar, nameBits);

      // ':' is a name character but never part of an NCName.
      CHARS[':'] &= ~(MASK_NCNAME_START | MASK_NCNAME);

      flag(pubidChar, MASK_PUBID);
      flagRanges(pubidRange, MASK_PUBID);
   }

   /** Sets the given mask bits on each listed table entry. */
   private static void flag(int[] chars, int mask)
   {
      for (int i = 0; i < chars.length; i++)
      {
         CHARS[chars[i]] |= mask;
      }
   }

   /**
    * Sets the given mask bits on every table entry inside the inclusive
    * ranges stored as consecutive {@code start, end} pairs.
    */
   private static void flagRanges(int[] ranges, int mask)
   {
      for (int i = 0; i < ranges.length; i += 2)
      {
         for (int j = ranges[i]; j <= ranges[i + 1]; j++)
         {
            CHARS[j] |= mask;
         }
      }
   }

   /**
    * Tests a mask bit for {@code c}. Values outside the table, whether
    * negative or above {@code 0xFFFF}, never match.
    */
   private static boolean hasMask(int c, int mask)
   {
      return c >= 0 && c < 0x10000 && (CHARS[c] & mask) != 0;
   }

   /** Tells whether {@code c} is acceptable in an encoding name. */
   private static boolean isEncodingNameChar(char c)
   {
      return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
         || (c >= '0' && c <= '9') || c == '.' || c == '_' || c == '-';
   }

   /**
    * Returns true if the value lies in the supplemental range
    * {@code 0x10000}-{@code 0x10FFFF}.
    *
    * @param c the value to check
    */
   public static boolean isSupplemental(int c)
   {
      return (c >= 0x10000 && c <= 0x10FFFF);
   }

   /**
    * Combines a surrogate pair into the supplemental code point it encodes.
    * The arguments are not range-checked.
    *
    * @param h the high surrogate
    * @param l the low surrogate
    * @return the combined code point
    */
   public static int supplemental(char h, char l)
   {
      return (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000;
   }

   /**
    * Returns the high surrogate for a supplemental code point. The argument
    * is not range-checked.
    *
    * @param c the supplemental code point
    */
   public static char highSurrogate(int c)
   {
      return (char)(((c - 0x00010000) >> 10) + 0xD800);
   }

   /**
    * Returns the low surrogate for a supplemental code point. The argument
    * is not range-checked.
    *
    * @param c the supplemental code point
    */
   public static char lowSurrogate(int c)
   {
      return (char)(((c - 0x00010000) & 0x3FF) + 0xDC00);
   }

   /**
    * Returns true if the value lies in {@code 0xD800}-{@code 0xDBFF}.
    *
    * @param c the value to check
    */
   public static boolean isHighSurrogate(int c)
   {
      return (0xD800 <= c && c <= 0xDBFF);
   }

   /**
    * Returns true if the value lies in {@code 0xDC00}-{@code 0xDFFF}.
    *
    * @param c the value to check
    */
   public static boolean isLowSurrogate(int c)
   {
      return (0xDC00 <= c && c <= 0xDFFF);
   }

   /**
    * Returns true if the value is a valid XML character: either flagged
    * {@link #MASK_VALID} in the table or a supplemental code point.
    *
    * @param c the value to check
    */
   public static boolean isValid(int c)
   {
      return hasMask(c, MASK_VALID) || isSupplemental(c);
   }

   /**
    * Returns the negation of {@link #isValid(int)}.
    *
    * @param c the value to check
    */
   public static boolean isInvalid(int c)
   {
      return !isValid(c);
   }

   /**
    * Returns true if the value is flagged {@link #MASK_CONTENT} in the table
    * or is a supplemental code point.
    *
    * @param c the value to check
    */
   public static boolean isContent(int c)
   {
      return hasMask(c, MASK_CONTENT) || isSupplemental(c);
   }

   /**
    * Returns true if the value is {@code '<'}, {@code '&'} or {@code '%'}.
    *
    * @param c the value to check
    */
   public static boolean isMarkup(int c)
   {
      return c == '<' || c == '&' || c == '%';
   }

   /**
    * Returns true if the value is flagged {@link #MASK_SPACE}
    * (space, tab, carriage return or line feed).
    *
    * @param c the value to check
    */
   public static boolean isSpace(int c)
   {
      return hasMask(c, MASK_SPACE);
   }

   /**
    * Returns true if the value satisfies {@link #isSpace(int)} or is
    * {@code 0x85} or {@code 0x2028}.
    *
    * @param c the value to check
    */
   public static boolean isXML11Space(int c)
   {
      return hasMask(c, MASK_SPACE) || c == 0x85 || c == 0x2028;
   }

   /**
    * Returns true if the value is flagged {@link #MASK_NAME_START}.
    *
    * @param c the value to check
    */
   public static boolean isNameStart(int c)
   {
      return hasMask(c, MASK_NAME_START);
   }

   /**
    * Returns true if the value is flagged {@link #MASK_NAME}.
    *
    * @param c the value to check
    */
   public static boolean isName(int c)
   {
      return hasMask(c, MASK_NAME);
   }

   /**
    * Returns true if the value is flagged {@link #MASK_NCNAME_START}.
    *
    * @param c the value to check
    */
   public static boolean isNCNameStart(int c)
   {
      return hasMask(c, MASK_NCNAME_START);
   }

   /**
    * Returns true if the value is flagged {@link #MASK_NCNAME}.
    *
    * @param c the value to check
    */
   public static boolean isNCName(int c)
   {
      return hasMask(c, MASK_NCNAME);
   }

   /**
    * Returns true if the value is flagged {@link #MASK_PUBID}.
    *
    * @param c the value to check
    */
   public static boolean isPubid(int c)
   {
      return hasMask(c, MASK_PUBID);
   }

   /**
    * Checks that the string is non-empty, begins with a name start
    * character and continues with name characters only.
    *
    * @param name the string to check
    * @return true if the string is a valid name
    * @throws NullPointerException if {@code name} is null
    */
   public static boolean isValidName(String name)
   {
      final int length = name.length();
      if (length == 0 || !isNameStart(name.charAt(0)))
      {
         return false;
      }
      for (int i = 1; i < length; i++)
      {
         if (!isName(name.charAt(i)))
         {
            return false;
         }
      }
      return true;
   }

   /**
    * Checks that the string is non-empty, begins with an NCName start
    * character and continues with NCName characters only.
    *
    * @param ncName the string to check
    * @return true if the string is a valid NCName
    * @throws NullPointerException if {@code ncName} is null
    */
   public static boolean isValidNCName(String ncName)
   {
      final int length = ncName.length();
      if (length == 0 || !isNCNameStart(ncName.charAt(0)))
      {
         return false;
      }
      for (int i = 1; i < length; i++)
      {
         if (!isNCName(ncName.charAt(i)))
         {
            return false;
         }
      }
      return true;
   }

   /**
    * Checks that the string is non-empty and consists of name characters
    * only.
    *
    * @param nmtoken the string to check
    * @return true if the string is a valid Nmtoken
    * @throws NullPointerException if {@code nmtoken} is null
    */
   public static boolean isValidNmtoken(String nmtoken)
   {
      final int length = nmtoken.length();
      if (length == 0)
      {
         return false;
      }
      for (int i = 0; i < length; i++)
      {
         if (!isName(nmtoken.charAt(i)))
         {
            return false;
         }
      }
      return true;
   }

   /**
    * Checks the syntax of an IANA encoding name: an ASCII letter followed by
    * any number of ASCII letters, digits, {@code '.'}, {@code '_'} or
    * {@code '-'}. Only the characters are checked, nothing else.
    *
    * @param ianaEncoding the encoding name, may be null
    * @return true if the name is well-formed; false for null or empty input
    */
   public static boolean isValidIANAEncoding(String ianaEncoding)
   {
      if (ianaEncoding == null || ianaEncoding.length() == 0)
      {
         return false;
      }
      final char first = ianaEncoding.charAt(0);
      if (!((first >= 'A' && first <= 'Z') || (first >= 'a' && first <= 'z')))
      {
         return false;
      }
      for (int i = 1; i < ianaEncoding.length(); i++)
      {
         if (!isEncodingNameChar(ianaEncoding.charAt(i)))
         {
            return false;
         }
      }
      return true;
   }

   /**
    * Checks the syntax of a Java encoding name: one or more ASCII letters,
    * digits, {@code '.'}, {@code '_'} or {@code '-'}. Every character is
    * checked, including the first one. Only the characters are checked,
    * nothing else.
    *
    * @param javaEncoding the encoding name, may be null
    * @return true if the name is well-formed; false for null or empty input
    */
   public static boolean isValidJavaEncoding(String javaEncoding)
   {
      if (javaEncoding == null || javaEncoding.length() == 0)
      {
         return false;
      }
      // Start at index 0 so the leading character is validated as well.
      for (int i = 0; i < javaEncoding.length(); i++)
      {
         if (!isEncodingNameChar(javaEncoding.charAt(i)))
         {
            return false;
         }
      }
      return true;
   }
}