1 package net.sf.saxon.om; 2 3 5 61 62 85 public class XMLChar { 86 87 90 94 95 private static final byte[] CHARS = new byte[1 << 16]; 96 97 98 public static final int MASK_VALID = 0x01; 99 100 101 public static final int MASK_SPACE = 0x02; 102 103 104 public static final int MASK_NAME_START = 0x04; 105 106 107 public static final int MASK_NAME = 0x08; 108 109 110 public static final int MASK_PUBID = 0x10; 111 112 120 public static final int MASK_CONTENT = 0x20; 121 122 123 public static final int MASK_NCNAME_START = 0x40; 124 125 126 public static final int MASK_NCNAME = 0x80; 127 128 private XMLChar() { 129 } 130 131 135 static { 136 137 142 int charRange[] = { 143 0x0009, 0x000A, 0x000D, 0x000D, 0x0020, 0xD7FF, 0xE000, 0xFFFD, 144 }; 145 146 150 int spaceChar[] = { 151 0x0020, 0x0009, 0x000D, 0x000A, 152 }; 153 154 159 int nameChar[] = { 160 0x002D, 0x002E, }; 162 163 167 int nameStartChar[] = { 168 0x003A, 0x005F, }; 170 171 175 int pubidChar[] = { 176 0x000A, 0x000D, 0x0020, 0x0021, 0x0023, 0x0024, 0x0025, 0x003D, 177 0x005F 178 }; 179 180 int pubidRange[] = { 181 0x0027, 0x003B, 0x003F, 0x005A, 0x0061, 0x007A 182 }; 183 184 188 int letterRange[] = { 189 0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6, 191 0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E, 192 0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217, 193 0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1, 194 0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C, 195 0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4, 196 0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5, 197 0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA, 198 0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7, 199 0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6, 200 0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990, 201 0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD, 202 0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10, 203 0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36, 204 0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B, 205 0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3, 206 0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28, 207 0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D, 208 0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95, 209 0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA, 210 0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10, 211 0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61, 212 0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3, 213 0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10, 214 0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E, 215 0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88, 216 0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB, 217 0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47, 218 0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103, 219 0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155, 220 0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF, 221 0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9, 222 0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D, 223 0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC, 224 0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB, 225 0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B, 226 0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C, 227 0xAC00, 0xD7A3, 228 0x3021, 0x3029, 0x4E00, 0x9FA5, 230 }; 231 int letterChar[] = { 232 0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5, 234 0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C, 235 0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0, 236 0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E, 237 0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E, 238 0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B, 239 0x1F5D, 0x1FBE, 0x2126, 0x212E, 240 0x3007, 242 }; 243 244 248 int combiningCharRange[] = { 249 0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1, 250 0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652, 251 0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8, 252 0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954, 253 0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8, 254 0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48, 255 0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5, 256 0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43, 257 0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83, 258 0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03, 259 0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56, 260 0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD, 261 0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48, 262 0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9, 263 0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84, 264 0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7, 265 0x20D0, 0x20DC, 0x302A, 0x302F, 266 }; 267 268 int combiningCharChar[] = { 269 0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF, 270 0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7, 271 0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F, 272 0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A, 273 }; 274 275 279 int digitRange[] = { 280 0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F, 281 0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F, 282 0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F, 283 0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29, 284 }; 285 286 290 int extenderRange[] = { 291 0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE, 292 }; 293 294 int extenderChar[] = { 295 0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005, 296 }; 297 298 302 int specialChar[] = { 303 '<', '&', '\n', '\r', ']', 304 }; 305 306 310 for (int i = 0; i < charRange.length; i += 2) { 312 for (int j = charRange[i]; j <= charRange[i + 1]; j++) { 313 CHARS[j] |= MASK_VALID | MASK_CONTENT; 314 } 315 } 316 317 for (int i = 0; i < specialChar.length; i++) { 319 CHARS[specialChar[i]] = (byte)(CHARS[specialChar[i]] & ~MASK_CONTENT); 320 } 321 322 for (int i = 0; i < spaceChar.length; i++) { 324 CHARS[spaceChar[i]] |= MASK_SPACE; 325 } 326 327 for (int i = 0; i < nameStartChar.length; i++) { 329 CHARS[nameStartChar[i]] |= MASK_NAME_START | MASK_NAME | 330 MASK_NCNAME_START | MASK_NCNAME; 331 } 332 for (int i = 0; i < letterRange.length; i += 2) { 333 for (int j = letterRange[i]; j <= letterRange[i + 1]; j++) { 334 CHARS[j] |= MASK_NAME_START | MASK_NAME | 335 MASK_NCNAME_START | MASK_NCNAME; 336 } 337 } 338 for (int i = 0; i < letterChar.length; i++) { 339 CHARS[letterChar[i]] |= MASK_NAME_START | MASK_NAME | 340 MASK_NCNAME_START | MASK_NCNAME; 341 } 342 343 for (int i = 0; i < nameChar.length; i++) { 345 CHARS[nameChar[i]] |= MASK_NAME | MASK_NCNAME; 346 } 347 for (int i = 0; i < digitRange.length; i += 2) { 348 for (int j = digitRange[i]; j <= digitRange[i + 1]; j++) { 349 CHARS[j] |= MASK_NAME | MASK_NCNAME; 350 } 351 } 352 for (int i = 0; i < combiningCharRange.length; i += 2) { 353 for (int j = combiningCharRange[i]; j <= combiningCharRange[i + 1]; j++) { 354 CHARS[j] |= MASK_NAME | MASK_NCNAME; 355 } 356 } 357 for (int i = 0; i < combiningCharChar.length; i++) { 358 CHARS[combiningCharChar[i]] |= MASK_NAME | MASK_NCNAME; 359 } 360 for (int i = 0; i < extenderRange.length; i += 2) { 361 for (int j = extenderRange[i]; j <= extenderRange[i + 1]; j++) { 362 CHARS[j] |= MASK_NAME | MASK_NCNAME; 363 } 364 } 365 for (int i = 0; i < extenderChar.length; i++) { 366 CHARS[extenderChar[i]] |= MASK_NAME | MASK_NCNAME; 367 } 368 369 CHARS[':'] &= ~(MASK_NCNAME_START | MASK_NCNAME); 371 372 for (int i = 0; i < pubidChar.length; i++) { 374 CHARS[pubidChar[i]] |= MASK_PUBID; 375 } 376 for (int i = 0; i < pubidRange.length; i += 2) { 377 for (int j = pubidRange[i]; j <= pubidRange[i + 1]; j++) { 378 CHARS[j] |= MASK_PUBID; 379 } 380 } 381 382 } 384 388 393 public static boolean isSupplemental(int c) { 394 return (c >= 0x10000 && c <= 0x10FFFF); 395 } 396 397 404 public static int supplemental(char h, char l) { 405 return (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000; 406 } 407 408 413 public static char highSurrogate(int c) { 414 return (char) (((c - 0x00010000) >> 10) + 0xD800); 415 } 416 417 422 public static char lowSurrogate(int c) { 423 return (char) (((c - 0x00010000) & 0x3FF) + 0xDC00); 424 } 425 426 429 430 public static boolean isSurrogate(int c) { 431 return (c & 0xF800) == 0xD800; 432 } 433 434 439 public static boolean isHighSurrogate(int c) { 440 return (c & 0xFC00) == 0xD800; 442 } 443 444 449 public static boolean isLowSurrogate(int c) { 450 return (c & 0xFC00) == 0xDC00; 452 } 453 454 455 465 public static boolean isValid(int c) { 466 return (c < 0x10000 && (CHARS[c] & MASK_VALID) != 0) || 467 (0x10000 <= c && c <= 0x10FFFF); 468 } 470 475 public static boolean isInvalid(int c) { 476 return !isValid(c); 477 } 479 484 public static boolean isContent(int c) { 485 return (c < 0x10000 && (CHARS[c] & MASK_CONTENT) != 0) || 486 (0x10000 <= c && c <= 0x10FFFF); 487 } 489 495 public static boolean isMarkup(int c) { 496 return c == '<' || c == '&' || c == '%'; 497 } 499 505 public static boolean isSpace(int c) { 506 return c < 0x10000 && (CHARS[c] & MASK_SPACE) != 0; 507 } 509 516 public static boolean isNameStart(int c) { 517 return c < 0x10000 && (CHARS[c] & MASK_NAME_START) != 0; 518 } 520 527 public static boolean isName(int c) { 528 return c < 0x10000 && (CHARS[c] & MASK_NAME) != 0; 529 } 531 538 public static boolean isNCNameStart(int c) { 539 return c < 0x10000 && (CHARS[c] & MASK_NCNAME_START) != 0; 540 } 542 549 public static boolean isNCName(int c) { 550 return c < 0x10000 && (CHARS[c] & MASK_NCNAME) != 0; 551 } 553 560 public static boolean isPubid(int c) { 561 return c < 0x10000 && (CHARS[c] & MASK_PUBID) != 0; 562 } 564 567 574 public static boolean isValidName(String name) { 575 if (name.length() == 0) 576 return false; 577 char ch = name.charAt(0); 578 if( isNameStart(ch) == false) 579 return false; 580 for (int i = 1; i < name.length(); i++ ) { 581 ch = name.charAt(i); 582 if( isName( ch ) == false ){ 583 return false; 584 } 585 } 586 return true; 587 } 589 590 594 601 public static boolean isValidNCName(CharSequence ncName) { 602 if (ncName.length() == 0) 603 return false; 604 char ch = ncName.charAt(0); 605 if( isNCNameStart(ch) == false) 606 return false; 607 for (int i = 1; i < ncName.length(); i++ ) { 608 ch = ncName.charAt(i); 609 if( isNCName( ch ) == false ){ 610 return false; 611 } 612 } 613 return true; 614 } 616 619 626 public static boolean isValidNmtoken(String nmtoken) { 627 if (nmtoken.length() == 0) 628 return false; 629 for (int i = 0; i < nmtoken.length(); i++ ) { 630 char ch = nmtoken.charAt(i); 631 if( ! isName( ch ) ){ 632 return false; 633 } 634 } 635 return true; 636 } 638 639 640 641 642 644 652 public static boolean isValidIANAEncoding(String ianaEncoding) { 653 if (ianaEncoding != null) { 654 int length = ianaEncoding.length(); 655 if (length > 0) { 656 char c = ianaEncoding.charAt(0); 657 if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 658 for (int i = 1; i < length; i++) { 659 c = ianaEncoding.charAt(i); 660 if ((c < 'A' || c > 'Z') && (c < 'a' || c > 'z') && 661 (c < '0' || c > '9') && c != '.' && c != '_' && 662 c != '-') { 663 return false; 664 } 665 } 666 return true; 667 } 668 } 669 } 670 return false; 671 } 673 681 public static boolean isValidJavaEncoding(String javaEncoding) { 682 if (javaEncoding != null) { 683 int length = javaEncoding.length(); 684 if (length > 0) { 685 for (int i = 1; i < length; i++) { 686 char c = javaEncoding.charAt(i); 687 if ((c < 'A' || c > 'Z') && (c < 'a' || c > 'z') && 688 (c < '0' || c > '9') && c != '.' && c != '_' && 689 c != '-') { 690 return false; 691 } 692 } 693 return true; 694 } 695 } 696 return false; 697 } 699 700 } | Popular Tags |