/**
 * Character-classification utilities for XML processing.
 *
 * <p>All classification of characters in the Basic Multilingual Plane is
 * driven by the {@link #CHARS} lookup table, which holds one byte of
 * {@code MASK_*} flag bits per UTF-16 code unit. The table is filled once,
 * in the static initializer, from the range and single-character lists
 * declared there (letters, digits, combining characters, extenders, and so
 * on, in the style of the XML 1.0 character-class tables).
 *
 * <p>The table-backed predicates ({@code isValid}, {@code isSpace},
 * {@code isName}, ...) accept any {@code int}; values outside
 * {@code 0..0xFFFF} never index the table. In particular a negative value
 * such as the {@code -1} end-of-stream marker is simply reported as not
 * belonging to the class.
 */
public class XMLChar {

    /**
     * Per-character flag table indexed by UTF-16 code unit
     * ({@code 0x0000..0xFFFF}); each entry is a bitwise OR of the
     * {@code MASK_*} constants.
     */
    public static final byte[] CHARS = new byte[1 << 16];

    /** Flag bit: the character is a valid XML character. */
    public static final int MASK_VALID = 0x01;

    /** Flag bit: the character is an XML white-space character. */
    public static final int MASK_SPACE = 0x02;

    /** Flag bit: the character may start an XML name. */
    public static final int MASK_NAME_START = 0x04;

    /** Flag bit: the character may appear inside an XML name. */
    public static final int MASK_NAME = 0x08;

    /** Flag bit: the character may appear in a public identifier. */
    public static final int MASK_PUBID = 0x10;

    /**
     * Flag bit: the character is plain content, i.e. a valid character
     * other than {@code '<'}, {@code '&'}, {@code '\n'}, {@code '\r'} and
     * {@code ']'}, which callers have to examine individually.
     */
    public static final int MASK_CONTENT = 0x20;

    /** Flag bit: the character may start an NCName (a name start other than {@code ':'}). */
    public static final int MASK_NCNAME_START = 0x40;

    /** Flag bit: the character may appear inside an NCName (a name character other than {@code ':'}). */
    public static final int MASK_NCNAME = 0x80;

    /** First code point above the range covered by {@link #CHARS}. */
    private static final int TABLE_LIMIT = 0x10000;

    /** Largest supplemental code point accepted by this class. */
    private static final int MAX_CODE_POINT = 0x10FFFF;

    // Fill the CHARS table. The "...Range" arrays hold inclusive
    // (first, last) pairs; the "...Char" arrays hold individual characters.
    static {

        // Valid characters.
        int charRange[] = {
            0x0009, 0x000A, 0x000D, 0x000D, 0x0020, 0xD7FF, 0xE000, 0xFFFD,
        };

        // White space.
        int spaceChar[] = {
            0x0020, 0x0009, 0x000D, 0x000A,
        };

        // Name characters that are not name-start characters: '-' and '.'.
        int nameChar[] = {
            0x002D, 0x002E,
        };

        // Name-start characters that are not letters: ':' and '_'.
        int nameStartChar[] = {
            0x003A, 0x005F,
        };

        // Public-identifier characters.
        int pubidChar[] = {
            0x000A, 0x000D, 0x0020, 0x0021, 0x0023, 0x0024, 0x0025, 0x003D,
            0x005F
        };

        int pubidRange[] = {
            0x0027, 0x003B, 0x003F, 0x005A, 0x0061, 0x007A
        };

        // Letters.
        int letterRange[] = {
            0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6,
            0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E,
            0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217,
            0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1,
            0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C,
            0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4,
            0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5,
            0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA,
            0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7,
            0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6,
            0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990,
            0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD,
            0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10,
            0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36,
            0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B,
            0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3,
            0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28,
            0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D,
            0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95,
            0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA,
            0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10,
            0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61,
            0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3,
            0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10,
            0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E,
            0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88,
            0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB,
            0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47,
            0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103,
            0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155,
            0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF,
            0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9,
            0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D,
            0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC,
            0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB,
            0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B,
            0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C,
            0xAC00, 0xD7A3,
            0x3021, 0x3029, 0x4E00, 0x9FA5,
        };

        int letterChar[] = {
            0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5,
            0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C,
            0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0,
            0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E,
            0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E,
            0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B,
            0x1F5D, 0x1FBE, 0x2126, 0x212E,
            0x3007,
        };

        // Combining characters.
        int combiningCharRange[] = {
            0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1,
            0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652,
            0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8,
            0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954,
            0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8,
            0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48,
            0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5,
            0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43,
            0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83,
            0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03,
            0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56,
            0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD,
            0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48,
            0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9,
            0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84,
            0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7,
            0x20D0, 0x20DC, 0x302A, 0x302F,
        };

        int combiningCharChar[] = {
            0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF,
            0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7,
            0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F,
            0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A,
        };

        // Digits.
        int digitRange[] = {
            0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
            0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
            0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
            0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29,
        };

        // Extenders.
        int extenderRange[] = {
            0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE,
        };

        int extenderChar[] = {
            0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005,
        };

        // Valid characters that must NOT be flagged as plain content.
        int specialChar[] = {
            '<', '&', '\n', '\r', ']',
        };

        // Every valid character starts out as content as well ...
        markRanges(charRange, MASK_VALID | MASK_CONTENT);

        // ... and the special characters then lose the content flag. This
        // must run after the step above.
        for (int i = 0; i < specialChar.length; i++) {
            CHARS[specialChar[i]] = (byte) (CHARS[specialChar[i]] & ~MASK_CONTENT);
        }

        markChars(spaceChar, MASK_SPACE);

        // Name-start characters are also name characters.
        final int startMask =
            MASK_NAME_START | MASK_NAME | MASK_NCNAME_START | MASK_NCNAME;
        markChars(nameStartChar, startMask);
        markRanges(letterRange, startMask);
        markChars(letterChar, startMask);

        // Characters allowed inside a name but not at its start.
        final int partMask = MASK_NAME | MASK_NCNAME;
        markChars(nameChar, partMask);
        markRanges(digitRange, partMask);
        markRanges(combiningCharRange, partMask);
        markChars(combiningCharChar, partMask);
        markRanges(extenderRange, partMask);
        markChars(extenderChar, partMask);

        // ':' was flagged through nameStartChar above; it is a name
        // character but never an NCName character, so strip those bits.
        CHARS[':'] &= ~(MASK_NCNAME_START | MASK_NCNAME);

        markChars(pubidChar, MASK_PUBID);
        markRanges(pubidRange, MASK_PUBID);
    }

    /** ORs {@code mask} into the table entry of every character listed in {@code chars}. */
    private static void markChars(int[] chars, int mask) {
        for (int i = 0; i < chars.length; i++) {
            CHARS[chars[i]] |= mask;
        }
    }

    /** ORs {@code mask} into every table entry covered by the inclusive (first, last) pairs in {@code ranges}. */
    private static void markRanges(int[] ranges, int mask) {
        for (int i = 0; i < ranges.length; i += 2) {
            for (int c = ranges[i]; c <= ranges[i + 1]; c++) {
                CHARS[c] |= mask;
            }
        }
    }

    /**
     * Bounds-checked table lookup shared by all table-backed predicates.
     * Anything outside {@code 0..0xFFFF} (including negative values) has no
     * table entry and therefore carries no flags.
     */
    private static boolean hasFlag(int c, int mask) {
        return c >= 0 && c < TABLE_LIMIT && (CHARS[c] & mask) != 0;
    }

    /**
     * Returns true if the specified code point is supplemental, i.e. lies in
     * {@code 0x10000..0x10FFFF}.
     *
     * @param c the code point to check
     */
    public static boolean isSupplemental(int c) {
        return (c >= TABLE_LIMIT && c <= MAX_CODE_POINT);
    }

    /**
     * Combines a surrogate pair into the supplemental code point it encodes.
     * The arguments are not checked for actually being surrogates.
     *
     * @param h the high surrogate
     * @param l the low surrogate
     * @return the combined code point
     */
    public static int supplemental(char h, char l) {
        return (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000;
    }

    /**
     * Returns the high surrogate of a supplemental code point.
     *
     * @param c the supplemental code point to split
     */
    public static char highSurrogate(int c) {
        return (char) (((c - 0x00010000) >> 10) + 0xD800);
    }

    /**
     * Returns the low surrogate of a supplemental code point.
     *
     * @param c the supplemental code point to split
     */
    public static char lowSurrogate(int c) {
        return (char) (((c - 0x00010000) & 0x3FF) + 0xDC00);
    }

    /**
     * Returns true if the value lies in the high-surrogate range
     * {@code 0xD800..0xDBFF}.
     *
     * @param c the value to check
     */
    public static boolean isHighSurrogate(int c) {
        return (0xD800 <= c && c <= 0xDBFF);
    }

    /**
     * Returns true if the value lies in the low-surrogate range
     * {@code 0xDC00..0xDFFF}.
     *
     * @param c the value to check
     */
    public static boolean isLowSurrogate(int c) {
        return (0xDC00 <= c && c <= 0xDFFF);
    }

    /**
     * Returns true if the specified character is valid: either flagged
     * {@link #MASK_VALID} in the table or a supplemental code point.
     *
     * @param c the character to check; negative values are never valid
     */
    public static boolean isValid(int c) {
        return hasFlag(c, MASK_VALID) || isSupplemental(c);
    }

    /**
     * Returns true if the specified character is invalid; the exact negation
     * of {@link #isValid(int)}.
     *
     * @param c the character to check
     */
    public static boolean isInvalid(int c) {
        return !isValid(c);
    }

    /**
     * Returns true if the specified character can be treated as plain
     * content: either flagged {@link #MASK_CONTENT} in the table or a
     * supplemental code point.
     *
     * @param c the character to check; negative values are never content
     */
    public static boolean isContent(int c) {
        return hasFlag(c, MASK_CONTENT) || isSupplemental(c);
    }

    /**
     * Returns true if the specified character is one of {@code '<'},
     * {@code '&'} or {@code '%'}.
     *
     * @param c the character to check
     */
    public static boolean isMarkup(int c) {
        return c == '<' || c == '&' || c == '%';
    }

    /**
     * Returns true if the specified character is a white-space character
     * (space, tab, carriage return or line feed).
     *
     * @param c the character to check
     */
    public static boolean isSpace(int c) {
        return hasFlag(c, MASK_SPACE);
    }

    /**
     * Returns true if the specified character may start a name.
     *
     * @param c the character to check
     */
    public static boolean isNameStart(int c) {
        return hasFlag(c, MASK_NAME_START);
    }

    /**
     * Returns true if the specified character may appear inside a name.
     *
     * @param c the character to check
     */
    public static boolean isName(int c) {
        return hasFlag(c, MASK_NAME);
    }

    /**
     * Returns true if the specified character may start an NCName.
     *
     * @param c the character to check
     */
    public static boolean isNCNameStart(int c) {
        return hasFlag(c, MASK_NCNAME_START);
    }

    /**
     * Returns true if the specified character may appear inside an NCName.
     *
     * @param c the character to check
     */
    public static boolean isNCName(int c) {
        return hasFlag(c, MASK_NCNAME);
    }

    /**
     * Returns true if the specified character may appear in a public
     * identifier.
     *
     * @param c the character to check
     */
    public static boolean isPubid(int c) {
        return hasFlag(c, MASK_PUBID);
    }

    /**
     * Checks that the string is a well-formed name: non-empty, a name-start
     * character first, name characters after that.
     *
     * @param name the string to check
     * @return true if {@code name} is a valid name
     * @throws NullPointerException if {@code name} is null
     */
    public static boolean isValidName(String name) {
        final int length = name.length();
        if (length == 0 || !isNameStart(name.charAt(0))) {
            return false;
        }
        for (int i = 1; i < length; i++) {
            if (!isName(name.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks that the string is a well-formed NCName: non-empty, an
     * NCName-start character first, NCName characters after that.
     *
     * @param ncName the string to check
     * @return true if {@code ncName} is a valid NCName
     * @throws NullPointerException if {@code ncName} is null
     */
    public static boolean isValidNCName(String ncName) {
        final int length = ncName.length();
        if (length == 0 || !isNCNameStart(ncName.charAt(0))) {
            return false;
        }
        for (int i = 1; i < length; i++) {
            if (!isNCName(ncName.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks that the string is a well-formed Nmtoken: non-empty and made up
     * of name characters only (no special rule for the first character).
     *
     * @param nmtoken the string to check
     * @return true if {@code nmtoken} is a valid Nmtoken
     * @throws NullPointerException if {@code nmtoken} is null
     */
    public static boolean isValidNmtoken(String nmtoken) {
        final int length = nmtoken.length();
        if (length == 0) {
            return false;
        }
        for (int i = 0; i < length; i++) {
            if (!isName(nmtoken.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /** Returns true for the characters both encoding-name checks accept: ASCII letters and digits, {@code '.'}, {@code '_'} and {@code '-'}. */
    private static boolean isEncodingNameChar(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
            || (c >= '0' && c <= '9') || c == '.' || c == '_' || c == '-';
    }

    /**
     * Checks the syntax of an IANA encoding name: an ASCII letter followed
     * by any number of ASCII letters, digits, {@code '.'}, {@code '_'} or
     * {@code '-'}. Only the characters are checked, not whether the encoding
     * is actually supported.
     *
     * @param ianaEncoding the encoding name to check; may be null
     * @return true if the name is syntactically valid; false for null or empty input
     */
    public static boolean isValidIANAEncoding(String ianaEncoding) {
        if (ianaEncoding == null || ianaEncoding.length() == 0) {
            return false;
        }
        final char first = ianaEncoding.charAt(0);
        final boolean startsWithLetter =
            (first >= 'A' && first <= 'Z') || (first >= 'a' && first <= 'z');
        if (!startsWithLetter) {
            return false;
        }
        for (int i = 1; i < ianaEncoding.length(); i++) {
            if (!isEncodingNameChar(ianaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks the syntax of a Java encoding name: one or more ASCII letters,
     * digits, {@code '.'}, {@code '_'} or {@code '-'}. Unlike an IANA name
     * it may begin with any of those characters (for example
     * {@code "8859_1"}). Only the characters are checked, not whether the
     * encoding is actually supported.
     *
     * @param javaEncoding the encoding name to check; may be null
     * @return true if the name is syntactically valid; false for null or empty input
     */
    public static boolean isValidJavaEncoding(String javaEncoding) {
        if (javaEncoding == null || javaEncoding.length() == 0) {
            return false;
        }
        // Start at index 0: the first character has to satisfy the same
        // rule as every other character.
        for (int i = 0; i < javaEncoding.length(); i++) {
            if (!isEncodingNameChar(javaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks that the string is a valid QName: either a single NCName, or
     * two NCNames (prefix and local part) separated by one colon.
     *
     * @param str the string to check
     * @return true if {@code str} is a valid QName
     * @throws NullPointerException if {@code str} is null
     */
    public static boolean isValidQName(String str) {
        final int colon = str.indexOf(':');

        // A leading or trailing colon leaves one side empty. For the empty
        // string both colon and length() - 1 are -1, so it is rejected here
        // as well.
        if (colon == 0 || colon == str.length() - 1) {
            return false;
        }

        if (colon < 0) {
            return isValidNCName(str);
        }

        // Any further colon ends up in the local part and is rejected by
        // the NCName check.
        final String prefix = str.substring(0, colon);
        final String localPart = str.substring(colon + 1);
        return isValidNCName(prefix) && isValidNCName(localPart);
    }

}