1 4 package org.znerd.xmlenc; 5 6 14 public final class XMLChecker extends Object { 15 16 20 37 public static final void checkS(String s) 38 throws NullPointerException { 39 checkS(s.toCharArray(), 0, s.length()); 40 } 41 42 71 public static final void checkS(char[] ch, int start, int length) 72 throws NullPointerException , IndexOutOfBoundsException , InvalidXMLException { 73 74 for (int i = start; i < length; i++) { 76 int c = (int) ch[i]; 77 78 if (c != 0x20 && c != 0x9 && c != 0xD && c != 0xA) { 79 throw new InvalidXMLException("The character 0x" + Integer.toHexString(c) + " is not valid for the 'S' production (white space)."); 80 } 81 } 82 } 83 84 100 public static final void checkName(String name) 101 throws NullPointerException , InvalidXMLException { 102 checkName(name.toCharArray(), 0, name.length()); 103 } 104 105 121 public static final boolean isName(String name) 122 throws NullPointerException { 123 try { 124 checkName(name); 125 return true; 126 } catch (InvalidXMLException exception) { 127 return false; 128 } 129 } 130 131 160 public static final void checkName(char[] ch, int start, int length) 161 throws NullPointerException , IndexOutOfBoundsException , InvalidXMLException { 162 163 if (length < 1) { 165 throw new InvalidXMLException("An empty string does not match the 'Name' production."); 166 } 167 168 int i = start; 170 char c = ch[i]; 171 if (c != '_' && c != ':' && !isLetter(c)) { 172 throw new InvalidXMLException("The character 0x" + Integer.toHexString((int) c) + " is invalid as a starting character in the 'Name' production."); 173 } 174 175 for (i++; i < length; i++) { 177 c = ch[i]; 178 179 if (!isNameChar(c)) { 180 throw new InvalidXMLException("The character 0x" + Integer.toHexString((int) c) + " is not valid for the 'Name' production."); 181 } 182 } 183 } 184 185 199 private static final boolean isNameChar(char c) { 200 return c == '.' 201 || c == '-' 202 || c == '_' 203 || c == ':' 204 || isDigit(c) 205 || isLetter(c) 206 || isCombiningChar(c) 207 || isExtender(c); 208 } 209 210 224 private static final boolean isLetter(char c) { 225 return isBaseChar(c) || isIdeographic(c); 226 } 227 228 242 private static final boolean isBaseChar(char c) { 243 int n = (int) c; 244 return (n >= 0x0041 && n <= 0x005A) 245 || (n >= 0x0061 && n <= 0x007A) 246 || (n >= 0x00C0 && n <= 0x00D6) 247 || (n >= 0x00D8 && n <= 0x00F6) 248 || (n >= 0x00F8 && n <= 0x00FF) 249 || (n >= 0x0100 && n <= 0x0131) 250 || (n >= 0x0134 && n <= 0x013E) 251 || (n >= 0x0141 && n <= 0x0148) 252 || (n >= 0x014A && n <= 0x017E) 253 || (n >= 0x0180 && n <= 0x01C3) 254 || (n >= 0x01CD && n <= 0x01F0) 255 || (n >= 0x01F4 && n <= 0x01F5) 256 || (n >= 0x01FA && n <= 0x0217) 257 || (n >= 0x0250 && n <= 0x02A8) 258 || (n >= 0x02BB && n <= 0x02C1) 259 || (n == 0x0386) 260 || (n >= 0x0388 && n <= 0x038A) 261 || (n == 0x038C) 262 || (n >= 0x038E && n <= 0x03A1) 263 || (n >= 0x03A3 && n <= 0x03CE) 264 || (n >= 0x03D0 && n <= 0x03D6) 265 || (n == 0x03DA) 266 || (n == 0x03DC) 267 || (n == 0x03DE) 268 || (n == 0x03E0) 269 || (n >= 0x03E2 && n <= 0x03F3) 270 || (n >= 0x0401 && n <= 0x040C) 271 || (n >= 0x040E && n <= 0x044F) 272 || (n >= 0x0451 && n <= 0x045C) 273 || (n >= 0x045E && n <= 0x0481) 274 || (n >= 0x0490 && n <= 0x04C4) 275 || (n >= 0x04C7 && n <= 0x04C8) 276 || (n >= 0x04CB && n <= 0x04CC) 277 || (n >= 0x04D0 && n <= 0x04EB) 278 || (n >= 0x04EE && n <= 0x04F5) 279 || (n >= 0x04F8 && n <= 0x04F9) 280 || (n >= 0x0531 && n <= 0x0556) 281 || (n == 0x0559) 282 || (n >= 0x0561 && n <= 0x0586) 283 || (n >= 0x05D0 && n <= 0x05EA) 284 || (n >= 0x05F0 && n <= 0x05F2) 285 || (n >= 0x0621 && n <= 0x063A) 286 || (n >= 0x0641 && n <= 0x064A) 287 || (n >= 0x0671 && n <= 0x06B7) 288 || (n >= 0x06BA && n <= 0x06BE) 289 || (n >= 0x06C0 && n <= 0x06CE) 290 || (n >= 0x06D0 && n <= 0x06D3) 291 || (n == 0x06D5) 292 || (n >= 0x06E5 && n <= 0x06E6) 293 || (n >= 0x0905 && n <= 0x0939) 294 || (n == 0x093D) 295 || (n >= 0x0958 && n <= 0x0961) 296 || (n >= 0x0985 && n <= 0x098C) 297 || (n >= 0x098F && n <= 0x0990) 298 || (n >= 0x0993 && n <= 0x09A8) 299 || (n >= 0x09AA && n <= 0x09B0) 300 || (n == 0x09B2) 301 || (n >= 0x09B6 && n <= 0x09B9) 302 || (n >= 0x09DC && n <= 0x09DD) 303 || (n >= 0x09DF && n <= 0x09E1) 304 || (n >= 0x09F0 && n <= 0x09F1) 305 || (n >= 0x0A05 && n <= 0x0A0A) 306 || (n >= 0x0A0F && n <= 0x0A10) 307 || (n >= 0x0A13 && n <= 0x0A28) 308 || (n >= 0x0A2A && n <= 0x0A30) 309 || (n >= 0x0A32 && n <= 0x0A33) 310 || (n >= 0x0A35 && n <= 0x0A36) 311 || (n >= 0x0A38 && n <= 0x0A39) 312 || (n >= 0x0A59 && n <= 0x0A5C) 313 || (n == 0x0A5E) 314 || (n >= 0x0A72 && n <= 0x0A74) 315 || (n >= 0x0A85 && n <= 0x0A8B) 316 || (n == 0x0A8D) 317 || (n >= 0x0A8F && n <= 0x0A91) 318 || (n >= 0x0A93 && n <= 0x0AA8) 319 || (n >= 0x0AAA && n <= 0x0AB0) 320 || (n >= 0x0AB2 && n <= 0x0AB3) 321 || (n >= 0x0AB5 && n <= 0x0AB9) 322 || (n == 0x0ABD) 323 || (n == 0x0AE0) 324 || (n >= 0x0B05 && n <= 0x0B0C) 325 || (n >= 0x0B0F && n <= 0x0B10) 326 || (n >= 0x0B13 && n <= 0x0B28) 327 || (n >= 0x0B2A && n <= 0x0B30) 328 || (n >= 0x0B32 && n <= 0x0B33) 329 || (n >= 0x0B36 && n <= 0x0B39) 330 || (n == 0x0B3D) 331 || (n >= 0x0B5C && n <= 0x0B5D) 332 || (n >= 0x0B5F && n <= 0x0B61) 333 || (n >= 0x0B85 && n <= 0x0B8A) 334 || (n >= 0x0B8E && n <= 0x0B90) 335 || (n >= 0x0B92 && n <= 0x0B95) 336 || (n >= 0x0B99 && n <= 0x0B9A) 337 || (n == 0x0B9C) 338 || (n >= 0x0B9E && n <= 0x0B9F) 339 || (n >= 0x0BA3 && n <= 0x0BA4) 340 || (n >= 0x0BA8 && n <= 0x0BAA) 341 || (n >= 0x0BAE && n <= 0x0BB5) 342 || (n >= 0x0BB7 && n <= 0x0BB9) 343 || (n >= 0x0C05 && n <= 0x0C0C) 344 || (n >= 0x0C0E && n <= 0x0C10) 345 || (n >= 0x0C12 && n <= 0x0C28) 346 || (n >= 0x0C2A && n <= 0x0C33) 347 || (n >= 0x0C35 && n <= 0x0C39) 348 || (n >= 0x0C60 && n <= 0x0C61) 349 || (n >= 0x0C85 && n <= 0x0C8C) 350 || (n >= 0x0C8E && n <= 0x0C90) 351 || (n >= 0x0C92 && n <= 0x0CA8) 352 || (n >= 0x0CAA && n <= 0x0CB3) 353 || (n >= 0x0CB5 && n <= 0x0CB9) 354 || (n == 0x0CDE) 355 || (n >= 0x0CE0 && n <= 0x0CE1) 356 || (n >= 0x0D05 && n <= 0x0D0C) 357 || (n >= 0x0D0E && n <= 0x0D10) 358 || (n >= 0x0D12 && n <= 0x0D28) 359 || (n >= 0x0D2A && n <= 0x0D39) 360 || (n >= 0x0D60 && n <= 0x0D61) 361 || (n >= 0x0E01 && n <= 0x0E2E) 362 || (n == 0x0E30) 363 || (n >= 0x0E32 && n <= 0x0E33) 364 || (n >= 0x0E40 && n <= 0x0E45) 365 || (n >= 0x0E81 && n <= 0x0E82) 366 || (n == 0x0E84) 367 || (n >= 0x0E87 && n <= 0x0E88) 368 || (n == 0x0E8A) 369 || (n == 0x0E8D) 370 || (n >= 0x0E94 && n <= 0x0E97) 371 || (n >= 0x0E99 && n <= 0x0E9F) 372 || (n >= 0x0EA1 && n <= 0x0EA3) 373 || (n == 0x0EA5) 374 || (n == 0x0EA7) 375 || (n >= 0x0EAA && n <= 0x0EAB) 376 || (n >= 0x0EAD && n <= 0x0EAE) 377 || (n == 0x0EB0) 378 || (n >= 0x0EB2 && n <= 0x0EB3) 379 || (n == 0x0EBD) 380 || (n >= 0x0EC0 && n <= 0x0EC4) 381 || (n >= 0x0F40 && n <= 0x0F47) 382 || (n >= 0x0F49 && n <= 0x0F69) 383 || (n >= 0x10A0 && n <= 0x10C5) 384 || (n >= 0x10D0 && n <= 0x10F6) 385 || (n == 0x1100) 386 || (n >= 0x1102 && n <= 0x1103) 387 || (n >= 0x1105 && n <= 0x1107) 388 || (n == 0x1109) 389 || (n >= 0x110B && n <= 0x110C) 390 || (n >= 0x110E && n <= 0x1112) 391 || (n == 0x113C) 392 || (n == 0x113E) 393 || (n == 0x1140) 394 || (n == 0x114C) 395 || (n == 0x114E) 396 || (n == 0x1150) 397 || (n >= 0x1154 && n <= 0x1155) 398 || (n == 0x1159) 399 || (n >= 0x115F && n <= 0x1161) 400 || (n == 0x1163) 401 || (n == 0x1165) 402 || (n == 0x1167) 403 || (n == 0x1169) 404 || (n >= 0x116D && n <= 0x116E) 405 || (n >= 0x1172 && n <= 0x1173) 406 || (n == 0x1175) 407 || (n == 0x119E) 408 || (n == 0x11A8) 409 || (n == 0x11AB) 410 || (n >= 0x11AE && n <= 0x11AF) 411 || (n >= 0x11B7 && n <= 0x11B8) 412 || (n == 0x11BA) 413 || (n >= 0x11BC && n <= 0x11C2) 414 || (n == 0x11EB) 415 || (n == 0x11F0) 416 || (n == 0x11F9) 417 || (n >= 0x1E00 && n <= 0x1E9B) 418 || (n >= 0x1EA0 && n <= 0x1EF9) 419 || (n >= 0x1F00 && n <= 0x1F15) 420 || (n >= 0x1F18 && n <= 0x1F1D) 421 || (n >= 0x1F20 && n <= 0x1F45) 422 || (n >= 0x1F48 && n <= 0x1F4D) 423 || (n >= 0x1F50 && n <= 0x1F57) 424 || (n == 0x1F59) 425 || (n == 0x1F5B) 426 || (n == 0x1F5D) 427 || (n >= 0x1F5F && n <= 0x1F7D) 428 || (n >= 0x1F80 && n <= 0x1FB4) 429 || (n >= 0x1FB6 && n <= 0x1FBC) 430 || (n == 0x1FBE) 431 || (n >= 0x1FC2 && n <= 0x1FC4) 432 || (n >= 0x1FC6 && n <= 0x1FCC) 433 || (n >= 0x1FD0 && n <= 0x1FD3) 434 || (n >= 0x1FD6 && n <= 0x1FDB) 435 || (n >= 0x1FE0 && n <= 0x1FEC) 436 || (n >= 0x1FF2 && n <= 0x1FF4) 437 || (n >= 0x1FF6 && n <= 0x1FFC) 438 || (n == 0x2126) 439 || (n >= 0x212A && n <= 0x212B) 440 || (n == 0x212E) 441 || (n >= 0x2180 && n <= 0x2182) 442 || (n >= 0x3041 && n <= 0x3094) 443 || (n >= 0x30A1 && n <= 0x30FA) 444 || (n >= 0x3105 && n <= 0x312C) 445 || (n >= 0xAC00 && n <= 0xD7A3); 446 } 447 448 462 private static final boolean isIdeographic(char c) { 463 int n = (int) c; 464 return (n >= 0x4E00 && n <= 0x9FA5) 465 || (n == 0x3007) 466 || (n >= 0x3021 && n <= 0x3029); 467 } 468 469 483 private static final boolean isCombiningChar(char c) { 484 int n = (int) c; 485 return (n >= 0x0300 && n <= 0x0345) 486 || (n >= 0x0360 && n <= 0x0361) 487 || (n >= 0x0483 && n <= 0x0486) 488 || (n >= 0x0591 && n <= 0x05A1) 489 || (n >= 0x05A3 && n <= 0x05B9) 490 || (n >= 0x05BB && n <= 0x05BD) 491 || (n == 0x05BF) 492 || (n >= 0x05C1 && n <= 0x05C2) 493 || (n == 0x05C4) 494 || (n >= 0x064B && n <= 0x0652) 495 || (n == 0x0670) 496 || (n >= 0x06D6 && n <= 0x06DC) 497 || (n >= 0x06DD && n <= 0x06DF) 498 || (n >= 0x06E0 && n <= 0x06E4) 499 || (n >= 0x06E7 && n <= 0x06E8) 500 || (n >= 0x06EA && n <= 0x06ED) 501 || (n >= 0x0901 && n <= 0x0903) 502 || (n == 0x093C) 503 || (n >= 0x093E && n <= 0x094C) 504 || (n == 0x094D) 505 || (n >= 0x0951 && n <= 0x0954) 506 || (n >= 0x0962 && n <= 0x0963) 507 || (n >= 0x0981 && n <= 0x0983) 508 || (n == 0x09BC) 509 || (n == 0x09BE) 510 || (n == 0x09BF) 511 || (n >= 0x09C0 && n <= 0x09C4) 512 || (n >= 0x09C7 && n <= 0x09C8) 513 || (n >= 0x09CB && n <= 0x09CD) 514 || (n == 0x09D7) 515 || (n >= 0x09E2 && n <= 0x09E3) 516 || (n == 0x0A02) 517 || (n == 0x0A3C) 518 || (n == 0x0A3E) 519 || (n == 0x0A3F) 520 || (n >= 0x0A40 && n <= 0x0A42) 521 || (n >= 0x0A47 && n <= 0x0A48) 522 || (n >= 0x0A4B && n <= 0x0A4D) 523 || (n >= 0x0A70 && n <= 0x0A71) 524 || (n >= 0x0A81 && n <= 0x0A83) 525 || (n == 0x0ABC) 526 || (n >= 0x0ABE && n <= 0x0AC5) 527 || (n >= 0x0AC7 && n <= 0x0AC9) 528 || (n >= 0x0ACB && n <= 0x0ACD) 529 || (n >= 0x0B01 && n <= 0x0B03) 530 || (n == 0x0B3C) 531 || (n >= 0x0B3E && n <= 0x0B43) 532 || (n >= 0x0B47 && n <= 0x0B48) 533 || (n >= 0x0B4B && n <= 0x0B4D) 534 || (n >= 0x0B56 && n <= 0x0B57) 535 || (n >= 0x0B82 && n <= 0x0B83) 536 || (n >= 0x0BBE && n <= 0x0BC2) 537 || (n >= 0x0BC6 && n <= 0x0BC8) 538 || (n >= 0x0BCA && n <= 0x0BCD) 539 || (n == 0x0BD7) 540 || (n >= 0x0C01 && n <= 0x0C03) 541 || (n >= 0x0C3E && n <= 0x0C44) 542 || (n >= 0x0C46 && n <= 0x0C48) 543 || (n >= 0x0C4A && n <= 0x0C4D) 544 || (n >= 0x0C55 && n <= 0x0C56) 545 || (n >= 0x0C82 && n <= 0x0C83) 546 || (n >= 0x0CBE && n <= 0x0CC4) 547 || (n >= 0x0CC6 && n <= 0x0CC8) 548 || (n >= 0x0CCA && n <= 0x0CCD) 549 || (n >= 0x0CD5 && n <= 0x0CD6) 550 || (n >= 0x0D02 && n <= 0x0D03) 551 || (n >= 0x0D3E && n <= 0x0D43) 552 || (n >= 0x0D46 && n <= 0x0D48) 553 || (n >= 0x0D4A && n <= 0x0D4D) 554 || (n == 0x0D57) 555 || (n == 0x0E31) 556 || (n >= 0x0E34 && n <= 0x0E3A) 557 || (n >= 0x0E47 && n <= 0x0E4E) 558 || (n == 0x0EB1) 559 || (n >= 0x0EB4 && n <= 0x0EB9) 560 || (n >= 0x0EBB && n <= 0x0EBC) 561 || (n >= 0x0EC8 && n <= 0x0ECD) 562 || (n >= 0x0F18 && n <= 0x0F19) 563 || (n == 0x0F35) 564 || (n == 0x0F37) 565 || (n == 0x0F39) 566 || (n == 0x0F3E) 567 || (n == 0x0F3F) 568 || (n >= 0x0F71 && n <= 0x0F84) 569 || (n >= 0x0F86 && n <= 0x0F8B) 570 || (n >= 0x0F90 && n <= 0x0F95) 571 || (n == 0x0F97) 572 || (n >= 0x0F99 && n <= 0x0FAD) 573 || (n >= 0x0FB1 && n <= 0x0FB7) 574 || (n == 0x0FB9) 575 || (n >= 0x20D0 && n <= 0x20DC) 576 || (n == 0x20E1) 577 || (n >= 0x302A && n <= 0x302F) 578 || (n == 0x3099) 579 || (n == 0x309A); 580 } 581 582 596 private static final boolean isDigit(char c) { 597 int n = (int) c; 598 return (n >= 0x0030 && n <= 0x0039) 599 || (n >= 0x0660 && n <= 0x0669) 600 || (n >= 0x06F0 && n <= 0x06F9) 601 || (n >= 0x0966 && n <= 0x096F) 602 || (n >= 0x09E6 && n <= 0x09EF) 603 || (n >= 0x0A66 && n <= 0x0A6F) 604 || (n >= 0x0AE6 && n <= 0x0AEF) 605 || (n >= 0x0B66 && n <= 0x0B6F) 606 || (n >= 0x0BE7 && n <= 0x0BEF) 607 || (n >= 0x0C66 && n <= 0x0C6F) 608 || (n >= 0x0CE6 && n <= 0x0CEF) 609 || (n >= 0x0D66 && n <= 0x0D6F) 610 || (n >= 0x0E50 && n <= 0x0E59) 611 || (n >= 0x0ED0 && n <= 0x0ED9) 612 || (n >= 0x0F20 && n <= 0x0F29); 613 } 614 615 629 private static final boolean isExtender(char c) { 630 int n = (int) c; 631 return (n == 0x00B7) 632 || (n == 0x02D0) 633 || (n == 0x02D1) 634 || (n == 0x0387) 635 || (n == 0x0640) 636 || (n == 0x0E46) 637 || (n == 0x0EC6) 638 || (n == 0x3005) 639 || (n >= 0x3031 && n <= 0x3035) 640 || (n >= 0x309D && n <= 0x309E) 641 || (n >= 0x30FC && n <= 0x30FE); 642 } 643 644 645 649 653 657 private XMLChecker() { 658 } 660 661 662 666 } 670 | Popular Tags |