1 54 55 package org.w3c.tidy; 56 57 62 public final class TidyUtils 63 { 64 65 68 private static final short DIGIT = 1; 69 70 73 private static final short LETTER = 2; 74 75 78 private static final short NAMECHAR = 4; 79 80 83 private static final short WHITE = 8; 84 85 88 private static final short NEWLINE = 16; 89 90 93 private static final short LOWERCASE = 32; 94 95 98 private static final short UPPERCASE = 64; 99 100 103 private static short[] lexmap = new short[128]; 104 105 static 106 { 107 mapStr("\r\n\f", (short) (NEWLINE | WHITE)); 108 mapStr(" \t", WHITE); 109 mapStr("-.:_", NAMECHAR); 110 mapStr("0123456789", (short) (DIGIT | NAMECHAR)); 111 mapStr("abcdefghijklmnopqrstuvwxyz", (short) (LOWERCASE | LETTER | NAMECHAR)); 112 mapStr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", (short) (UPPERCASE | LETTER | NAMECHAR)); 113 } 114 115 118 private TidyUtils() 119 { 120 } 122 123 128 static boolean toBoolean(int value) 129 { 130 return value != 0; 131 } 132 133 138 static int toUnsigned(int c) 139 { 140 return c & 0xFF; 141 } 142 143 150 static boolean wsubstrn(String s1, int len1, String s2) 151 { 152 int searchIndex = s1.indexOf(s2); 153 return searchIndex > -1 && searchIndex <= len1; 154 } 155 156 163 static boolean wsubstrncase(String s1, int len1, String s2) 164 { 165 return wsubstrn(s1.toLowerCase(), len1, s2.toLowerCase()); 166 } 167 168 175 static int wstrnchr(String s1, int len1, char cc) 176 { 177 int indexOf = s1.indexOf(cc); 178 if (indexOf < len1) 179 { 180 return indexOf; 181 } 182 183 return -1; 184 } 185 186 192 static boolean wsubstr(String s1, String s2) 193 { 194 int i; 195 int len1 = s1.length(); 196 int len2 = s2.length(); 197 198 for (i = 0; i <= len1 - len2; ++i) 199 { 200 if (s2.equalsIgnoreCase(s1.substring(i))) 201 { 202 return true; 203 } 204 } 205 206 return false; 207 } 208 209 214 static boolean isxdigit(char c) 215 { 216 return Character.isDigit(c) || (Character.toLowerCase(c) >= 'a' && Character.toLowerCase(c) <= 'f'); 217 } 218 219 225 static boolean isInValuesIgnoreCase(String [] validValues, String valueToCheck) 226 { 227 int len = validValues.length; 228 for (int j = 0; j < len; j++) 229 { 230 if (validValues[j].equalsIgnoreCase(valueToCheck)) 231 { 232 return true; 233 } 234 } 235 return false; 236 } 237 238 246 public static boolean findBadSubString(String s, String p, int len) 247 { 248 int n = s.length(); 249 int i = 0; 250 String ps; 251 252 while (n < len) 253 { 254 ps = p.substring(i, i + n); 255 if (s.equalsIgnoreCase(ps)) 256 { 257 return (!ps.equals(s.substring(0, n))); 258 } 259 260 ++i; 261 --len; 262 } 263 264 return false; 265 } 266 267 272 static boolean isXMLLetter(char c) 273 { 274 return ((c >= 0x41 && c <= 0x5a) 275 || (c >= 0x61 && c <= 0x7a) 276 || (c >= 0xc0 && c <= 0xd6) 277 || (c >= 0xd8 && c <= 0xf6) 278 || (c >= 0xf8 && c <= 0xff) 279 || (c >= 0x100 && c <= 0x131) 280 || (c >= 0x134 && c <= 0x13e) 281 || (c >= 0x141 && c <= 0x148) 282 || (c >= 0x14a && c <= 0x17e) 283 || (c >= 0x180 && c <= 0x1c3) 284 || (c >= 0x1cd && c <= 0x1f0) 285 || (c >= 0x1f4 && c <= 0x1f5) 286 || (c >= 0x1fa && c <= 0x217) 287 || (c >= 0x250 && c <= 0x2a8) 288 || (c >= 0x2bb && c <= 0x2c1) 289 || c == 0x386 290 || (c >= 0x388 && c <= 0x38a) 291 || c == 0x38c 292 || (c >= 0x38e && c <= 0x3a1) 293 || (c >= 0x3a3 && c <= 0x3ce) 294 || (c >= 0x3d0 && c <= 0x3d6) 295 || c == 0x3da 296 || c == 0x3dc 297 || c == 0x3de 298 || c == 0x3e0 299 || (c >= 0x3e2 && c <= 0x3f3) 300 || (c >= 0x401 && c <= 0x40c) 301 || (c >= 0x40e && c <= 0x44f) 302 || (c >= 0x451 && c <= 0x45c) 303 || (c >= 0x45e && c <= 0x481) 304 || (c >= 0x490 && c <= 0x4c4) 305 || (c >= 0x4c7 && c <= 0x4c8) 306 || (c >= 0x4cb && c <= 0x4cc) 307 || (c >= 0x4d0 && c <= 0x4eb) 308 || (c >= 0x4ee && c <= 0x4f5) 309 || (c >= 0x4f8 && c <= 0x4f9) 310 || (c >= 0x531 && c <= 0x556) 311 || c == 0x559 312 || (c >= 0x561 && c <= 0x586) 313 || (c >= 0x5d0 && c <= 0x5ea) 314 || (c >= 0x5f0 && c <= 0x5f2) 315 || (c >= 0x621 && c <= 0x63a) 316 || (c >= 0x641 && c <= 0x64a) 317 || (c >= 0x671 && c <= 0x6b7) 318 || (c >= 0x6ba && c <= 0x6be) 319 || (c >= 0x6c0 && c <= 0x6ce) 320 || (c >= 0x6d0 && c <= 0x6d3) 321 || c == 0x6d5 322 || (c >= 0x6e5 && c <= 0x6e6) 323 || (c >= 0x905 && c <= 0x939) 324 || c == 0x93d 325 || (c >= 0x958 && c <= 0x961) 326 || (c >= 0x985 && c <= 0x98c) 327 || (c >= 0x98f && c <= 0x990) 328 || (c >= 0x993 && c <= 0x9a8) 329 || (c >= 0x9aa && c <= 0x9b0) 330 || c == 0x9b2 331 || (c >= 0x9b6 && c <= 0x9b9) 332 || (c >= 0x9dc && c <= 0x9dd) 333 || (c >= 0x9df && c <= 0x9e1) 334 || (c >= 0x9f0 && c <= 0x9f1) 335 || (c >= 0xa05 && c <= 0xa0a) 336 || (c >= 0xa0f && c <= 0xa10) 337 || (c >= 0xa13 && c <= 0xa28) 338 || (c >= 0xa2a && c <= 0xa30) 339 || (c >= 0xa32 && c <= 0xa33) 340 || (c >= 0xa35 && c <= 0xa36) 341 || (c >= 0xa38 && c <= 0xa39) 342 || (c >= 0xa59 && c <= 0xa5c) 343 || c == 0xa5e 344 || (c >= 0xa72 && c <= 0xa74) 345 || (c >= 0xa85 && c <= 0xa8b) 346 || c == 0xa8d 347 || (c >= 0xa8f && c <= 0xa91) 348 || (c >= 0xa93 && c <= 0xaa8) 349 || (c >= 0xaaa && c <= 0xab0) 350 || (c >= 0xab2 && c <= 0xab3) 351 || (c >= 0xab5 && c <= 0xab9) 352 || c == 0xabd 353 || c == 0xae0 354 || (c >= 0xb05 && c <= 0xb0c) 355 || (c >= 0xb0f && c <= 0xb10) 356 || (c >= 0xb13 && c <= 0xb28) 357 || (c >= 0xb2a && c <= 0xb30) 358 || (c >= 0xb32 && c <= 0xb33) 359 || (c >= 0xb36 && c <= 0xb39) 360 || c == 0xb3d 361 || (c >= 0xb5c && c <= 0xb5d) 362 || (c >= 0xb5f && c <= 0xb61) 363 || (c >= 0xb85 && c <= 0xb8a) 364 || (c >= 0xb8e && c <= 0xb90) 365 || (c >= 0xb92 && c <= 0xb95) 366 || (c >= 0xb99 && c <= 0xb9a) 367 || c == 0xb9c 368 || (c >= 0xb9e && c <= 0xb9f) 369 || (c >= 0xba3 && c <= 0xba4) 370 || (c >= 0xba8 && c <= 0xbaa) 371 || (c >= 0xbae && c <= 0xbb5) 372 || (c >= 0xbb7 && c <= 0xbb9) 373 || (c >= 0xc05 && c <= 0xc0c) 374 || (c >= 0xc0e && c <= 0xc10) 375 || (c >= 0xc12 && c <= 0xc28) 376 || (c >= 0xc2a && c <= 0xc33) 377 || (c >= 0xc35 && c <= 0xc39) 378 || (c >= 0xc60 && c <= 0xc61) 379 || (c >= 0xc85 && c <= 0xc8c) 380 || (c >= 0xc8e && c <= 0xc90) 381 || (c >= 0xc92 && c <= 0xca8) 382 || (c >= 0xcaa && c <= 0xcb3) 383 || (c >= 0xcb5 && c <= 0xcb9) 384 || c == 0xcde 385 || (c >= 0xce0 && c <= 0xce1) 386 || (c >= 0xd05 && c <= 0xd0c) 387 || (c >= 0xd0e && c <= 0xd10) 388 || (c >= 0xd12 && c <= 0xd28) 389 || (c >= 0xd2a && c <= 0xd39) 390 || (c >= 0xd60 && c <= 0xd61) 391 || (c >= 0xe01 && c <= 0xe2e) 392 || c == 0xe30 393 || (c >= 0xe32 && c <= 0xe33) 394 || (c >= 0xe40 && c <= 0xe45) 395 || (c >= 0xe81 && c <= 0xe82) 396 || c == 0xe84 397 || (c >= 0xe87 && c <= 0xe88) 398 || c == 0xe8a 399 || c == 0xe8d 400 || (c >= 0xe94 && c <= 0xe97) 401 || (c >= 0xe99 && c <= 0xe9f) 402 || (c >= 0xea1 && c <= 0xea3) 403 || c == 0xea5 404 || c == 0xea7 405 || (c >= 0xeaa && c <= 0xeab) 406 || (c >= 0xead && c <= 0xeae) 407 || c == 0xeb0 408 || (c >= 0xeb2 && c <= 0xeb3) 409 || c == 0xebd 410 || (c >= 0xec0 && c <= 0xec4) 411 || (c >= 0xf40 && c <= 0xf47) 412 || (c >= 0xf49 && c <= 0xf69) 413 || (c >= 0x10a0 && c <= 0x10c5) 414 || (c >= 0x10d0 && c <= 0x10f6) 415 || c == 0x1100 416 || (c >= 0x1102 && c <= 0x1103) 417 || (c >= 0x1105 && c <= 0x1107) 418 || c == 0x1109 419 || (c >= 0x110b && c <= 0x110c) 420 || (c >= 0x110e && c <= 0x1112) 421 || c == 0x113c 422 || c == 0x113e 423 || c == 0x1140 424 || c == 0x114c 425 || c == 0x114e 426 || c == 0x1150 427 || (c >= 0x1154 && c <= 0x1155) 428 || c == 0x1159 429 || (c >= 0x115f && c <= 0x1161) 430 || c == 0x1163 431 || c == 0x1165 432 || c == 0x1167 433 || c == 0x1169 434 || (c >= 0x116d && c <= 0x116e) 435 || (c >= 0x1172 && c <= 0x1173) 436 || c == 0x1175 437 || c == 0x119e 438 || c == 0x11a8 439 || c == 0x11ab 440 || (c >= 0x11ae && c <= 0x11af) 441 || (c >= 0x11b7 && c <= 0x11b8) 442 || c == 0x11ba 443 || (c >= 0x11bc && c <= 0x11c2) 444 || c == 0x11eb 445 || c == 0x11f0 446 || c == 0x11f9 447 || (c >= 0x1e00 && c <= 0x1e9b) 448 || (c >= 0x1ea0 && c <= 0x1ef9) 449 || (c >= 0x1f00 && c <= 0x1f15) 450 || (c >= 0x1f18 && c <= 0x1f1d) 451 || (c >= 0x1f20 && c <= 0x1f45) 452 || (c >= 0x1f48 && c <= 0x1f4d) 453 || (c >= 0x1f50 && c <= 0x1f57) 454 || c == 0x1f59 455 || c == 0x1f5b 456 || c == 0x1f5d 457 || (c >= 0x1f5f && c <= 0x1f7d) 458 || (c >= 0x1f80 && c <= 0x1fb4) 459 || (c >= 0x1fb6 && c <= 0x1fbc) 460 || c == 0x1fbe 461 || (c >= 0x1fc2 && c <= 0x1fc4) 462 || (c >= 0x1fc6 && c <= 0x1fcc) 463 || (c >= 0x1fd0 && c <= 0x1fd3) 464 || (c >= 0x1fd6 && c <= 0x1fdb) 465 || (c >= 0x1fe0 && c <= 0x1fec) 466 || (c >= 0x1ff2 && c <= 0x1ff4) 467 || (c >= 0x1ff6 && c <= 0x1ffc) 468 || c == 0x2126 469 || (c >= 0x212a && c <= 0x212b) 470 || c == 0x212e 471 || (c >= 0x2180 && c <= 0x2182) 472 || (c >= 0x3041 && c <= 0x3094) 473 || (c >= 0x30a1 && c <= 0x30fa) 474 || (c >= 0x3105 && c <= 0x312c) 475 || (c >= 0xac00 && c <= 0xd7a3) 476 || (c >= 0x4e00 && c <= 0x9fa5) 477 || c == 0x3007 478 || (c >= 0x3021 && c <= 0x3029) 479 || (c >= 0x4e00 && c <= 0x9fa5) 480 || c == 0x3007 || (c >= 0x3021 && c <= 0x3029)); 481 } 482 483 488 static boolean isXMLNamechar(char c) 489 { 490 return (isXMLLetter(c) 491 || c == '.' 492 || c == '_' 493 || c == ':' 494 || c == '-' 495 || (c >= 0x300 && c <= 0x345) 496 || (c >= 0x360 && c <= 0x361) 497 || (c >= 0x483 && c <= 0x486) 498 || (c >= 0x591 && c <= 0x5a1) 499 || (c >= 0x5a3 && c <= 0x5b9) 500 || (c >= 0x5bb && c <= 0x5bd) 501 || c == 0x5bf 502 || (c >= 0x5c1 && c <= 0x5c2) 503 || c == 0x5c4 504 || (c >= 0x64b && c <= 0x652) 505 || c == 0x670 506 || (c >= 0x6d6 && c <= 0x6dc) 507 || (c >= 0x6dd && c <= 0x6df) 508 || (c >= 0x6e0 && c <= 0x6e4) 509 || (c >= 0x6e7 && c <= 0x6e8) 510 || (c >= 0x6ea && c <= 0x6ed) 511 || (c >= 0x901 && c <= 0x903) 512 || c == 0x93c 513 || (c >= 0x93e && c <= 0x94c) 514 || c == 0x94d 515 || (c >= 0x951 && c <= 0x954) 516 || (c >= 0x962 && c <= 0x963) 517 || (c >= 0x981 && c <= 0x983) 518 || c == 0x9bc 519 || c == 0x9be 520 || c == 0x9bf 521 || (c >= 0x9c0 && c <= 0x9c4) 522 || (c >= 0x9c7 && c <= 0x9c8) 523 || (c >= 0x9cb && c <= 0x9cd) 524 || c == 0x9d7 525 || (c >= 0x9e2 && c <= 0x9e3) 526 || c == 0xa02 527 || c == 0xa3c 528 || c == 0xa3e 529 || c == 0xa3f 530 || (c >= 0xa40 && c <= 0xa42) 531 || (c >= 0xa47 && c <= 0xa48) 532 || (c >= 0xa4b && c <= 0xa4d) 533 || (c >= 0xa70 && c <= 0xa71) 534 || (c >= 0xa81 && c <= 0xa83) 535 || c == 0xabc 536 || (c >= 0xabe && c <= 0xac5) 537 || (c >= 0xac7 && c <= 0xac9) 538 || (c >= 0xacb && c <= 0xacd) 539 || (c >= 0xb01 && c <= 0xb03) 540 || c == 0xb3c 541 || (c >= 0xb3e && c <= 0xb43) 542 || (c >= 0xb47 && c <= 0xb48) 543 || (c >= 0xb4b && c <= 0xb4d) 544 || (c >= 0xb56 && c <= 0xb57) 545 || (c >= 0xb82 && c <= 0xb83) 546 || (c >= 0xbbe && c <= 0xbc2) 547 || (c >= 0xbc6 && c <= 0xbc8) 548 || (c >= 0xbca && c <= 0xbcd) 549 || c == 0xbd7 550 || (c >= 0xc01 && c <= 0xc03) 551 || (c >= 0xc3e && c <= 0xc44) 552 || (c >= 0xc46 && c <= 0xc48) 553 || (c >= 0xc4a && c <= 0xc4d) 554 || (c >= 0xc55 && c <= 0xc56) 555 || (c >= 0xc82 && c <= 0xc83) 556 || (c >= 0xcbe && c <= 0xcc4) 557 || (c >= 0xcc6 && c <= 0xcc8) 558 || (c >= 0xcca && c <= 0xccd) 559 || (c >= 0xcd5 && c <= 0xcd6) 560 || (c >= 0xd02 && c <= 0xd03) 561 || (c >= 0xd3e && c <= 0xd43) 562 || (c >= 0xd46 && c <= 0xd48) 563 || (c >= 0xd4a && c <= 0xd4d) 564 || c == 0xd57 565 || c == 0xe31 566 || (c >= 0xe34 && c <= 0xe3a) 567 || (c >= 0xe47 && c <= 0xe4e) 568 || c == 0xeb1 569 || (c >= 0xeb4 && c <= 0xeb9) 570 || (c >= 0xebb && c <= 0xebc) 571 || (c >= 0xec8 && c <= 0xecd) 572 || (c >= 0xf18 && c <= 0xf19) 573 || c == 0xf35 574 || c == 0xf37 575 || c == 0xf39 576 || c == 0xf3e 577 || c == 0xf3f 578 || (c >= 0xf71 && c <= 0xf84) 579 || (c >= 0xf86 && c <= 0xf8b) 580 || (c >= 0xf90 && c <= 0xf95) 581 || c == 0xf97 582 || (c >= 0xf99 && c <= 0xfad) 583 || (c >= 0xfb1 && c <= 0xfb7) 584 || c == 0xfb9 585 || (c >= 0x20d0 && c <= 0x20dc) 586 || c == 0x20e1 587 || (c >= 0x302a && c <= 0x302f) 588 || c == 0x3099 589 || c == 0x309a 590 || (c >= 0x30 && c <= 0x39) 591 || (c >= 0x660 && c <= 0x669) 592 || (c >= 0x6f0 && c <= 0x6f9) 593 || (c >= 0x966 && c <= 0x96f) 594 || (c >= 0x9e6 && c <= 0x9ef) 595 || (c >= 0xa66 && c <= 0xa6f) 596 || (c >= 0xae6 && c <= 0xaef) 597 || (c >= 0xb66 && c <= 0xb6f) 598 || (c >= 0xbe7 && c <= 0xbef) 599 || (c >= 0xc66 && c <= 0xc6f) 600 || (c >= 0xce6 && c <= 0xcef) 601 || (c >= 0xd66 && c <= 0xd6f) 602 || (c >= 0xe50 && c <= 0xe59) 603 || (c >= 0xed0 && c <= 0xed9) 604 || (c >= 0xf20 && c <= 0xf29) 605 || c == 0xb7 606 || c == 0x2d0 607 || c == 0x2d1 608 || c == 0x387 609 || c == 0x640 610 || c == 0xe46 611 || c == 0xec6 612 || c == 0x3005 613 || (c >= 0x3031 && c <= 0x3035) 614 || (c >= 0x309d && c <= 0x309e) || (c >= 0x30fc && c <= 0x30fe)); 615 } 616 617 622 static boolean isQuote(int c) 623 { 624 return (c == '\'' || c == '\"'); 625 } 626 627 634 public static byte[] getBytes(String str) 635 { 636 try 637 { 638 return str.getBytes("UTF8"); 639 } 640 catch (java.io.UnsupportedEncodingException e) 641 { 642 throw new Error ("String to UTF-8 conversion failed: " + e.getMessage()); 643 } 644 } 645 646 654 public static String getString(byte[] bytes, int offset, int length) 655 { 656 try 657 { 658 return new String (bytes, offset, length, "UTF8"); 659 } 660 catch (java.io.UnsupportedEncodingException e) 661 { 662 throw new Error ("UTF-8 to string conversion failed: " + e.getMessage()); 663 } 664 } 665 666 671 public static int lastChar(String str) 672 { 673 if (str != null && str.length() > 0) 674 { 675 return str.charAt(str.length() - 1); 676 } 677 678 return 0; 679 } 680 681 686 public static boolean isWhite(char c) 687 { 688 short m = map(c); 689 return TidyUtils.toBoolean(m & WHITE); 690 } 691 692 697 public static boolean isDigit(char c) 698 { 699 short m; 700 m = map(c); 701 return TidyUtils.toBoolean(m & DIGIT); 702 } 703 704 709 public static boolean isLetter(char c) 710 { 711 short m; 712 m = map(c); 713 return TidyUtils.toBoolean(m & LETTER); 714 } 715 716 721 public static boolean isNamechar(char c) 722 { 723 short map = map(c); 724 725 return TidyUtils.toBoolean(map & NAMECHAR); 726 } 727 728 733 public static boolean isLower(char c) 734 { 735 short map = map(c); 736 737 return TidyUtils.toBoolean(map & LOWERCASE); 738 } 739 740 745 public static boolean isUpper(char c) 746 { 747 short map = map(c); 748 749 return TidyUtils.toBoolean(map & UPPERCASE); 750 } 751 752 757 public static char toLower(char c) 758 { 759 short m = map(c); 760 761 if (TidyUtils.toBoolean(m & UPPERCASE)) 762 { 763 c = (char) (c + 'a' - 'A'); 764 } 765 766 return c; 767 } 768 769 774 public static char toUpper(char c) 775 { 776 short m = map(c); 777 778 if (TidyUtils.toBoolean(m & LOWERCASE)) 779 { 780 c = (char) (c + 'A' - 'a'); 781 } 782 783 return c; 784 } 785 786 794 public static char foldCase(char c, boolean tocaps, boolean xmlTags) 795 { 796 797 if (!xmlTags) 798 { 799 800 if (tocaps) 801 { 802 if (isLower(c)) 803 { 804 c = toUpper(c); 805 } 806 } 807 else 808 { 809 if (isUpper(c)) 811 { 812 c = toLower(c); 813 } 814 } 815 } 816 817 return c; 818 } 819 820 825 private static void mapStr(String str, short code) 826 { 827 int c; 828 for (int i = 0; i < str.length(); i++) 829 { 830 c = str.charAt(i); 831 lexmap[c] |= code; 832 } 833 } 834 835 840 private static short map(char c) 841 { 842 return (c < 128 ? lexmap[c] : 0); 843 } 844 845 850 public static boolean isCharEncodingSupported(String name) 851 { 852 name = EncodingNameMapper.toJava(name); 853 if (name == null) 854 { 855 return false; 856 } 857 858 try 859 { 860 "".getBytes(name); 861 } 862 catch (java.io.UnsupportedEncodingException e) 863 { 864 return false; 865 } 866 return true; 867 } 868 } | Popular Tags |