package com.ibm.icu.text;

import java.io.IOException;
import java.util.MissingResourceException;

import com.ibm.icu.impl.UBiDiProps;

import com.ibm.icu.lang.*;

/**
 * Shapes Arabic text held in a <code>char</code> buffer according to a set of
 * option flags supplied at construction time.
 *
 * <p>The options are a bitwise OR of one value from each of the option groups
 * declared below (length handling, text direction, letter shaping, digit
 * shaping and digit type). Depending on the options an instance can:
 * <ul>
 * <li>replace Arabic letters (U+0622..U+06D3) by presentation forms in the
 *     U+FE70..U+FEFC range, combining lam + alef pairs into a single
 *     character ("shape"),</li>
 * <li>replace presentation forms by the base letters, expanding lam-alef
 *     ligatures back into two characters ("unshape"),</li>
 * <li>convert between European digits (U+0030..U+0039) and Arabic-Indic or
 *     Extended Arabic-Indic digits.</li>
 * </ul>
 *
 * <p>Instances are immutable; all state is fixed by the constructor.
 */
public final class ArabicShaping {
    /** The option bit set this instance was constructed with. */
    private final int options;

    /** True when the text direction option is {@link #TEXT_DIRECTION_LOGICAL}. */
    private final boolean isLogical;

    /**
     * Converts a range of text from <code>source</code> and writes the result
     * into <code>dest</code>. The work is done on a temporary copy, so
     * <code>source</code> and <code>dest</code> may be the same array.
     *
     * <p>When <code>destSize</code> is zero nothing is written: the method
     * returns the size the result would need (computed for letter operations
     * with {@link #LENGTH_GROW_SHRINK}, otherwise <code>sourceLength</code>).
     *
     * @param source       the text to convert; must not be null
     * @param sourceStart  index of the first character to convert
     * @param sourceLength number of characters to convert
     * @param dest         the output buffer; may be null only when
     *                     <code>destSize</code> is zero
     * @param destStart    index in <code>dest</code> of the first output character
     * @param destSize     number of characters available in <code>dest</code>
     * @return the number of characters written, or the required size when
     *         <code>destSize</code> is zero; zero when <code>sourceLength</code> is zero
     * @throws IllegalArgumentException if <code>source</code> is null, if
     *         <code>dest</code> is null while <code>destSize</code> is non-zero,
     *         or if either range lies outside its buffer
     * @throws ArabicShapingException if the result does not fit in
     *         <code>destSize</code> characters, or a lam-alef cannot be expanded
     *         under a fixed-length option
     */
    public int shape(char[] source, int sourceStart, int sourceLength,
                     char[] dest, int destStart, int destSize) throws ArabicShapingException {
        if (source == null) {
            throw new IllegalArgumentException("source can not be null");
        }
        // Subtraction form: "sourceStart + sourceLength" can overflow int and
        // wrongly pass the check. sourceStart is already known to be >= 0 here.
        if (sourceStart < 0 || sourceLength < 0 || sourceLength > source.length - sourceStart) {
            throw new IllegalArgumentException("bad source start (" + sourceStart +
                                               ") or length (" + sourceLength +
                                               ") for buffer of length " + source.length);
        }
        if (dest == null && destSize != 0) {
            throw new IllegalArgumentException("null dest requires destSize == 0");
        }
        // Same overflow-safe form as above; dest is non-null whenever destSize != 0.
        if ((destSize != 0) &&
            (destStart < 0 || destSize < 0 || destSize > dest.length - destStart)) {
            throw new IllegalArgumentException("bad dest start (" + destStart +
                                               ") or size (" + destSize +
                                               ") for buffer of length " + dest.length);
        }

        return internalShape(source, sourceStart, sourceLength, dest, destStart, destSize);
    }

    /**
     * Converts a range of text in place. The result always occupies exactly
     * <code>length</code> characters, so this is not available with
     * {@link #LENGTH_GROW_SHRINK}.
     *
     * @param source the text to convert; receives the result
     * @param start  index of the first character to convert
     * @param length number of characters to convert
     * @throws ArabicShapingException if the length option is
     *         {@link #LENGTH_GROW_SHRINK}, or the conversion itself fails
     * @throws IllegalArgumentException if the arguments do not describe a
     *         valid range of <code>source</code>
     */
    public void shape(char[] source, int start, int length) throws ArabicShapingException {
        if ((options & LENGTH_MASK) == LENGTH_GROW_SHRINK) {
            throw new ArabicShapingException("Cannot shape in place with length option grow/shrink.");
        }
        shape(source, start, length, source, start, length);
    }

    /**
     * Converts a string and returns the result as a new string.
     *
     * @param text the text to convert; must not be null
     * @return the converted text
     * @throws ArabicShapingException if the conversion fails
     */
    public String shape(String text) throws ArabicShapingException {
        char[] src = text.toCharArray();
        char[] dest = src;
        if (((options & LENGTH_MASK) == LENGTH_GROW_SHRINK) &&
            ((options & LETTERS_MASK) == LETTERS_UNSHAPE)) {

            // Unshaping may expand every lam-alef into two characters, so the
            // worst case is twice the input length.
            dest = new char[src.length * 2];
        }
        int len = shape(src, 0, src.length, dest, 0, dest.length);

        return new String(dest, 0, len);
    }

    /**
     * Creates a shaper for the given options.
     *
     * @param options bitwise OR of one value from each option group
     * @throws IllegalArgumentException if the digit shaping bits hold a
     *         reserved value (anything above {@link #DIGITS_EN2AN_INIT_AL})
     */
    public ArabicShaping(int options) {
        this.options = options;
        // The digit field takes multiples of 0x20, so ">= DIGITS_RESERVED"
        // rejects exactly the values above DIGITS_EN2AN_INIT_AL (0x80).
        if ((options & DIGITS_MASK) >= DIGITS_RESERVED) {
            throw new IllegalArgumentException("bad DIGITS options");
        }
        isLogical = (options & TEXT_DIRECTION_MASK) == TEXT_DIRECTION_LOGICAL;
    }

    /**
     * Length option: the result may be shorter or longer than the input
     * (lam-alef pairs are merged or expanded without padding).
     */
    public static final int LENGTH_GROW_SHRINK = 0;

    /**
     * Length option: the result keeps the input length; the space freed or
     * needed by a lam-alef is the character next to it.
     */
    public static final int LENGTH_FIXED_SPACES_NEAR = 1;

    /**
     * Length option: the result keeps the input length; spaces freed or
     * needed by lam-alef handling are at the end of the text.
     */
    public static final int LENGTH_FIXED_SPACES_AT_END = 2;

    /**
     * Length option: the result keeps the input length; spaces freed or
     * needed by lam-alef handling are at the beginning of the text.
     */
    public static final int LENGTH_FIXED_SPACES_AT_BEGINNING = 3;

    /** Bit mask for the length options. */
    public static final int LENGTH_MASK = 3;


    /** Direction option: the text is in logical order. */
    public static final int TEXT_DIRECTION_LOGICAL = 0;

    /** Direction option: the text is in visual (left-to-right) order. */
    public static final int TEXT_DIRECTION_VISUAL_LTR = 4;

    /** Bit mask for the text direction options. */
    public static final int TEXT_DIRECTION_MASK = 4;


    /** Letter option: leave letters unchanged. */
    public static final int LETTERS_NOOP = 0;

    /** Letter option: replace letters by their presentation forms. */
    public static final int LETTERS_SHAPE = 8;

    /** Letter option: replace presentation forms by the base letters. */
    public static final int LETTERS_UNSHAPE = 0x10;

    /**
     * Letter option: like {@link #LETTERS_SHAPE}, but tashkeel characters
     * (U+064B..U+0652) always receive their isolated presentation form.
     */
    public static final int LETTERS_SHAPE_TASHKEEL_ISOLATED = 0x18;

    /** Bit mask for the letter shaping options. */
    public static final int LETTERS_MASK = 0x18;


    /** Digit option: leave digits unchanged. */
    public static final int DIGITS_NOOP = 0;

    /** Digit option: replace European digits (U+0030..U+0039) by Arabic digits. */
    public static final int DIGITS_EN2AN = 0x20;

    /** Digit option: replace Arabic digits by European digits (U+0030..U+0039). */
    public static final int DIGITS_AN2EN = 0x40;

    /**
     * Digit option: replace European digits by Arabic digits only when the
     * most recent strongly-directional character is an Arabic letter; before
     * any strong character is seen, digits are left as European.
     */
    public static final int DIGITS_EN2AN_INIT_LR = 0x60;

    /**
     * Digit option: like {@link #DIGITS_EN2AN_INIT_LR}, but before any strong
     * character is seen, digits are treated as following an Arabic letter.
     */
    public static final int DIGITS_EN2AN_INIT_AL = 0x80;

    /** First reserved (invalid) value of the digit shaping field. */
    private static final int DIGITS_RESERVED = 0xa0;

    /** Bit mask for the digit shaping options. */
    public static final int DIGITS_MASK = 0xe0;

    /** Digit type option: Arabic-Indic digits (U+0660..U+0669). */
    public static final int DIGIT_TYPE_AN = 0;

    /** Digit type option: Extended Arabic-Indic digits (U+06F0..U+06F9). */
    public static final int DIGIT_TYPE_AN_EXTENDED = 0x100;

    /** Bit mask for the digit type options. */
    public static final int DIGIT_TYPE_MASK = 0x0100;

    /**
     * Two shapers are equal when they were built with the same options.
     */
    public boolean equals(Object rhs) {
        return rhs != null &&
            rhs.getClass() == ArabicShaping.class &&
            options == ((ArabicShaping)rhs).options;
    }

    /**
     * Returns the options value, consistent with {@link #equals(Object)}.
     */
    public int hashCode() {
        return options;
    }

    /**
     * Returns the default object description followed by a bracketed,
     * human-readable list of the selected options.
     */
    public String toString() {
        StringBuffer buf = new StringBuffer(super.toString());
        buf.append('[');
        switch (options & LENGTH_MASK) {
        case LENGTH_GROW_SHRINK: buf.append("grow/shrink"); break;
        case LENGTH_FIXED_SPACES_NEAR: buf.append("spaces near"); break;
        case LENGTH_FIXED_SPACES_AT_END: buf.append("spaces at end"); break;
        case LENGTH_FIXED_SPACES_AT_BEGINNING: buf.append("spaces at beginning"); break;
        }
        switch (options & TEXT_DIRECTION_MASK) {
        case TEXT_DIRECTION_LOGICAL: buf.append(", logical"); break;
        case TEXT_DIRECTION_VISUAL_LTR: buf.append(", visual"); break;
        }
        switch (options & LETTERS_MASK) {
        case LETTERS_NOOP: buf.append(", no letter shaping"); break;
        case LETTERS_SHAPE: buf.append(", shape letters"); break;
        case LETTERS_SHAPE_TASHKEEL_ISOLATED: buf.append(", shape letters tashkeel isolated"); break;
        case LETTERS_UNSHAPE: buf.append(", unshape letters"); break;
        }
        switch (options & DIGITS_MASK) {
        case DIGITS_NOOP: buf.append(", no digit shaping"); break;
        case DIGITS_EN2AN: buf.append(", shape digits to AN"); break;
        case DIGITS_AN2EN: buf.append(", shape digits to EN"); break;
        case DIGITS_EN2AN_INIT_LR: buf.append(", shape digits to AN contextually: default EN"); break;
        case DIGITS_EN2AN_INIT_AL: buf.append(", shape digits to AN contextually: default AL"); break;
        }
        switch (options & DIGIT_TYPE_MASK) {
        case DIGIT_TYPE_AN: buf.append(", standard Arabic-Indic digits"); break;
        case DIGIT_TYPE_AN_EXTENDED: buf.append(", extended Arabic-Indic digits"); break;
        }
        buf.append("]");

        return buf.toString();
    }

    //
    // Link attribute bits. A "link" value, as returned by getLink(), packs:
    //   bits 0-1 : LINKR / LINKL joining flags (see LINK_MASK)
    //   bit  2   : IRRELEVANT - the character is skipped when looking for a neighbour
    //   bit  4   : LAMTYPE    - the character is a lam
    //   bit  5   : ALEFTYPE   - the character is an alef that can merge with a lam
    //   bits 8+  : offset from U+FE70 of the character's first presentation form
    //

    private static final int IRRELEVANT = 4;
    private static final int LAMTYPE = 16;
    private static final int ALEFTYPE = 32;

    private static final int LINKR = 1;
    private static final int LINKL = 2;
    private static final int LINK_MASK = 3;

    /**
     * Offset from U+FE70 of the presentation forms for the tashkeel
     * characters, indexed by <code>ch - U+064B</code>.
     */
    private static final int irrelevantPos[] = {
        0x0, 0x2, 0x4, 0x6, 0x8, 0xA, 0xC, 0xE
    };

    /**
     * Alef for each lam-alef presentation form, two entries per alef.
     * Not referenced by the code in this class.
     */
    private static final char convertLamAlef[] = {
        '\u0622',
        '\u0622',
        '\u0623',
        '\u0623',
        '\u0625',
        '\u0625',
        '\u0627',
        '\u0627'
    };

    /**
     * Alef for each intermediate lam-alef code, indexed by
     * <code>ch - U+065C</code> (see {@link #changeLamAlef(char)}).
     */
    private static final char convertNormalizedLamAlef[] = {
        '\u0622',
        '\u0623',
        '\u0625',
        '\u0627',
    };

    /**
     * Link attributes for U+0622..U+06D3, indexed by <code>ch - U+0622</code>.
     */
    private static final int[] araLink = {
        1           + 32 + 256 * 0x11,
        1           + 32 + 256 * 0x13,
        1                + 256 * 0x15,
        1           + 32 + 256 * 0x17,
        1 + 2            + 256 * 0x19,
        1           + 32 + 256 * 0x1D,
        1 + 2            + 256 * 0x1F,
        1                + 256 * 0x23,
        1 + 2            + 256 * 0x25,
        1 + 2            + 256 * 0x29,
        1 + 2            + 256 * 0x2D,
        1 + 2            + 256 * 0x31,
        1 + 2            + 256 * 0x35,
        1                + 256 * 0x39,
        1                + 256 * 0x3B,
        1                + 256 * 0x3D,
        1                + 256 * 0x3F,
        1 + 2            + 256 * 0x41,
        1 + 2            + 256 * 0x45,
        1 + 2            + 256 * 0x49,
        1 + 2            + 256 * 0x4D,
        1 + 2            + 256 * 0x51,
        1 + 2            + 256 * 0x55,
        1 + 2            + 256 * 0x59,
        1 + 2            + 256 * 0x5D,
        0, 0, 0, 0, 0,
        1 + 2,
        1 + 2            + 256 * 0x61,
        1 + 2            + 256 * 0x65,
        1 + 2            + 256 * 0x69,
        1 + 2       + 16 + 256 * 0x6D,
        1 + 2            + 256 * 0x71,
        1 + 2            + 256 * 0x75,
        1 + 2            + 256 * 0x79,
        1                + 256 * 0x7D,
        1                + 256 * 0x7F,
        1 + 2            + 256 * 0x81,
        4, 4, 4, 4,
        4, 4, 4, 4,
        4, 4, 4, 0, 0,
        0, 0, 0, 0,
        1                + 256 * 0x85,
        1                + 256 * 0x87,
        1                + 256 * 0x89,
        1                + 256 * 0x8B,
        0, 0, 0, 0, 0,
        0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        4,
        0,
        1           + 32,
        1           + 32,
        0,
        1           + 32,
        1, 1,
        1+2, 1+2, 1+2, 1+2, 1+2, 1+2,
        1+2, 1+2, 1+2, 1+2, 1+2, 1+2,
        1+2, 1+2, 1+2, 1+2,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1,
        1+2, 1+2, 1+2, 1+2, 1+2, 1+2,
        1+2, 1+2, 1+2, 1+2,
        1+2, 1+2, 1+2, 1+2, 1+2, 1+2,
        1+2, 1+2, 1+2, 1+2,
        1+2, 1+2, 1+2, 1+2, 1+2, 1+2,
        1+2, 1+2, 1+2, 1+2,
        1+2, 1+2, 1+2, 1+2, 1+2, 1+2,
        1+2, 1+2,
        1,
        1+2,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1+2,
        1,
        1+2, 1+2, 1+2, 1+2,
        1, 1
    };

    /**
     * Link attributes for the presentation forms U+FE70..U+FEFC, indexed by
     * <code>ch - U+FE70</code>.
     */
    private static final int[] presLink = {
        1 + 2,
        1 + 2,
        1 + 2, 0, 1+ 2, 0, 1+ 2,
        1 + 2,
        1+ 2, 1 + 2, 1+2, 1 + 2,
        1+ 2, 1 + 2, 1+2, 1 + 2,
        0, 0 + 32, 1 + 32, 0 + 32,
        1 + 32, 0, 1, 0 + 32,
        1 + 32, 0, 2, 1 + 2,
        1, 0 + 32, 1 + 32, 0,
        2, 1 + 2, 1, 0,
        1, 0, 2, 1 + 2,
        1, 0, 2, 1 + 2,
        1, 0, 2, 1 + 2,
        1, 0, 2, 1 + 2,
        1, 0, 2, 1 + 2,
        1, 0, 1, 0,
        1, 0, 1, 0,
        1, 0, 2, 1+2,
        1, 0, 2, 1+2,
        1, 0, 2, 1+2,
        1, 0, 2, 1+2,
        1, 0, 2, 1+2,
        1, 0, 2, 1+2,
        1, 0, 2, 1+2,
        1, 0, 2, 1+2,
        1, 0, 2, 1+2,
        1, 0, 2, 1+2,
        1, 0, 2, 1+2,
        1, 0 + 16, 2 + 16, 1 + 2 +16,
        1 + 16, 0, 2, 1+2,
        1, 0, 2, 1+2,
        1, 0, 2, 1+2,
        1, 0, 1, 0,
        1, 0, 2, 1+2,
        1, 0, 1, 0,
        1, 0, 1, 0,
        1
    };

    /**
     * Replacement for each presentation form U+FE70..U+FEFC, indexed by
     * <code>ch - U+FE70</code>. The lam-alef ligatures (U+FEF5..U+FEFC) map to
     * the intermediate codes U+065C..U+065F, which expandLamAlef() turns into
     * a lam + alef pair. The table is read-only, hence final.
     */
    private static final int[] convertFEto06 = {
    /***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/
        0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F, 0x650, 0x650, 0x651, 0x651, 0x652, 0x652,
        0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626, 0x626, 0x626, 0x626, 0x627, 0x627, 0x628,
        0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B, 0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C,
        0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F, 0x62F, 0x630, 0x630, 0x631, 0x631, 0x632,
        0x632, 0x633, 0x633, 0x633, 0x633, 0x634, 0x634, 0x634, 0x634, 0x635, 0x635, 0x635, 0x635, 0x636, 0x636, 0x636,
        0x636, 0x637, 0x637, 0x637, 0x637, 0x638, 0x638, 0x638, 0x638, 0x639, 0x639, 0x639, 0x639, 0x63A, 0x63A, 0x63A,
        0x63A, 0x641, 0x641, 0x641, 0x641, 0x642, 0x642, 0x642, 0x642, 0x643, 0x643, 0x643, 0x643, 0x644, 0x644, 0x644,
        0x644, 0x645, 0x645, 0x645, 0x645, 0x646, 0x646, 0x646, 0x646, 0x647, 0x647, 0x647, 0x647, 0x648, 0x648, 0x649,
        0x649, 0x64A, 0x64A, 0x64A, 0x64A, 0x65C, 0x65C, 0x65D, 0x65D, 0x65E, 0x65E, 0x65F, 0x65F
    };

    /**
     * Presentation-form offset to add to a character's base form, indexed by
     * <code>[nextLink & LINK_MASK][lastLink & LINK_MASK][currLink & LINK_MASK]</code>.
     */
    private static final int shapeTable[][][] = {
        { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,1} },
        { {0,0,2,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} },
        { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,3} },
        { {0,0,1,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} }
    };

    /**
     * Converts European digits to Arabic digits depending on the most recent
     * strongly-directional character. The range is scanned from its last
     * index down to <code>start</code>.
     *
     * @param dest            the buffer to convert in place
     * @param start           index of the first character of the range
     * @param length          number of characters in the range
     * @param digitBase       the zero digit of the target digit set
     * @param lastStrongWasAL initial context: true to treat the text as
     *                        following an Arabic letter
     * @throws MissingResourceException if the bidi properties cannot be loaded
     */
    private void shapeToArabicDigitsWithContext(char[] dest,
                                                int start,
                                                int length,
                                                char digitBase,
                                                boolean lastStrongWasAL) {
        UBiDiProps bdp;
        try {
            bdp=UBiDiProps.getSingleton();
        } catch (IOException e) {
            // MissingResourceException has no cause-taking constructor;
            // attach the IOException explicitly so it is not lost.
            MissingResourceException mre =
                new MissingResourceException(e.getMessage(), "(BidiProps)", "");
            mre.initCause(e);
            throw mre;
        }
        digitBase -= '0'; // turn the base into a delta once, outside the loop

        for(int i = start + length; --i >= start;) {
            char ch = dest[i];
            switch (bdp.getClass(ch)) {
            case UCharacterDirection.LEFT_TO_RIGHT:
            case UCharacterDirection.RIGHT_TO_LEFT:
                lastStrongWasAL = false;
                break;
            case UCharacterDirection.RIGHT_TO_LEFT_ARABIC:
                lastStrongWasAL = true;
                break;
            case UCharacterDirection.EUROPEAN_NUMBER:
                // Only characters up to U+0039 are shifted; other characters
                // of this bidi class are left alone.
                if (lastStrongWasAL && ch <= '\u0039') {
                    dest[i] = (char)(ch + digitBase);
                }
                break;
            default:
                break;
            }
        }
    }

    /**
     * Reverses <code>length</code> characters of <code>buffer</code> in place,
     * beginning at <code>start</code>.
     */
    private static void invertBuffer(char[] buffer,
                                     int start,
                                     int length) {

        for(int i = start, j = start + length - 1; i < j; i++, --j) {
            char temp = buffer[i];
            buffer[i] = buffer[j];
            buffer[j] = temp;
        }
    }

    /**
     * Maps an alef to the intermediate lam-alef code (U+065C..U+065F) used
     * while shaping.
     *
     * @return the intermediate code, or U+0000 if <code>ch</code> is not one
     *         of the four alef characters handled
     */
    private static char changeLamAlef(char ch) {
        switch(ch) {
        case '\u0622': return '\u065C';
        case '\u0623': return '\u065D';
        case '\u0625': return '\u065E';
        case '\u0627': return '\u065F';
        default:  return '\u0000'; // not a lam-alef candidate
        }
    }

    /**
     * Classifies a character for shapeUnicode():
     * <ul>
     * <li>1 - a letter whose shape offset is limited to 0 or 1,</li>
     * <li>2 - a tashkeel character (U+064B..U+0652),</li>
     * <li>3 - U+0653..U+0655, U+0670 or U+FE70..U+FE7F,</li>
     * <li>0 - anything else.</li>
     * </ul>
     */
    private static int specialChar(char ch) {
        if ((ch > '\u0621' && ch < '\u0626') ||
            (ch == '\u0627') ||
            (ch > '\u062E' && ch < '\u0633') ||
            (ch > '\u0647' && ch < '\u064A') ||
            (ch == '\u0629')) {
            return 1;
        } else if (ch >= '\u064B' && ch<= '\u0652') {
            return 2;
        } else if (ch >= 0x0653 && ch <= 0x0655 ||
                   ch == 0x0670 ||
                   ch >= 0xFE70 && ch <= 0xFE7F) {
            return 3;
        } else {
            return 0;
        }
    }

    /**
     * Returns the link attributes of a character (see the bit definitions
     * above), or 0 for characters outside the handled ranges.
     */
    private static int getLink(char ch) {
        if (ch >= '\u0622' && ch <= '\u06D3') {
            return araLink[ch - '\u0622'];
        } else if (ch == '\u200D') {
            return 3;
        } else if (ch >= '\u206D' && ch <= '\u206F') {
            return 4;
        } else if (ch >= '\uFE70' && ch <= '\uFEFC') {
            return presLink[ch - '\uFE70'];
        } else {
            return 0;
        }
    }

    /**
     * Counts the run of U+0020 characters at the low-index end of the range
     * <code>[start, start + count)</code>.
     */
    private static int countSpacesLeft(char[] dest,
                                       int start,
                                       int count) {
        for (int i = start, e = start + count; i < e; ++i) {
            if (dest[i] != '\u0020') {
                return i - start;
            }
        }
        return count;
    }

    /**
     * Counts the run of U+0020 characters at the high-index end of the range
     * <code>[start, start + count)</code>.
     */
    private static int countSpacesRight(char[] dest,
                                        int start,
                                        int count) {

        for (int i = start + count; --i >= start;) {
            if (dest[i] != '\u0020') {
                return start + count - 1 - i;
            }
        }
        return count;
    }

    /** Returns true for the tashkeel characters U+064B..U+0652. */
    private static boolean isTashkeelChar(char ch) {
        return ch >='\u064B' && ch <= '\u0652';
    }

    /** Returns true for the alef characters that can merge with a lam. */
    private static boolean isAlefChar(char ch) {
        return ch == '\u0622' || ch == '\u0623' || ch == '\u0625' || ch == '\u0627';
    }

    /** Returns true for the lam-alef presentation forms U+FEF5..U+FEFC. */
    private static boolean isLamAlefChar(char ch) {
        return ch >= '\uFEF5' && ch <= '\uFEFC';
    }

    /** Returns true for the intermediate lam-alef codes U+065C..U+065F. */
    private static boolean isNormalizedLamAlefChar(char ch) {
        return ch >= '\u065C' && ch <= '\u065F';
    }

    /**
     * Computes the size of the result without producing it. When shaping,
     * each lam (U+0644) adjacent to an alef removes one character; when
     * unshaping, each lam-alef presentation form adds one.
     */
    private int calculateSize(char[] source,
                              int sourceStart,
                              int sourceLength) {

        int destSize = sourceLength;

        switch (options & LETTERS_MASK) {
        case LETTERS_SHAPE:
        case LETTERS_SHAPE_TASHKEEL_ISOLATED:
            if (isLogical) {
                // logical order: the alef follows the lam
                for (int i = sourceStart, e = sourceStart + sourceLength - 1; i < e; ++i) {
                    if (source[i] == '\u0644' && isAlefChar(source[i+1])) {
                        --destSize;
                    }
                }
            } else {
                // visual order: the alef precedes the lam
                for(int i = sourceStart + 1, e = sourceStart + sourceLength; i < e; ++i) {
                    if (source[i] == '\u0644' && isAlefChar(source[i-1])) {
                        --destSize;
                    }
                }
            }
            break;

        case LETTERS_UNSHAPE:
            for(int i = sourceStart, e = sourceStart + sourceLength; i < e; ++i) {
                if (isLamAlefChar(source[i])) {
                    destSize++;
                }
            }
            break;

        default:
            break;
        }

        return destSize;
    }

    /**
     * Removes the U+FFFF markers that shapeUnicode() leaves where an alef was
     * merged into a lam-alef, according to the length option.
     *
     * @return the new length of the text: <code>length</code> for the fixed
     *         length options, the compacted length for grow/shrink
     */
    private int removeLamAlefSpaces(char[] dest,
                                    int start,
                                    int length) {

        int lenOptions = options & LENGTH_MASK;
        if (!isLogical) {
            // Beginning/end are swapped for visual text. For logical text the
            // caller (internalShape) has reversed the buffer instead.
            switch (lenOptions) {
            case LENGTH_FIXED_SPACES_AT_BEGINNING: lenOptions = LENGTH_FIXED_SPACES_AT_END; break;
            case LENGTH_FIXED_SPACES_AT_END: lenOptions = LENGTH_FIXED_SPACES_AT_BEGINNING; break;
            default: break;
            }
        }

        if (lenOptions == LENGTH_FIXED_SPACES_NEAR) {
            // Each marker simply becomes a space where it stands.
            for (int i = start, e = i + length; i < e; ++i) {
                if (dest[i] == '\uffff') {
                    dest[i] = '\u0020';
                }
            }
        } else {
            final int e = start + length;
            int w = e;
            int r = e;
            // Compact towards the high end, dropping markers.
            while (--r >= start) {
                char ch = dest[r];
                if (ch != '\uffff') {
                    --w;
                    if (w != r) {
                        dest[w] = ch;
                    }
                }
            }

            if (lenOptions == LENGTH_FIXED_SPACES_AT_END) {
                // Text stays at the high end; pad the low end with spaces.
                while (w > start) {
                    dest[--w] = '\u0020';
                }
            } else {
                if (w > start) {
                    // Shift the compacted text back down to start.
                    r = w;
                    w = start;
                    while (r < e) {
                        dest[w++] = dest[r++];
                    }
                } else {
                    w = e;
                }
                if (lenOptions == LENGTH_GROW_SHRINK) {
                    length = w - start;
                } else {
                    // Fixed length: pad the high end with spaces.
                    while (w < e) {
                        dest[w++] = '\u0020';
                    }
                }
            }
        }
        return length;
    }

    /**
     * Expands each intermediate lam-alef code (U+065C..U+065F) into a lam
     * (U+0644) and the matching alef, according to the length option.
     *
     * @param lacount the number of lam-alef codes in the range
     * @return the new length of the text
     * @throws ArabicShapingException if a fixed-length option is selected and
     *         there are not enough spaces to absorb the expansion
     */
    private int expandLamAlef(char[] dest,
                              int start,
                              int length,
                              int lacount) throws ArabicShapingException {

        int lenOptions = options & LENGTH_MASK;
        if (!isLogical) {
            // Same beginning/end swap as in removeLamAlefSpaces().
            switch (lenOptions) {
            case LENGTH_FIXED_SPACES_AT_BEGINNING: lenOptions = LENGTH_FIXED_SPACES_AT_END; break;
            case LENGTH_FIXED_SPACES_AT_END: lenOptions = LENGTH_FIXED_SPACES_AT_BEGINNING; break;
            default: break;
            }
        }

        switch (lenOptions) {
        case LENGTH_GROW_SHRINK:
            {
                // Copy from the high end so the expansion never overwrites
                // characters that have not been read yet.
                for (int r = start + length, w = r + lacount; --r >= start;) {
                    char ch = dest[r];
                    if (isNormalizedLamAlefChar(ch)) {
                        dest[--w] = '\u0644';
                        dest[--w] = convertNormalizedLamAlef[ch - '\u065C'];
                    } else {
                        dest[--w] = ch;
                    }
                }
            }
            length += lacount;
            break;

        case LENGTH_FIXED_SPACES_NEAR:
            {
                // A lam-alef at the very start has no preceding slot to use.
                if (isNormalizedLamAlefChar(dest[start])) {
                    throw new ArabicShapingException("no space for lamalef");
                }
                // Index start was checked above, so stop before it.
                for (int i = start + length; --i > start;) {
                    char ch = dest[i];
                    if (isNormalizedLamAlefChar(ch)) {
                        if (dest[i-1] == '\u0020') {
                            dest[i] = '\u0644';
                            dest[--i] = convertNormalizedLamAlef[ch - '\u065C'];
                        } else {
                            throw new ArabicShapingException("no space for lamalef");
                        }
                    }
                }
            }
            break;

        case LENGTH_FIXED_SPACES_AT_END:
            {
                if (lacount > countSpacesLeft(dest, start, length)) {
                    throw new ArabicShapingException("no space for lamalef");
                }
                // Skip lacount leading spaces and write from the low end.
                for (int r = start + lacount, w = start, e = start + length; r < e; ++r) {
                    char ch = dest[r];
                    if (isNormalizedLamAlefChar(ch)) {
                        dest[w++] = convertNormalizedLamAlef[ch - '\u065C'];
                        dest[w++] = '\u0644';
                    } else {
                        dest[w++] = ch;
                    }
                }
            }
            break;

        case LENGTH_FIXED_SPACES_AT_BEGINNING:
            {
                if (lacount > countSpacesRight(dest, start, length)) {
                    throw new ArabicShapingException("no space for lamalef");
                }
                // Skip lacount trailing spaces and write from the high end.
                for (int r = start + length - lacount, w = start + length; --r >= start;) {
                    char ch = dest[r];
                    if (isNormalizedLamAlefChar(ch)) {
                        dest[--w] = '\u0644';
                        dest[--w] = convertNormalizedLamAlef[ch - '\u065C'];
                    } else {
                        dest[--w] = ch;
                    }
                }
            }
            break;
        }

        return length;
    }

    /**
     * Replaces every presentation form U+FE70..U+FEFC in the range by its
     * entry in {@link #convertFEto06}.
     *
     * @return the number of lam-alef presentation forms that were replaced
     */
    private int normalize(char[] dest, int start, int length) {
        int lacount = 0;
        for (int i = start, e = i + length; i < e; ++i) {
            char ch = dest[i];
            if (ch >= '\uFE70' && ch <= '\uFEFC') {
                if (isLamAlefChar(ch)) {
                    ++lacount;
                }
                dest[i] = (char)convertFEto06[ch - '\uFE70'];
            }
        }
        return lacount;
    }

    /**
     * Replaces letters in the range by presentation forms. The range is
     * walked from its last index down to <code>start</code>; for each
     * shapeable character the form is chosen from the link attributes of the
     * character itself, the last relevant character already visited and the
     * next relevant character still to be visited.
     *
     * @param dest         the buffer to shape in place
     * @param start        index of the first character of the range
     * @param length       number of characters in the range; must be positive
     * @param destSize     not read; the value is replaced before being returned
     * @param tashkeelFlag 0 to let tashkeel take a non-isolated form where
     *                     the neighbours allow it, 1 to always use the isolated form
     * @return the length of the shaped text
     */
    private int shapeUnicode(char[] dest,
                             int start,
                             int length,
                             int destSize,
                             int tashkeelFlag) {

        // Work from base letters even if the input already holds
        // presentation forms.
        normalize(dest, start, length);

        boolean lamalef_found = false;
        int i = start + length - 1;
        int currLink = getLink(dest[i]);
        int nextLink = 0;
        int prevLink = 0;
        int lastLink = 0;
        int lastPos = i;
        int nx = -2;   // index of the next relevant character; negative = not known yet
        int nw = 0;

        // The bounds use "start", not 0, so characters in front of the
        // requested range are neither inspected nor modified.
        while (i >= start) {
            // A non-zero high byte means the character has presentation forms.
            if ((currLink & '\uFF00') > 0 || isTashkeelChar(dest[i])) {
                nw = i - 1;
                nx = -2;
                while (nx < 0) { // find the next character that is not IRRELEVANT
                    if (nw < start) {
                        nextLink = 0;
                        nx = Integer.MAX_VALUE;
                    } else {
                        nextLink = getLink(dest[nw]);
                        if ((nextLink & IRRELEVANT) == 0) {
                            nx = nw;
                        } else {
                            --nw;
                        }
                    }
                }

                if (((currLink & ALEFTYPE) > 0) && ((lastLink & LAMTYPE) > 0)) {
                    lamalef_found = true;
                    char wLamalef = changeLamAlef(dest[i]); // U+065C..U+065F, or U+0000
                    if (wLamalef != '\u0000') {
                        // Mark the alef slot for removeLamAlefSpaces() and put
                        // the combined code where the lam was.
                        dest[i] = '\uffff';
                        dest[lastPos] = wLamalef;
                        i = lastPos;
                    }

                    lastLink = prevLink;
                    currLink = getLink(wLamalef); // also evaluated for U+0000
                }

                int flag = specialChar(dest[i]);

                int shape = shapeTable[nextLink & LINK_MASK]
                                      [lastLink & LINK_MASK]
                                      [currLink & LINK_MASK];

                if (flag == 1) {
                    shape &= 0x1;
                } else if (flag == 2) {
                    if (tashkeelFlag == 0 &&
                        ((lastLink & LINKL) != 0) &&
                        ((nextLink & LINKR) != 0) &&
                        dest[i] != '\u064C' &&
                        dest[i] != '\u064D' &&
                        !((nextLink & ALEFTYPE) == ALEFTYPE &&
                          (lastLink & LAMTYPE) == LAMTYPE)) {

                        shape = 1;
                    } else {
                        shape = 0;
                    }
                }

                if (flag == 2) {
                    if (tashkeelFlag < 2) {
                        dest[i] = (char)('\uFE70' + irrelevantPos[dest[i] - '\u064B'] + shape);
                    } // else leave the tashkeel unchanged
                } else {
                    dest[i] = (char)('\uFE70' + (currLink >> 8) + shape);
                }
            }

            // Remember the last relevant character and its position.
            if ((currLink & IRRELEVANT) == 0) {
                prevLink = lastLink;
                lastLink = currLink;
                lastPos = i;
            }

            --i;
            if (i == nx) {
                // The link of this character was already fetched by the look-ahead.
                currLink = nextLink;
                nx = -2;
            } else if (i >= start) {
                currLink = getLink(dest[i]);
            }
        }

        // Lam-alef merging left U+FFFF markers; remove them per the length option.
        if (lamalef_found) {
            destSize = removeLamAlefSpaces(dest, start, length);
        } else {
            destSize = length;
        }

        return destSize;
    }

    /**
     * Replaces presentation forms in the range by base letters and expands
     * lam-alef ligatures.
     *
     * @param destSize not read; the value is replaced before being returned
     * @return the length of the unshaped text
     * @throws ArabicShapingException if a lam-alef cannot be expanded under
     *         the selected length option
     */
    private int deShapeUnicode(char[] dest,
                               int start,
                               int length,
                               int destSize) throws ArabicShapingException {

        int lamalef_count = normalize(dest, start, length);

        if (lamalef_count != 0) {
            destSize = expandLamAlef(dest, start, length, lamalef_count);
        } else {
            destSize = length;
        }

        return destSize;
    }

    /**
     * Implements {@link #shape(char[], int, int, char[], int, int)} after the
     * arguments have been validated. The text is copied to a temporary
     * buffer, reversed when the direction is logical, converted, reversed
     * back and finally copied to <code>dest</code>.
     */
    private int internalShape(char[] source,
                              int sourceStart,
                              int sourceLength,
                              char[] dest,
                              int destStart,
                              int destSize) throws ArabicShapingException {

        if (sourceLength == 0) {
            return 0;
        }

        if (destSize == 0) {
            // Size query only: nothing is written to dest.
            if (((options & LETTERS_MASK) != LETTERS_NOOP) &&
                ((options & LENGTH_MASK) == LENGTH_GROW_SHRINK)) {

                return calculateSize(source, sourceStart, sourceLength);
            } else {
                return sourceLength; // length cannot change
            }
        }

        // Twice the input: enough even if every character is a lam-alef
        // that has to be expanded.
        char[] temp = new char[sourceLength * 2];
        System.arraycopy(source, sourceStart, temp, 0, sourceLength);

        if (isLogical) {
            invertBuffer(temp, 0, sourceLength);
        }

        int outputSize = sourceLength;

        switch (options & LETTERS_MASK) {
        case LETTERS_SHAPE_TASHKEEL_ISOLATED:
            outputSize = shapeUnicode(temp, 0, sourceLength, destSize, 1);
            break;

        case LETTERS_SHAPE:
            outputSize = shapeUnicode(temp, 0, sourceLength, destSize, 0);
            break;

        case LETTERS_UNSHAPE:
            outputSize = deShapeUnicode(temp, 0, sourceLength, destSize);
            break;

        default:
            break;
        }

        if (outputSize > destSize) {
            throw new ArabicShapingException("not enough room for result data");
        }

        if ((options & DIGITS_MASK) != DIGITS_NOOP) {
            char digitBase = '\u0030'; // European digit zero
            switch (options & DIGIT_TYPE_MASK) {
            case DIGIT_TYPE_AN:
                digitBase = '\u0660'; // Arabic-Indic digit zero
                break;

            case DIGIT_TYPE_AN_EXTENDED:
                digitBase = '\u06f0'; // Extended Arabic-Indic digit zero
                break;

            default:
                break;
            }

            switch (options & DIGITS_MASK) {
            case DIGITS_EN2AN:
                {
                    int digitDelta = digitBase - '\u0030';
                    for (int i = 0; i < outputSize; ++i) {
                        char ch = temp[i];
                        if (ch <= '\u0039' && ch >= '\u0030') {
                            temp[i] += digitDelta;
                        }
                    }
                }
                break;

            case DIGITS_AN2EN:
                {
                    char digitTop = (char)(digitBase + 9);
                    int digitDelta = '\u0030' - digitBase;
                    for (int i = 0; i < outputSize; ++i) {
                        char ch = temp[i];
                        if (ch <= digitTop && ch >= digitBase) {
                            temp[i] += digitDelta;
                        }
                    }
                }
                break;

            case DIGITS_EN2AN_INIT_LR:
                shapeToArabicDigitsWithContext(temp, 0, outputSize, digitBase, false);
                break;

            case DIGITS_EN2AN_INIT_AL:
                shapeToArabicDigitsWithContext(temp, 0, outputSize, digitBase, true);
                break;

            default:
                break;
            }
        }

        if (isLogical) {
            invertBuffer(temp, 0, outputSize);
        }

        System.arraycopy(temp, 0, dest, destStart, outputSize);

        return outputSize;
    }
}