1 47 package com.lowagie.text.pdf; 48 49 54 public class ArabicLigaturizer { 55 56 static boolean isVowel(char s) { 57 return ((s >= 0x064B) && (s <= 0x0655)) || (s == 0x0670); 58 } 59 60 static char charshape(char s, int which) 61 62 { 63 int l, r, m; 64 if ((s >= 0x0621) && (s <= 0x06D3)) { 65 l = 0; 66 r = chartable.length - 1; 67 while (l <= r) { 68 m = (l + r) / 2; 69 if (s == chartable[m][0]) { 70 return chartable[m][which + 1]; 71 } 72 else if (s < chartable[m][0]) { 73 r = m - 1; 74 } 75 else { 76 l = m + 1; 77 } 78 } 79 } 80 else if (s >= 0xfef5 && s <= 0xfefb) 81 return (char)(s + which); 82 return s; 83 } 84 85 static int shapecount(char s) { 86 int l, r, m; 87 if ((s >= 0x0621) && (s <= 0x06D3) && !isVowel(s)) { 88 l = 0; 89 r = chartable.length - 1; 90 while (l <= r) { 91 m = (l + r) / 2; 92 if (s == chartable[m][0]) { 93 return chartable[m].length - 1; 94 } 95 else if (s < chartable[m][0]) { 96 r = m - 1; 97 } 98 else { 99 l = m + 1; 100 } 101 } 102 } 103 else if (s == ZWJ) { 104 return 4; 105 } 106 return 1; 107 } 108 109 static int ligature(char newchar, charstruct oldchar) { 110 111 int retval = 0; 112 113 if (oldchar.basechar == 0) 114 return 0; 115 if (isVowel(newchar)) { 116 retval = 1; 117 if ((oldchar.vowel != 0) && (newchar != SHADDA)) { 118 retval = 2; 119 } 120 switch (newchar) { 121 case SHADDA: 122 if (oldchar.mark1 == 0) { 123 oldchar.mark1 = SHADDA; 124 } 125 else { 126 return 0; 127 } 128 break; 129 case HAMZABELOW: 130 switch (oldchar.basechar) { 131 case ALEF: 132 oldchar.basechar = ALEFHAMZABELOW; 133 retval = 2; 134 break; 135 case LAM_ALEF: 136 oldchar.basechar = LAM_ALEFHAMZABELOW; 137 retval = 2; 138 break; 139 default: 140 oldchar.mark1 = HAMZABELOW; 141 break; 142 } 143 break; 144 case HAMZAABOVE: 145 switch (oldchar.basechar) { 146 case ALEF: 147 oldchar.basechar = ALEFHAMZA; 148 retval = 2; 149 break; 150 case LAM_ALEF: 151 oldchar.basechar = LAM_ALEFHAMZA; 152 retval = 2; 153 break; 154 case WAW: 155 oldchar.basechar = WAWHAMZA; 156 retval = 2; 157 break; 158 case YEH: 159 case ALEFMAKSURA: 160 case FARSIYEH: 161 oldchar.basechar = YEHHAMZA; 162 retval = 2; 163 break; 164 default: 165 oldchar.mark1 = HAMZAABOVE; 166 break; 167 } 168 break; 169 case MADDA: 170 switch (oldchar.basechar) { 171 case ALEF: 172 oldchar.basechar = ALEFMADDA; 173 retval = 2; 174 break; 175 } 176 break; 177 default: 178 oldchar.vowel = newchar; 179 break; 180 } 181 if (retval == 1) { 182 oldchar.lignum++; 183 } 184 return retval; 185 } 186 if (oldchar.vowel != 0) { 187 return 0; 188 } 189 190 switch (oldchar.basechar) { 191 case LAM: 192 switch (newchar) { 193 case ALEF: 194 oldchar.basechar = LAM_ALEF; 195 oldchar.numshapes = 2; 196 retval = 3; 197 break; 198 case ALEFHAMZA: 199 oldchar.basechar = LAM_ALEFHAMZA; 200 oldchar.numshapes = 2; 201 retval = 3; 202 break; 203 case ALEFHAMZABELOW: 204 oldchar.basechar = LAM_ALEFHAMZABELOW; 205 oldchar.numshapes = 2; 206 retval = 3; 207 break; 208 case ALEFMADDA: 209 oldchar.basechar = LAM_ALEFMADDA; 210 oldchar.numshapes = 2; 211 retval = 3; 212 break; 213 } 214 break; 215 case 0: 216 oldchar.basechar = newchar; 217 oldchar.numshapes = shapecount(newchar); 218 retval = 1; 219 break; 220 } 221 return retval; 222 } 223 224 static void copycstostring(StringBuffer string, charstruct s, int level) { 225 226 if (s.basechar == 0) 227 return; 228 229 string.append(s.basechar); 230 s.lignum--; 231 if (s.mark1 != 0) { 232 if ((level & ar_novowel) == 0) { 233 string.append(s.mark1); 234 s.lignum--; 235 } 236 else { 237 s.lignum--; 238 } 239 } 240 if (s.vowel != 0) { 241 if ((level & ar_novowel) == 0) { 242 string.append(s.vowel); 243 s.lignum--; 244 } 245 else { 246 s.lignum--; 247 } 248 } 249 } 256 257 static void doublelig(StringBuffer string, int level) 259 260 { 261 int len; 262 int olen = len = string.length(); 263 int j = 0, si = 1; 264 char lapresult; 265 266 while (si < olen) { 267 lapresult = 0; 268 if ((level & ar_composedtashkeel) != 0) { 269 switch (string.charAt(j)) { 270 case SHADDA: 271 switch (string.charAt(si)) { 272 case KASRA: 273 lapresult = 0xFC62; 274 break; 275 case FATHA: 276 lapresult = 0xFC60; 277 break; 278 case DAMMA: 279 lapresult = 0xFC61; 280 break; 281 case 0x064C: 282 lapresult = 0xFC5E; 283 break; 284 case 0x064D: 285 lapresult = 0xFC5F; 286 break; 287 } 288 break; 289 case KASRA: 290 if (string.charAt(si) == SHADDA) 291 lapresult = 0xFC62; 292 break; 293 case FATHA: 294 if (string.charAt(si) == SHADDA) 295 lapresult = 0xFC60; 296 break; 297 case DAMMA: 298 if (string.charAt(si) == SHADDA) 299 lapresult = 0xFC61; 300 break; 301 } 302 } 303 304 if ((level & ar_lig) != 0) { 305 switch (string.charAt(j)) { 306 case 0xFEDF: 307 switch (string.charAt(si)) { 308 case 0xFE9E: 309 lapresult = 0xFC3F; 310 break; 311 case 0xFEA0: 312 lapresult = 0xFCC9; 313 break; 314 case 0xFEA2: 315 lapresult = 0xFC40; 316 break; 317 case 0xFEA4: 318 lapresult = 0xFCCA; 319 break; 320 case 0xFEA6: 321 lapresult = 0xFC41; 322 break; 323 case 0xFEA8: 324 lapresult = 0xFCCB; 325 break; 326 case 0xFEE2: 327 lapresult = 0xFC42; 328 break; 329 case 0xFEE4: 330 lapresult = 0xFCCC; 331 break; 332 } 333 break; 334 case 0xFE97: 335 switch (string.charAt(si)) { 336 case 0xFEA0: 337 lapresult = 0xFCA1; 338 break; 339 case 0xFEA4: 340 lapresult = 0xFCA2; 341 break; 342 case 0xFEA8: 343 lapresult = 0xFCA3; 344 break; 345 } 346 break; 347 case 0xFE91: 348 switch (string.charAt(si)) { 349 case 0xFEA0: 350 lapresult = 0xFC9C; 351 break; 352 case 0xFEA4: 353 lapresult = 0xFC9D; 354 break; 355 case 0xFEA8: 356 lapresult = 0xFC9E; 357 break; 358 } 359 break; 360 case 0xFEE7: 361 switch (string.charAt(si)) { 362 case 0xFEA0: 363 lapresult = 0xFCD2; 364 break; 365 case 0xFEA4: 366 lapresult = 0xFCD3; 367 break; 368 case 0xFEA8: 369 lapresult = 0xFCD4; 370 break; 371 } 372 break; 373 374 case 0xFEE8: 375 switch (string.charAt(si)) { 376 case 0xFEAE: 377 lapresult = 0xFC8A; 378 break; 379 case 0xFEB0: 380 lapresult = 0xFC8B; 381 break; 382 } 383 break; 384 case 0xFEE3: 385 switch (string.charAt(si)) { 386 case 0xFEA0: 387 lapresult = 0xFCCE; 388 break; 389 case 0xFEA4: 390 lapresult = 0xFCCF; 391 break; 392 case 0xFEA8: 393 lapresult = 0xFCD0; 394 break; 395 case 0xFEE4: 396 lapresult = 0xFCD1; 397 break; 398 } 399 break; 400 401 case 0xFED3: 402 switch (string.charAt(si)) { 403 case 0xFEF2: 404 lapresult = 0xFC32; 405 break; 406 } 407 break; 408 409 default: 410 break; 411 } 412 } 413 if (lapresult != 0) { 414 string.setCharAt(j, lapresult); 415 len--; 416 si++; 417 418 } 419 else { 420 j++; 421 string.setCharAt(j, string.charAt(si)); 422 si++; 423 } 424 } 425 string.setLength(len); 426 } 427 428 static boolean connects_to_left(charstruct a) { 429 return a.numshapes > 2; 430 } 431 432 static void shape(char text[], StringBuffer string, int level) { 433 440 int join; 441 int which; 442 char nextletter; 443 444 int p = 0; 445 charstruct oldchar = new charstruct(); 446 charstruct curchar = new charstruct(); 447 while (p < text.length) { 448 nextletter = text[p++]; 449 451 join = ligature(nextletter, curchar); 452 if (join == 0) { 453 int nc = shapecount(nextletter); 454 if (nc == 1) { 456 which = 0; 457 } 458 else { 459 which = 2; 460 } 461 if (connects_to_left(oldchar)) { 462 which++; 463 } 464 465 which = which % (curchar.numshapes); 466 curchar.basechar = charshape(curchar.basechar, which); 467 468 469 copycstostring(string, oldchar, level); 470 oldchar = curchar; 471 472 473 curchar = new charstruct(); 474 curchar.basechar = nextletter; 475 curchar.numshapes = nc; 476 curchar.lignum++; 477 } 479 else if (join == 1) { 480 } 481 } 487 488 489 if (connects_to_left(oldchar)) 490 which = 1; 491 else 492 which = 0; 493 which = which % (curchar.numshapes); 494 curchar.basechar = charshape(curchar.basechar, which); 495 496 497 copycstostring(string, oldchar, level); 498 copycstostring(string, curchar, level); 499 } 500 501 static int arabic_shape(char src[], int srcoffset, int srclength, char dest[], int destoffset, int destlength, int level) { 502 char str[] = new char[srclength]; 503 for (int k = srclength + srcoffset - 1; k >= srcoffset; --k) 504 str[k - srcoffset] = src[k]; 505 StringBuffer string = new StringBuffer (srclength); 506 shape(str, string, level); 507 if ((level & (ar_composedtashkeel | ar_lig)) != 0) 508 doublelig(string, level); 509 System.arraycopy(string.toString().toCharArray(), 0, dest, destoffset, string.length()); 511 return string.length(); 512 } 513 514 static void processNumbers(char text[], int offset, int length, int options) { 515 int limit = offset + length; 516 if ((options & DIGITS_MASK) != 0) { 517 char digitBase = '\u0030'; switch (options & DIGIT_TYPE_MASK) { 519 case DIGIT_TYPE_AN: 520 digitBase = '\u0660'; break; 522 523 case DIGIT_TYPE_AN_EXTENDED: 524 digitBase = '\u06f0'; break; 526 527 default: 528 break; 529 } 530 531 switch (options & DIGITS_MASK) { 532 case DIGITS_EN2AN: { 533 int digitDelta = digitBase - '\u0030'; 534 for (int i = offset; i < limit; ++i) { 535 char ch = text[i]; 536 if (ch <= '\u0039' && ch >= '\u0030') { 537 text[i] += digitDelta; 538 } 539 } 540 } 541 break; 542 543 case DIGITS_AN2EN: { 544 char digitTop = (char)(digitBase + 9); 545 int digitDelta = '\u0030' - digitBase; 546 for (int i = offset; i < limit; ++i) { 547 char ch = text[i]; 548 if (ch <= digitTop && ch >= digitBase) { 549 text[i] += digitDelta; 550 } 551 } 552 } 553 break; 554 555 case DIGITS_EN2AN_INIT_LR: 556 shapeToArabicDigitsWithContext(text, 0, length, digitBase, false); 557 break; 558 559 case DIGITS_EN2AN_INIT_AL: 560 shapeToArabicDigitsWithContext(text, 0, length, digitBase, true); 561 break; 562 563 default: 564 break; 565 } 566 } 567 } 568 569 static void shapeToArabicDigitsWithContext(char[] dest, int start, int length, char digitBase, boolean lastStrongWasAL) { 570 digitBase -= '0'; 572 int limit = start + length; 573 for(int i = start; i < limit; ++i) { 574 char ch = dest[i]; 575 switch (BidiOrder.getDirection(ch)) { 576 case BidiOrder.L: 577 case BidiOrder.R: 578 lastStrongWasAL = false; 579 break; 580 case BidiOrder.AL: 581 lastStrongWasAL = true; 582 break; 583 case BidiOrder.EN: 584 if (lastStrongWasAL && ch <= '\u0039') { 585 dest[i] = (char)(ch + digitBase); 586 } 587 break; 588 default: 589 break; 590 } 591 } 592 } 593 594 private static final char ALEF = 0x0627; 595 private static final char ALEFHAMZA = 0x0623; 596 private static final char ALEFHAMZABELOW = 0x0625; 597 private static final char ALEFMADDA = 0x0622; 598 private static final char LAM = 0x0644; 599 private static final char HAMZA = 0x0621; 600 private static final char TATWEEL = 0x0640; 601 private static final char ZWJ = 0x200D; 602 603 private static final char HAMZAABOVE = 0x0654; 604 private static final char HAMZABELOW = 0x0655; 605 606 private static final char WAWHAMZA = 0x0624; 607 private static final char YEHHAMZA = 0x0626; 608 private static final char WAW = 0x0648; 609 private static final char ALEFMAKSURA = 0x0649; 610 private static final char YEH = 0x064A; 611 private static final char FARSIYEH = 0x06CC; 612 613 private static final char SHADDA = 0x0651; 614 private static final char KASRA = 0x0650; 615 private static final char FATHA = 0x064E; 616 private static final char DAMMA = 0x064F; 617 private static final char MADDA = 0x0653; 618 619 private static final char LAM_ALEF = 0xFEFB; 620 private static final char LAM_ALEFHAMZA = 0xFEF7; 621 private static final char LAM_ALEFHAMZABELOW = 0xFEF9; 622 private static final char LAM_ALEFMADDA = 0xFEF5; 623 624 private static final char chartable[][] = { 625 {0x0621, 0xFE80}, 626 {0x0622, 0xFE81, 0xFE82}, 627 {0x0623, 0xFE83, 0xFE84}, 628 {0x0624, 0xFE85, 0xFE86}, 629 {0x0625, 0xFE87, 0xFE88}, 630 {0x0626, 0xFE89, 0xFE8A, 0xFE8B, 0xFE8C}, 631 {0x0627, 0xFE8D, 0xFE8E}, 632 {0x0628, 0xFE8F, 0xFE90, 0xFE91, 0xFE92}, 633 {0x0629, 0xFE93, 0xFE94}, 634 {0x062A, 0xFE95, 0xFE96, 0xFE97, 0xFE98}, 635 {0x062B, 0xFE99, 0xFE9A, 0xFE9B, 0xFE9C}, 636 {0x062C, 0xFE9D, 0xFE9E, 0xFE9F, 0xFEA0}, 637 {0x062D, 0xFEA1, 0xFEA2, 0xFEA3, 0xFEA4}, 638 {0x062E, 0xFEA5, 0xFEA6, 0xFEA7, 0xFEA8}, 639 {0x062F, 0xFEA9, 0xFEAA}, 640 {0x0630, 0xFEAB, 0xFEAC}, 641 {0x0631, 0xFEAD, 0xFEAE}, 642 {0x0632, 0xFEAF, 0xFEB0}, 643 {0x0633, 0xFEB1, 0xFEB2, 0xFEB3, 0xFEB4}, 644 {0x0634, 0xFEB5, 0xFEB6, 0xFEB7, 0xFEB8}, 645 {0x0635, 0xFEB9, 0xFEBA, 0xFEBB, 0xFEBC}, 646 {0x0636, 0xFEBD, 0xFEBE, 0xFEBF, 0xFEC0}, 647 {0x0637, 0xFEC1, 0xFEC2, 0xFEC3, 0xFEC4}, 648 {0x0638, 0xFEC5, 0xFEC6, 0xFEC7, 0xFEC8}, 649 {0x0639, 0xFEC9, 0xFECA, 0xFECB, 0xFECC}, 650 {0x063A, 0xFECD, 0xFECE, 0xFECF, 0xFED0}, 651 {0x0640, 0x0640, 0x0640, 0x0640, 0x0640}, 652 {0x0641, 0xFED1, 0xFED2, 0xFED3, 0xFED4}, 653 {0x0642, 0xFED5, 0xFED6, 0xFED7, 0xFED8}, 654 {0x0643, 0xFED9, 0xFEDA, 0xFEDB, 0xFEDC}, 655 {0x0644, 0xFEDD, 0xFEDE, 0xFEDF, 0xFEE0}, 656 {0x0645, 0xFEE1, 0xFEE2, 0xFEE3, 0xFEE4}, 657 {0x0646, 0xFEE5, 0xFEE6, 0xFEE7, 0xFEE8}, 658 {0x0647, 0xFEE9, 0xFEEA, 0xFEEB, 0xFEEC}, 659 {0x0648, 0xFEED, 0xFEEE}, 660 {0x0649, 0xFEEF, 0xFEF0, 0xFBE8, 0xFBE9}, 661 {0x064A, 0xFEF1, 0xFEF2, 0xFEF3, 0xFEF4}, 662 {0x0671, 0xFB50, 0xFB51}, 663 {0x0679, 0xFB66, 0xFB67, 0xFB68, 0xFB69}, 664 {0x067A, 0xFB5E, 0xFB5F, 0xFB60, 0xFB61}, 665 {0x067B, 0xFB52, 0xFB53, 0xFB54, 0xFB55}, 666 {0x067E, 0xFB56, 0xFB57, 0xFB58, 0xFB59}, 667 {0x067F, 0xFB62, 0xFB63, 0xFB64, 0xFB65}, 668 {0x0680, 0xFB5A, 0xFB5B, 0xFB5C, 0xFB5D}, 669 {0x0683, 0xFB76, 0xFB77, 0xFB78, 0xFB79}, 670 {0x0684, 0xFB72, 0xFB73, 0xFB74, 0xFB75}, 671 {0x0686, 0xFB7A, 0xFB7B, 0xFB7C, 0xFB7D}, 672 {0x0687, 0xFB7E, 0xFB7F, 0xFB80, 0xFB81}, 673 {0x0688, 0xFB88, 0xFB89}, 674 {0x068C, 0xFB84, 0xFB85}, 675 {0x068D, 0xFB82, 0xFB83}, 676 {0x068E, 0xFB86, 0xFB87}, 677 {0x0691, 0xFB8C, 0xFB8D}, 678 {0x0698, 0xFB8A, 0xFB8B}, 679 {0x06A4, 0xFB6A, 0xFB6B, 0xFB6C, 0xFB6D}, 680 {0x06A6, 0xFB6E, 0xFB6F, 0xFB70, 0xFB71}, 681 {0x06A9, 0xFB8E, 0xFB8F, 0xFB90, 0xFB91}, 682 {0x06AD, 0xFBD3, 0xFBD4, 0xFBD5, 0xFBD6}, 683 {0x06AF, 0xFB92, 0xFB93, 0xFB94, 0xFB95}, 684 {0x06B1, 0xFB9A, 0xFB9B, 0xFB9C, 0xFB9D}, 685 {0x06B3, 0xFB96, 0xFB97, 0xFB98, 0xFB99}, 686 {0x06BA, 0xFB9E, 0xFB9F}, 687 {0x06BB, 0xFBA0, 0xFBA1, 0xFBA2, 0xFBA3}, 688 {0x06BE, 0xFBAA, 0xFBAB, 0xFBAC, 0xFBAD}, 689 {0x06C0, 0xFBA4, 0xFBA5}, 690 {0x06C1, 0xFBA6, 0xFBA7, 0xFBA8, 0xFBA9}, 691 {0x06C5, 0xFBE0, 0xFBE1}, 692 {0x06C6, 0xFBD9, 0xFBDA}, 693 {0x06C7, 0xFBD7, 0xFBD8}, 694 {0x06C8, 0xFBDB, 0xFBDC}, 695 {0x06C9, 0xFBE2, 0xFBE3}, 696 {0x06CB, 0xFBDE, 0xFBDF}, 697 {0x06CC, 0xFBFC, 0xFBFD, 0xFBFE, 0xFBFF}, 698 {0x06D0, 0xFBE4, 0xFBE5, 0xFBE6, 0xFBE7}, 699 {0x06D2, 0xFBAE, 0xFBAF}, 700 {0x06D3, 0xFBB0, 0xFBB1} 701 }; 702 703 public static final int ar_nothing = 0x0; 704 public static final int ar_novowel = 0x1; 705 public static final int ar_composedtashkeel = 0x4; 706 public static final int ar_lig = 0x8; 707 710 public static final int DIGITS_EN2AN = 0x20; 711 712 715 public static final int DIGITS_AN2EN = 0x40; 716 717 726 public static final int DIGITS_EN2AN_INIT_LR = 0x60; 727 728 737 public static final int DIGITS_EN2AN_INIT_AL = 0x80; 738 739 740 private static final int DIGITS_RESERVED = 0xa0; 741 742 745 public static final int DIGITS_MASK = 0xe0; 746 747 750 public static final int DIGIT_TYPE_AN = 0; 751 752 755 public static final int DIGIT_TYPE_AN_EXTENDED = 0x100; 756 757 760 public static final int DIGIT_TYPE_MASK = 0x0100; 762 static class charstruct { 763 char basechar; 764 char mark1; 765 char vowel; 766 int lignum; 767 int numshapes = 1; 768 }; 769 770 771 } 772 | Popular Tags |