| 1 47 package com.lowagie.text.pdf; 48 49 54 public class ArabicLigaturizer { 55 56 static boolean isVowel(char s) { 57 return ((s >= 0x064B) && (s <= 0x0655)) || (s == 0x0670); 58 } 59 60 static char charshape(char s, int which) 61 62 { 63 int l, r, m; 64 if ((s >= 0x0621) && (s <= 0x06D3)) { 65 l = 0; 66 r = chartable.length - 1; 67 while (l <= r) { 68 m = (l + r) / 2; 69 if (s == chartable[m][0]) { 70 return chartable[m][which + 1]; 71 } 72 else if (s < chartable[m][0]) { 73 r = m - 1; 74 } 75 else { 76 l = m + 1; 77 } 78 } 79 } 80 else if (s >= 0xfef5 && s <= 0xfefb) 81 return (char)(s + which); 82 return s; 83 } 84 85 static int shapecount(char s) { 86 int l, r, m; 87 if ((s >= 0x0621) && (s <= 0x06D3) && !isVowel(s)) { 88 l = 0; 89 r = chartable.length - 1; 90 while (l <= r) { 91 m = (l + r) / 2; 92 if (s == chartable[m][0]) { 93 return chartable[m].length - 1; 94 } 95 else if (s < chartable[m][0]) { 96 r = m - 1; 97 } 98 else { 99 l = m + 1; 100 } 101 } 102 } 103 else if (s == ZWJ) { 104 return 4; 105 } 106 return 1; 107 } 108 109 static int ligature(char newchar, charstruct oldchar) { 110 111 int retval = 0; 112 113 if (oldchar.basechar == 0) 114 return 0; 115 if (isVowel(newchar)) { 116 retval = 1; 117 if ((oldchar.vowel != 0) && (newchar != SHADDA)) { 118 retval = 2; 119 } 120 switch (newchar) { 121 case SHADDA: 122 if (oldchar.mark1 == 0) { 123 oldchar.mark1 = SHADDA; 124 } 125 else { 126 return 0; 127 } 128 break; 129 case HAMZABELOW: 130 switch (oldchar.basechar) { 131 case ALEF: 132 oldchar.basechar = ALEFHAMZABELOW; 133 retval = 2; 134 break; 135 case LAM_ALEF: 136 oldchar.basechar = LAM_ALEFHAMZABELOW; 137 retval = 2; 138 break; 139 default: 140 oldchar.mark1 = HAMZABELOW; 141 break; 142 } 143 break; 144 case HAMZAABOVE: 145 switch (oldchar.basechar) { 146 case ALEF: 147 oldchar.basechar = ALEFHAMZA; 148 retval = 2; 149 break; 150 case LAM_ALEF: 151 oldchar.basechar = LAM_ALEFHAMZA; 152 retval = 2; 153 break; 154 case WAW: 155 oldchar.basechar = WAWHAMZA; 156 retval = 2; 157 break; 158 case YEH: 159 case ALEFMAKSURA: 160 case FARSIYEH: 161 oldchar.basechar = YEHHAMZA; 162 retval = 2; 163 break; 164 default: 165 oldchar.mark1 = HAMZAABOVE; 166 break; 167 } 168 break; 169 case MADDA: 170 switch (oldchar.basechar) { 171 case ALEF: 172 oldchar.basechar = ALEFMADDA; 173 retval = 2; 174 break; 175 } 176 break; 177 default: 178 oldchar.vowel = newchar; 179 break; 180 } 181 if (retval == 1) { 182 oldchar.lignum++; 183 } 184 return retval; 185 } 186 if (oldchar.vowel != 0) { 187 return 0; 188 } 189 190 switch (oldchar.basechar) { 191 case LAM: 192 switch (newchar) { 193 case ALEF: 194 oldchar.basechar = LAM_ALEF; 195 oldchar.numshapes = 2; 196 retval = 3; 197 break; 198 case ALEFHAMZA: 199 oldchar.basechar = LAM_ALEFHAMZA; 200 oldchar.numshapes = 2; 201 retval = 3; 202 break; 203 case ALEFHAMZABELOW: 204 oldchar.basechar = LAM_ALEFHAMZABELOW; 205 oldchar.numshapes = 2; 206 retval = 3; 207 break; 208 case ALEFMADDA: 209 oldchar.basechar = LAM_ALEFMADDA; 210 oldchar.numshapes = 2; 211 retval = 3; 212 break; 213 } 214 break; 215 case 0: 216 oldchar.basechar = newchar; 217 oldchar.numshapes = shapecount(newchar); 218 retval = 1; 219 break; 220 } 221 return retval; 222 } 223 224 static void copycstostring(StringBuffer string, charstruct s, int level) { 225 226 if (s.basechar == 0) 227 return; 228 229 string.append(s.basechar); 230 s.lignum--; 231 if (s.mark1 != 0) { 232 if ((level & ar_novowel) == 0) { 233 string.append(s.mark1); 234 s.lignum--; 235 } 236 else { 237 s.lignum--; 238 } 239 } 240 if (s.vowel != 0) { 241 if ((level & ar_novowel) == 0) { 242 string.append(s.vowel); 243 s.lignum--; 244 } 245 else { 246 s.lignum--; 247 } 248 } 249 } 256 257 static void doublelig(StringBuffer string, int level) 259 260 { 261 int len; 262 int olen = len = string.length(); 263 int j = 0, si = 1; 264 char lapresult; 265 266 while (si < olen) { 267 lapresult = 0; 268 if ((level & ar_composedtashkeel) != 0) { 269 switch (string.charAt(j)) { 270 case SHADDA: 271 switch (string.charAt(si)) { 272 case KASRA: 273 lapresult = 0xFC62; 274 break; 275 case FATHA: 276 lapresult = 0xFC60; 277 break; 278 case DAMMA: 279 lapresult = 0xFC61; 280 break; 281 case 0x064C: 282 lapresult = 0xFC5E; 283 break; 284 case 0x064D: 285 lapresult = 0xFC5F; 286 break; 287 } 288 break; 289 case KASRA: 290 if (string.charAt(si) == SHADDA) 291 lapresult = 0xFC62; 292 break; 293 case FATHA: 294 if (string.charAt(si) == SHADDA) 295 lapresult = 0xFC60; 296 break; 297 case DAMMA: 298 if (string.charAt(si) == SHADDA) 299 lapresult = 0xFC61; 300 break; 301 } 302 } 303 304 if ((level & ar_lig) != 0) { 305 switch (string.charAt(j)) { 306 case 0xFEDF: 307 switch (string.charAt(si)) { 308 case 0xFE9E: 309 lapresult = 0xFC3F; 310 break; 311 case 0xFEA0: 312 lapresult = 0xFCC9; 313 break; 314 case 0xFEA2: 315 lapresult = 0xFC40; 316 break; 317 case 0xFEA4: 318 lapresult = 0xFCCA; 319 break; 320 case 0xFEA6: 321 lapresult = 0xFC41; 322 break; 323 case 0xFEA8: 324 lapresult = 0xFCCB; 325 break; 326 case 0xFEE2: 327 lapresult = 0xFC42; 328 break; 329 case 0xFEE4: 330 lapresult = 0xFCCC; 331 break; 332 } 333 break; 334 case 0xFE97: 335 switch (string.charAt(si)) { 336 case 0xFEA0: 337 lapresult = 0xFCA1; 338 break; 339 case 0xFEA4: 340 lapresult = 0xFCA2; 341 break; 342 case 0xFEA8: 343 lapresult = 0xFCA3; 344 break; 345 } 346 break; 347 case 0xFE91: 348 switch (string.charAt(si)) { 349 case 0xFEA0: 350 lapresult = 0xFC9C; 351 break; 352 case 0xFEA4: 353 lapresult = 0xFC9D; 354 break; 355 case 0xFEA8: 356 lapresult = 0xFC9E; 357 break; 358 } 359 break; 360 case 0xFEE7: 361 switch (string.charAt(si)) { 362 case 0xFEA0: 363 lapresult = 0xFCD2; 364 break; 365 case 0xFEA4: 366 lapresult = 0xFCD3; 367 break; 368 case 0xFEA8: 369 lapresult = 0xFCD4; 370 break; 371 } 372 break; 373 374 case 0xFEE8: 375 switch (string.charAt(si)) { 376 case 0xFEAE: 377 lapresult = 0xFC8A; 378 break; 379 case 0xFEB0: 380 lapresult = 0xFC8B; 381 break; 382 } 383 break; 384 case 0xFEE3: 385 switch (string.charAt(si)) { 386 case 0xFEA0: 387 lapresult = 0xFCCE; 388 break; 389 case 0xFEA4: 390 lapresult = 0xFCCF; 391 break; 392 case 0xFEA8: 393 lapresult = 0xFCD0; 394 break; 395 case 0xFEE4: 396 lapresult = 0xFCD1; 397 break; 398 } 399 break; 400 401 case 0xFED3: 402 switch (string.charAt(si)) { 403 case 0xFEF2: 404 lapresult = 0xFC32; 405 break; 406 } 407 break; 408 409 default: 410 break; 411 } 412 } 413 if (lapresult != 0) { 414 string.setCharAt(j, lapresult); 415 len--; 416 si++; 417 418 } 419 else { 420 j++; 421 string.setCharAt(j, string.charAt(si)); 422 si++; 423 } 424 } 425 string.setLength(len); 426 } 427 428 static boolean connects_to_left(charstruct a) { 429 return a.numshapes > 2; 430 } 431 432 static void shape(char text[], StringBuffer string, int level) { 433 440 int join; 441 int which; 442 char nextletter; 443 444 int p = 0; 445 charstruct oldchar = new charstruct(); 446 charstruct curchar = new charstruct(); 447 while (p < text.length) { 448 nextletter = text[p++]; 449 451 join = ligature(nextletter, curchar); 452 if (join == 0) { 453 int nc = shapecount(nextletter); 454 if (nc == 1) { 456 which = 0; 457 } 458 else { 459 which = 2; 460 } 461 if (connects_to_left(oldchar)) { 462 which++; 463 } 464 465 which = which % (curchar.numshapes); 466 curchar.basechar = charshape(curchar.basechar, which); 467 468 469 copycstostring(string, oldchar, level); 470 oldchar = curchar; 471 472 473 curchar = new charstruct(); 474 curchar.basechar = nextletter; 475 curchar.numshapes = nc; 476 curchar.lignum++; 477 } 479 else if (join == 1) { 480 } 481 } 487 488 489 if (connects_to_left(oldchar)) 490 which = 1; 491 else 492 which = 0; 493 which = which % (curchar.numshapes); 494 curchar.basechar = charshape(curchar.basechar, which); 495 496 497 copycstostring(string, oldchar, level); 498 copycstostring(string, curchar, level); 499 } 500 501 static int arabic_shape(char src[], int srcoffset, int srclength, char dest[], int destoffset, int destlength, int level) { 502 char str[] = new char[srclength]; 503 for (int k = srclength + srcoffset - 1; k >= srcoffset; --k) 504 str[k - srcoffset] = src[k]; 505 StringBuffer string = new StringBuffer (srclength); 506 shape(str, string, level); 507 if ((level & (ar_composedtashkeel | ar_lig)) != 0) 508 doublelig(string, level); 509 System.arraycopy(string.toString().toCharArray(), 0, dest, destoffset, string.length()); 511 return string.length(); 512 } 513 514 static void processNumbers(char text[], int offset, int length, int options) { 515 int limit = offset + length; 516 if ((options & DIGITS_MASK) != 0) { 517 char digitBase = '\u0030'; switch (options & DIGIT_TYPE_MASK) { 519 case DIGIT_TYPE_AN: 520 digitBase = '\u0660'; break; 522 523 case DIGIT_TYPE_AN_EXTENDED: 524 digitBase = '\u06f0'; break; 526 527 default: 528 break; 529 } 530 531 switch (options & DIGITS_MASK) { 532 case DIGITS_EN2AN: { 533 int digitDelta = digitBase - '\u0030'; 534 for (int i = offset; i < limit; ++i) { 535 char ch = text[i]; 536 if (ch <= '\u0039' && ch >= '\u0030') { 537 text[i] += digitDelta; 538 } 539 } 540 } 541 break; 542 543 case DIGITS_AN2EN: { 544 char digitTop = (char)(digitBase + 9); 545 int digitDelta = '\u0030' - digitBase; 546 for (int i = offset; i < limit; ++i) { 547 char ch = text[i]; 548 if (ch <= digitTop && ch >= digitBase) { 549 text[i] += digitDelta; 550 } 551 } 552 } 553 break; 554 555 case DIGITS_EN2AN_INIT_LR: 556 shapeToArabicDigitsWithContext(text, 0, length, digitBase, false); 557 break; 558 559 case DIGITS_EN2AN_INIT_AL: 560 shapeToArabicDigitsWithContext(text, 0, length, digitBase, true); 561 break; 562 563 default: 564 break; 565 } 566 } 567 } 568 569 static void shapeToArabicDigitsWithContext(char[] dest, int start, int length, char digitBase, boolean lastStrongWasAL) { 570 digitBase -= '0'; 572 int limit = start + length; 573 for(int i = start; i < limit; ++i) { 574 char ch = dest[i]; 575 switch (BidiOrder.getDirection(ch)) { 576 case BidiOrder.L: 577 case BidiOrder.R: 578 lastStrongWasAL = false; 579 break; 580 case BidiOrder.AL: 581 lastStrongWasAL = true; 582 break; 583 case BidiOrder.EN: 584 if (lastStrongWasAL && ch <= '\u0039') { 585 dest[i] = (char)(ch + digitBase); 586 } 587 break; 588 default: 589 break; 590 } 591 } 592 } 593 594 private static final char ALEF = 0x0627; 595 private static final char ALEFHAMZA = 0x0623; 596 private static final char ALEFHAMZABELOW = 0x0625; 597 private static final char ALEFMADDA = 0x0622; 598 private static final char LAM = 0x0644; 599 private static final char HAMZA = 0x0621; 600 private static final char TATWEEL = 0x0640; 601 private static final char ZWJ = 0x200D; 602 603 private static final char HAMZAABOVE = 0x0654; 604 private static final char HAMZABELOW = 0x0655; 605 606 private static final char WAWHAMZA = 0x0624; 607 private static final char YEHHAMZA = 0x0626; 608 private static final char WAW = 0x0648; 609 private static final char ALEFMAKSURA = 0x0649; 610 private static final char YEH = 0x064A; 611 private static final char FARSIYEH = 0x06CC; 612 613 private static final char SHADDA = 0x0651; 614 private static final char KASRA = 0x0650; 615 private static final char FATHA = 0x064E; 616 private static final char DAMMA = 0x064F; 617 private static final char MADDA = 0x0653; 618 619 private static final char LAM_ALEF = 0xFEFB; 620 private static final char LAM_ALEFHAMZA = 0xFEF7; 621 private static final char LAM_ALEFHAMZABELOW = 0xFEF9; 622 private static final char LAM_ALEFMADDA = 0xFEF5; 623 624 private static final char chartable[][] = { 625 {0x0621, 0xFE80}, 626 {0x0622, 0xFE81, 0xFE82}, 627 {0x0623, 0xFE83, 0xFE84}, 628 {0x0624, 0xFE85, 0xFE86}, 629 {0x0625, 0xFE87, 0xFE88}, 630 {0x0626, 0xFE89, 0xFE8A, 0xFE8B, 0xFE8C}, 631 {0x0627, 0xFE8D, 0xFE8E}, 632 {0x0628, 0xFE8F, 0xFE90, 0xFE91, 0xFE92}, 633 {0x0629, 0xFE93, 0xFE94}, 634 {0x062A, 0xFE95, 0xFE96, 0xFE97, 0xFE98}, 635 {0x062B, 0xFE99, 0xFE9A, 0xFE9B, 0xFE9C}, 636 {0x062C, 0xFE9D, 0xFE9E, 0xFE9F, 0xFEA0}, 637 {0x062D, 0xFEA1, 0xFEA2, 0xFEA3, 0xFEA4}, 638 {0x062E, 0xFEA5, 0xFEA6, 0xFEA7, 0xFEA8}, 639 {0x062F, 0xFEA9, 0xFEAA}, 640 {0x0630, 0xFEAB, 0xFEAC}, 641 {0x0631, 0xFEAD, 0xFEAE}, 642 {0x0632, 0xFEAF, 0xFEB0}, 643 {0x0633, 0xFEB1, 0xFEB2, 0xFEB3, 0xFEB4}, 644 {0x0634, 0xFEB5, 0xFEB6, 0xFEB7, 0xFEB8}, 645 {0x0635, 0xFEB9, 0xFEBA, 0xFEBB, 0xFEBC}, 646 {0x0636, 0xFEBD, 0xFEBE, 0xFEBF, 0xFEC0}, |