KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > lowagie > text > pdf > ArabicLigaturizer


1 /*
2  * Copyright 2003 by Paulo Soares.
3  *
4  * The contents of this file are subject to the Mozilla Public License Version 1.1
5  * (the "License"); you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at http://www.mozilla.org/MPL/
7  *
8  * Software distributed under the License is distributed on an "AS IS" basis,
9  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
10  * for the specific language governing rights and limitations under the License.
11  *
12  * The Original Code is 'iText, a free JAVA-PDF library'.
13  *
14  * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
15  * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
16  * All Rights Reserved.
17  * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
18  * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
19  *
20  * Contributor(s): all the names of the contributors are added in the source code
21  * where applicable.
22  *
23  * Alternatively, the contents of this file may be used under the terms of the
24  * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
25  * provisions of LGPL are applicable instead of those above. If you wish to
26  * allow use of your version of this file only under the terms of the LGPL
27  * License and not to allow others to use your version of this file under
28  * the MPL, indicate your decision by deleting the provisions above and
29  * replace them with the notice and other provisions required by the LGPL.
30  * If you do not delete the provisions above, a recipient may use your version
31  * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
32  *
33  * This library is free software; you can redistribute it and/or modify it
34  * under the terms of the MPL as stated above or under the terms of the GNU
35  * Library General Public License as published by the Free Software Foundation;
36  * either version 2 of the License, or any later version.
37  *
38  * This library is distributed in the hope that it will be useful, but WITHOUT
39  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
40  * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
41  * details.
42  *
43  * If you didn't download this code from the following link, you should check if
44  * you aren't using an obsolete version:
45  * http://www.lowagie.com/iText/
46  */

47 package com.lowagie.text.pdf;
48
49 /** Shapes Arabic characters. This code was converted from a C version
50  * found at www.pango.org.
51  *
52  * @author Paulo Soares (psoares@consiste.pt)
53  */

54 public class ArabicLigaturizer {
55     
56     static boolean isVowel(char s) {
57         return ((s >= 0x064B) && (s <= 0x0655)) || (s == 0x0670);
58     }
59
60     static char charshape(char s, int which)
61     /* which 0=isolated 1=final 2=initial 3=medial */
62     {
63         int l, r, m;
64         if ((s >= 0x0621) && (s <= 0x06D3)) {
65             l = 0;
66             r = chartable.length - 1;
67             while (l <= r) {
68                 m = (l + r) / 2;
69                 if (s == chartable[m][0]) {
70                     return chartable[m][which + 1];
71                 }
72                 else if (s < chartable[m][0]) {
73                     r = m - 1;
74                 }
75                 else {
76                     l = m + 1;
77                 }
78             }
79         }
80         else if (s >= 0xfef5 && s <= 0xfefb)
81             return (char)(s + which);
82         return s;
83     }
84
85     static int shapecount(char s) {
86         int l, r, m;
87         if ((s >= 0x0621) && (s <= 0x06D3) && !isVowel(s)) {
88             l = 0;
89             r = chartable.length - 1;
90             while (l <= r) {
91                 m = (l + r) / 2;
92                 if (s == chartable[m][0]) {
93                     return chartable[m].length - 1;
94                 }
95                 else if (s < chartable[m][0]) {
96                     r = m - 1;
97                 }
98                 else {
99                     l = m + 1;
100                 }
101             }
102         }
103         else if (s == ZWJ) {
104             return 4;
105         }
106         return 1;
107     }
108     
109     static int ligature(char newchar, charstruct oldchar) {
110     /* 0 == no ligature possible; 1 == vowel; 2 == two chars; 3 == Lam+Alef */
111         int retval = 0;
112         
113         if (oldchar.basechar == 0)
114             return 0;
115         if (isVowel(newchar)) {
116             retval = 1;
117             if ((oldchar.vowel != 0) && (newchar != SHADDA)) {
118                 retval = 2; /* we eliminate the old vowel .. */
119             }
120             switch (newchar) {
121                 case SHADDA:
122                     if (oldchar.mark1 == 0) {
123                         oldchar.mark1 = SHADDA;
124                     }
125                     else {
126                         return 0; /* no ligature possible */
127                     }
128                     break;
129                 case HAMZABELOW:
130                     switch (oldchar.basechar) {
131                         case ALEF:
132                             oldchar.basechar = ALEFHAMZABELOW;
133                             retval = 2;
134                             break;
135                         case LAM_ALEF:
136                             oldchar.basechar = LAM_ALEFHAMZABELOW;
137                             retval = 2;
138                             break;
139                         default:
140                             oldchar.mark1 = HAMZABELOW;
141                             break;
142                     }
143                     break;
144                 case HAMZAABOVE:
145                     switch (oldchar.basechar) {
146                         case ALEF:
147                             oldchar.basechar = ALEFHAMZA;
148                             retval = 2;
149                             break;
150                         case LAM_ALEF:
151                             oldchar.basechar = LAM_ALEFHAMZA;
152                             retval = 2;
153                             break;
154                         case WAW:
155                             oldchar.basechar = WAWHAMZA;
156                             retval = 2;
157                             break;
158                         case YEH:
159                         case ALEFMAKSURA:
160                         case FARSIYEH:
161                             oldchar.basechar = YEHHAMZA;
162                             retval = 2;
163                             break;
164                         default: /* whatever sense this may make .. */
165                             oldchar.mark1 = HAMZAABOVE;
166                             break;
167                     }
168                     break;
169                 case MADDA:
170                     switch (oldchar.basechar) {
171                         case ALEF:
172                             oldchar.basechar = ALEFMADDA;
173                             retval = 2;
174                             break;
175                     }
176                     break;
177                 default:
178                     oldchar.vowel = newchar;
179                     break;
180             }
181             if (retval == 1) {
182                 oldchar.lignum++;
183             }
184             return retval;
185         }
186         if (oldchar.vowel != 0) { /* if we already joined a vowel, we can't join a Hamza */
187             return 0;
188         }
189         
190         switch (oldchar.basechar) {
191             case LAM:
192                 switch (newchar) {
193                     case ALEF:
194                         oldchar.basechar = LAM_ALEF;
195                         oldchar.numshapes = 2;
196                         retval = 3;
197                         break;
198                     case ALEFHAMZA:
199                         oldchar.basechar = LAM_ALEFHAMZA;
200                         oldchar.numshapes = 2;
201                         retval = 3;
202                         break;
203                     case ALEFHAMZABELOW:
204                         oldchar.basechar = LAM_ALEFHAMZABELOW;
205                         oldchar.numshapes = 2;
206                         retval = 3;
207                         break;
208                     case ALEFMADDA:
209                         oldchar.basechar = LAM_ALEFMADDA;
210                         oldchar.numshapes = 2;
211                         retval = 3;
212                         break;
213                 }
214                 break;
215             case 0:
216                 oldchar.basechar = newchar;
217                 oldchar.numshapes = shapecount(newchar);
218                 retval = 1;
219                 break;
220         }
221         return retval;
222     }
223     
224     static void copycstostring(StringBuffer JavaDoc string, charstruct s, int level) {
225     /* s is a shaped charstruct; i is the index into the string */
226         if (s.basechar == 0)
227             return;
228         
229         string.append(s.basechar);
230         s.lignum--;
231         if (s.mark1 != 0) {
232             if ((level & ar_novowel) == 0) {
233                 string.append(s.mark1);
234                 s.lignum--;
235             }
236             else {
237                 s.lignum--;
238             }
239         }
240         if (s.vowel != 0) {
241             if ((level & ar_novowel) == 0) {
242                 string.append(s.vowel);
243                 s.lignum--;
244             }
245             else { /* vowel elimination */
246                 s.lignum--;
247             }
248         }
249 // while (s.lignum > 0) { /* NULL-insertion for Langbox-font */
250
// string[i] = 0;
251
// i++;
252
// (s.lignum)--;
253
// }
254
// return i;
255
}
256
257     // return len
258
static void doublelig(StringBuffer JavaDoc string, int level)
259     /* Ok. We have presentation ligatures in our font. */
260     {
261         int len;
262         int olen = len = string.length();
263         int j = 0, si = 1;
264         char lapresult;
265         
266         while (si < olen) {
267             lapresult = 0;
268             if ((level & ar_composedtashkeel) != 0) {
269                 switch (string.charAt(j)) {
270                     case SHADDA:
271                         switch (string.charAt(si)) {
272                             case KASRA:
273                                 lapresult = 0xFC62;
274                                 break;
275                             case FATHA:
276                                 lapresult = 0xFC60;
277                                 break;
278                             case DAMMA:
279                                 lapresult = 0xFC61;
280                                 break;
281                             case 0x064C:
282                                 lapresult = 0xFC5E;
283                                 break;
284                             case 0x064D:
285                                 lapresult = 0xFC5F;
286                                 break;
287                         }
288                         break;
289                     case KASRA:
290                         if (string.charAt(si) == SHADDA)
291                             lapresult = 0xFC62;
292                         break;
293                     case FATHA:
294                         if (string.charAt(si) == SHADDA)
295                             lapresult = 0xFC60;
296                         break;
297                     case DAMMA:
298                         if (string.charAt(si) == SHADDA)
299                             lapresult = 0xFC61;
300                         break;
301                 }
302             }
303             
304             if ((level & ar_lig) != 0) {
305                 switch (string.charAt(j)) {
306                     case 0xFEDF: /* LAM initial */
307                         switch (string.charAt(si)) {
308                             case 0xFE9E:
309                                 lapresult = 0xFC3F;
310                                 break; /* JEEM final */
311                             case 0xFEA0:
312                                 lapresult = 0xFCC9;
313                                 break; /* JEEM medial */
314                             case 0xFEA2:
315                                 lapresult = 0xFC40;
316                                 break; /* HAH final */
317                             case 0xFEA4:
318                                 lapresult = 0xFCCA;
319                                 break; /* HAH medial */
320                             case 0xFEA6:
321                                 lapresult = 0xFC41;
322                                 break; /* KHAH final */
323                             case 0xFEA8:
324                                 lapresult = 0xFCCB;
325                                 break; /* KHAH medial */
326                             case 0xFEE2:
327                                 lapresult = 0xFC42;
328                                 break; /* MEEM final */
329                             case 0xFEE4:
330                                 lapresult = 0xFCCC;
331                                 break; /* MEEM medial */
332                         }
333                         break;
334                     case 0xFE97: /* TEH inital */
335                         switch (string.charAt(si)) {
336                             case 0xFEA0:
337                                 lapresult = 0xFCA1;
338                                 break; /* JEEM medial */
339                             case 0xFEA4:
340                                 lapresult = 0xFCA2;
341                                 break; /* HAH medial */
342                             case 0xFEA8:
343                                 lapresult = 0xFCA3;
344                                 break; /* KHAH medial */
345                         }
346                         break;
347                     case 0xFE91: /* BEH inital */
348                         switch (string.charAt(si)) {
349                             case 0xFEA0:
350                                 lapresult = 0xFC9C;
351                                 break; /* JEEM medial */
352                             case 0xFEA4:
353                                 lapresult = 0xFC9D;
354                                 break; /* HAH medial */
355                             case 0xFEA8:
356                                 lapresult = 0xFC9E;
357                                 break; /* KHAH medial */
358                         }
359                         break;
360                     case 0xFEE7: /* NOON inital */
361                         switch (string.charAt(si)) {
362                             case 0xFEA0:
363                                 lapresult = 0xFCD2;
364                                 break; /* JEEM initial */
365                             case 0xFEA4:
366                                 lapresult = 0xFCD3;
367                                 break; /* HAH medial */
368                             case 0xFEA8:
369                                 lapresult = 0xFCD4;
370                                 break; /* KHAH medial */
371                         }
372                         break;
373                         
374                     case 0xFEE8: /* NOON medial */
375                         switch (string.charAt(si)) {
376                             case 0xFEAE:
377                                 lapresult = 0xFC8A;
378                                 break; /* REH final */
379                             case 0xFEB0:
380                                 lapresult = 0xFC8B;
381                                 break; /* ZAIN final */
382                         }
383                         break;
384                     case 0xFEE3: /* MEEM initial */
385                         switch (string.charAt(si)) {
386                             case 0xFEA0:
387                                 lapresult = 0xFCCE;
388                                 break; /* JEEM medial */
389                             case 0xFEA4:
390                                 lapresult = 0xFCCF;
391                                 break; /* HAH medial */
392                             case 0xFEA8:
393                                 lapresult = 0xFCD0;
394                                 break; /* KHAH medial */
395                             case 0xFEE4:
396                                 lapresult = 0xFCD1;
397                                 break; /* MEEM medial */
398                         }
399                         break;
400                         
401                     case 0xFED3: /* FEH initial */
402                         switch (string.charAt(si)) {
403                             case 0xFEF2:
404                                 lapresult = 0xFC32;
405                                 break; /* YEH final */
406                         }
407                         break;
408                         
409                     default:
410                         break;
411                 } /* end switch string[si] */
412             }
413             if (lapresult != 0) {
414                 string.setCharAt(j, lapresult);
415                 len--;
416                 si++; /* jump over one character */
417                 /* we'll have to change this, too. */
418             }
419             else {
420                 j++;
421                 string.setCharAt(j, string.charAt(si));
422                 si++;
423             }
424         }
425         string.setLength(len);
426     }
427
428     static boolean connects_to_left(charstruct a) {
429         return a.numshapes > 2;
430     }
431     
432     static void shape(char text[], StringBuffer JavaDoc string, int level) {
433   /* string is assumed to be empty and big enough.
434    * text is the original text.
435    * This routine does the basic arabic reshaping.
436    * *len the number of non-null characters.
437    *
438    * Note: We have to unshape each character first!
439    */

440         int join;
441         int which;
442         char nextletter;
443         
444         int p = 0; /* initialize for output */
445         charstruct oldchar = new charstruct();
446         charstruct curchar = new charstruct();
447         while (p < text.length) {
448             nextletter = text[p++];
449             //nextletter = unshape (nextletter);
450

451             join = ligature(nextletter, curchar);
452             if (join == 0) { /* shape curchar */
453                 int nc = shapecount(nextletter);
454                 //(*len)++;
455
if (nc == 1) {
456                     which = 0; /* final or isolated */
457                 }
458                 else {
459                     which = 2; /* medial or initial */
460                 }
461                 if (connects_to_left(oldchar)) {
462                     which++;
463                 }
464                 
465                 which = which % (curchar.numshapes);
466                 curchar.basechar = charshape(curchar.basechar, which);
467                 
468                 /* get rid of oldchar */
469                 copycstostring(string, oldchar, level);
470                 oldchar = curchar; /* new values in oldchar */
471                 
472                 /* init new curchar */
473                 curchar = new charstruct();
474                 curchar.basechar = nextletter;
475                 curchar.numshapes = nc;
476                 curchar.lignum++;
477                 // (*len) += unligature (&curchar, level);
478
}
479             else if (join == 1) {
480             }
481             // else
482
// {
483
// (*len) += unligature (&curchar, level);
484
// }
485
// p = g_utf8_next_char (p);
486
}
487         
488         /* Handle last char */
489         if (connects_to_left(oldchar))
490             which = 1;
491         else
492             which = 0;
493         which = which % (curchar.numshapes);
494         curchar.basechar = charshape(curchar.basechar, which);
495         
496         /* get rid of oldchar */
497         copycstostring(string, oldchar, level);
498         copycstostring(string, curchar, level);
499     }
500
501     static int arabic_shape(char src[], int srcoffset, int srclength, char dest[], int destoffset, int destlength, int level) {
502         char str[] = new char[srclength];
503         for (int k = srclength + srcoffset - 1; k >= srcoffset; --k)
504             str[k - srcoffset] = src[k];
505         StringBuffer JavaDoc string = new StringBuffer JavaDoc(srclength);
506         shape(str, string, level);
507         if ((level & (ar_composedtashkeel | ar_lig)) != 0)
508             doublelig(string, level);
509 // string.reverse();
510
System.arraycopy(string.toString().toCharArray(), 0, dest, destoffset, string.length());
511         return string.length();
512     }
513
514     static void processNumbers(char text[], int offset, int length, int options) {
515         int limit = offset + length;
516         if ((options & DIGITS_MASK) != 0) {
517             char digitBase = '\u0030'; // European digits
518
switch (options & DIGIT_TYPE_MASK) {
519                 case DIGIT_TYPE_AN:
520                     digitBase = '\u0660'; // Arabic-Indic digits
521
break;
522                     
523                 case DIGIT_TYPE_AN_EXTENDED:
524                     digitBase = '\u06f0'; // Eastern Arabic-Indic digits (Persian and Urdu)
525
break;
526                     
527                 default:
528                     break;
529             }
530             
531             switch (options & DIGITS_MASK) {
532                 case DIGITS_EN2AN: {
533                     int digitDelta = digitBase - '\u0030';
534                     for (int i = offset; i < limit; ++i) {
535                         char ch = text[i];
536                         if (ch <= '\u0039' && ch >= '\u0030') {
537                             text[i] += digitDelta;
538                         }
539                     }
540                 }
541                 break;
542                 
543                 case DIGITS_AN2EN: {
544                     char digitTop = (char)(digitBase + 9);
545                     int digitDelta = '\u0030' - digitBase;
546                     for (int i = offset; i < limit; ++i) {
547                         char ch = text[i];
548                         if (ch <= digitTop && ch >= digitBase) {
549                             text[i] += digitDelta;
550                         }
551                     }
552                 }
553                 break;
554                 
555                 case DIGITS_EN2AN_INIT_LR:
556                     shapeToArabicDigitsWithContext(text, 0, length, digitBase, false);
557                     break;
558                     
559                 case DIGITS_EN2AN_INIT_AL:
560                     shapeToArabicDigitsWithContext(text, 0, length, digitBase, true);
561                     break;
562                     
563                 default:
564                     break;
565             }
566         }
567     }
568     
569     static void shapeToArabicDigitsWithContext(char[] dest, int start, int length, char digitBase, boolean lastStrongWasAL) {
570         digitBase -= '0'; // move common adjustment out of loop
571

572         int limit = start + length;
573         for(int i = start; i < limit; ++i) {
574             char ch = dest[i];
575             switch (BidiOrder.getDirection(ch)) {
576             case BidiOrder.L:
577             case BidiOrder.R:
578                 lastStrongWasAL = false;
579                 break;
580             case BidiOrder.AL:
581                 lastStrongWasAL = true;
582                 break;
583             case BidiOrder.EN:
584                 if (lastStrongWasAL && ch <= '\u0039') {
585                     dest[i] = (char)(ch + digitBase);
586                 }
587                 break;
588             default:
589                 break;
590             }
591         }
592     }
593
// Base letters and controls. ligature() uses ALEF, the ALEF variants and LAM to
// build Lam-Alef ligatures; shapecount() treats ZWJ as having four shapes.
// NOTE(review): HAMZA and TATWEEL are not referenced by the code visible in this class.
594     private static final char ALEF = 0x0627;
595     private static final char ALEFHAMZA = 0x0623;
596     private static final char ALEFHAMZABELOW = 0x0625;
597     private static final char ALEFMADDA = 0x0622;
598     private static final char LAM = 0x0644;
599     private static final char HAMZA = 0x0621;
600     private static final char TATWEEL = 0x0640;
601     private static final char ZWJ = 0x200D;
602
// Combining hamza marks. ligature() folds them into the base letter where a
// composed letter exists, and otherwise stores them in charstruct.mark1.
603     private static final char HAMZAABOVE = 0x0654;
604     private static final char HAMZABELOW = 0x0655;
605
// Letters that take part in hamza-above composition in ligature():
// WAW becomes WAWHAMZA; YEH, ALEFMAKSURA and FARSIYEH become YEHHAMZA.
606     private static final char WAWHAMZA = 0x0624;
607     private static final char YEHHAMZA = 0x0626;
608     private static final char WAW = 0x0648;
609     private static final char ALEFMAKSURA = 0x0649;
610     private static final char YEH = 0x064A;
611     private static final char FARSIYEH = 0x06CC;
612
// Marks matched by ligature() and by the composed-tashkeel pass in doublelig().
613     private static final char SHADDA = 0x0651;
614     private static final char KASRA = 0x0650;
615     private static final char FATHA = 0x064E;
616     private static final char DAMMA = 0x064F;
617     private static final char MADDA = 0x0653;
618
// Lam-Alef ligature base forms. charshape() adds the position index `which`
// to these values, and shape() limits `which` to 0 or 1 because ligature()
// sets numshapes to 2 for them.
619     private static final char LAM_ALEF = 0xFEFB;
620     private static final char LAM_ALEFHAMZA = 0xFEF7;
621     private static final char LAM_ALEFHAMZABELOW = 0xFEF9;
622     private static final char LAM_ALEFMADDA = 0xFEF5;
623
// Presentation-form table used by charshape() and shapecount().
// Each row is { base character, isolated, final [, initial, medial] }:
// charshape() returns row[which + 1] (which: 0=isolated 1=final 2=initial
// 3=medial) and shapecount() returns row.length - 1, so a row has either one,
// two or four forms. Rows MUST stay sorted by ascending base character because
// both methods locate the row by binary search on column 0.
624     private static final char chartable[][] = {
625         {0x0621, 0xFE80}, /* HAMZA */
626         {0x0622, 0xFE81, 0xFE82}, /* ALEF WITH MADDA ABOVE */
627         {0x0623, 0xFE83, 0xFE84}, /* ALEF WITH HAMZA ABOVE */
628         {0x0624, 0xFE85, 0xFE86}, /* WAW WITH HAMZA ABOVE */
629         {0x0625, 0xFE87, 0xFE88}, /* ALEF WITH HAMZA BELOW */
630         {0x0626, 0xFE89, 0xFE8A, 0xFE8B, 0xFE8C}, /* YEH WITH HAMZA ABOVE */
631         {0x0627, 0xFE8D, 0xFE8E}, /* ALEF */
632         {0x0628, 0xFE8F, 0xFE90, 0xFE91, 0xFE92}, /* BEH */
633         {0x0629, 0xFE93, 0xFE94}, /* TEH MARBUTA */
634         {0x062A, 0xFE95, 0xFE96, 0xFE97, 0xFE98}, /* TEH */
635         {0x062B, 0xFE99, 0xFE9A, 0xFE9B, 0xFE9C}, /* THEH */
636         {0x062C, 0xFE9D, 0xFE9E, 0xFE9F, 0xFEA0}, /* JEEM */
637         {0x062D, 0xFEA1, 0xFEA2, 0xFEA3, 0xFEA4}, /* HAH */
638         {0x062E, 0xFEA5, 0xFEA6, 0xFEA7, 0xFEA8}, /* KHAH */
639         {0x062F, 0xFEA9, 0xFEAA}, /* DAL */
640         {0x0630, 0xFEAB, 0xFEAC}, /* THAL */
641         {0x0631, 0xFEAD, 0xFEAE}, /* REH */
642         {0x0632, 0xFEAF, 0xFEB0}, /* ZAIN */
643         {0x0633, 0xFEB1, 0xFEB2, 0xFEB3, 0xFEB4}, /* SEEN */
644         {0x0634, 0xFEB5, 0xFEB6, 0xFEB7, 0xFEB8}, /* SHEEN */
645         {0x0635, 0xFEB9, 0xFEBA, 0xFEBB, 0xFEBC}, /* SAD */
646         {0x0636, 0xFEBD, 0xFEBE, 0xFEBF, 0xFEC0}, /* DAD */
647         {0x0637, 0xFEC1, 0xFEC2, 0xFEC3, 0xFEC4}, /* TAH */
648         {0x0638, 0xFEC5, 0xFEC6, 0xFEC7, 0xFEC8}, /* ZAH */
649         {0x0639, 0xFEC9, 0xFECA, 0xFECB, 0xFECC}, /* AIN */
650         {0x063A, 0xFECD, 0xFECE, 0xFECF, 0xFED0}, /* GHAIN */
651         {0x0640, 0x0640, 0x0640, 0x0640, 0x0640}, /* TATWEEL */
652         {0x0641, 0xFED1, 0xFED2, 0xFED3, 0xFED4}, /* FEH */
653         {0x0642, 0xFED5, 0xFED6, 0xFED7, 0xFED8}, /* QAF */
654         {0x0643, 0xFED9, 0xFEDA, 0xFEDB, 0xFEDC}, /* KAF */
655         {0x0644, 0xFEDD, 0xFEDE, 0xFEDF, 0xFEE0}, /* LAM */
656         {0x0645, 0xFEE1, 0xFEE2, 0xFEE3, 0xFEE4}, /* MEEM */
657         {0x0646, 0xFEE5, 0xFEE6, 0xFEE7, 0xFEE8}, /* NOON */
658         {0x0647, 0xFEE9, 0xFEEA, 0xFEEB, 0xFEEC}, /* HEH */
659         {0x0648, 0xFEED, 0xFEEE}, /* WAW */
660         {0x0649, 0xFEEF, 0xFEF0, 0xFBE8, 0xFBE9}, /* ALEF MAKSURA */
661         {0x064A, 0xFEF1, 0xFEF2, 0xFEF3, 0xFEF4}, /* YEH */
662         {0x0671, 0xFB50, 0xFB51}, /* ALEF WASLA */
663         {0x0679, 0xFB66, 0xFB67, 0xFB68, 0xFB69}, /* TTEH */
664         {0x067A, 0xFB5E, 0xFB5F, 0xFB60, 0xFB61}, /* TTEHEH */
665         {0x067B, 0xFB52, 0xFB53, 0xFB54, 0xFB55}, /* BEEH */
666         {0x067E, 0xFB56, 0xFB57, 0xFB58, 0xFB59}, /* PEH */
667         {0x067F, 0xFB62, 0xFB63, 0xFB64, 0xFB65}, /* TEHEH */
668         {0x0680, 0xFB5A, 0xFB5B, 0xFB5C, 0xFB5D}, /* BEHEH */
669         {0x0683, 0xFB76, 0xFB77, 0xFB78, 0xFB79}, /* NYEH */
670         {0x0684, 0xFB72, 0xFB73, 0xFB74, 0xFB75}, /* DYEH */
671         {0x0686, 0xFB7A, 0xFB7B, 0xFB7C, 0xFB7D}, /* TCHEH */
672         {0x0687, 0xFB7E, 0xFB7F, 0xFB80, 0xFB81}, /* TCHEHEH */
673         {0x0688, 0xFB88, 0xFB89}, /* DDAL */
674         {0x068C, 0xFB84, 0xFB85}, /* DAHAL */
675         {0x068D, 0xFB82, 0xFB83}, /* DDAHAL */
676         {0x068E, 0xFB86, 0xFB87}, /* DUL */
677         {0x0691, 0xFB8C, 0xFB8D}, /* RREH */
678         {0x0698, 0xFB8A, 0xFB8B}, /* JEH */
679         {0x06A4, 0xFB6A, 0xFB6B, 0xFB6C, 0xFB6D}, /* VEH */
680         {0x06A6, 0xFB6E, 0xFB6F, 0xFB70, 0xFB71}, /* PEHEH */
681         {0x06A9, 0xFB8E, 0xFB8F, 0xFB90, 0xFB91}, /* KEHEH */
682         {0x06AD, 0xFBD3, 0xFBD4, 0xFBD5, 0xFBD6}, /* NG */
683         {0x06AF, 0xFB92, 0xFB93, 0xFB94, 0xFB95}, /* GAF */
684         {0x06B1, 0xFB9A, 0xFB9B, 0xFB9C, 0xFB9D}, /* NGOEH */
685         {0x06B3, 0xFB96, 0xFB97, 0xFB98, 0xFB99}, /* GUEH */
686         {0x06BA, 0xFB9E, 0xFB9F}, /* NOON GHUNNA */
687         {0x06BB, 0xFBA0, 0xFBA1, 0xFBA2, 0xFBA3}, /* RNOON */
688         {0x06BE, 0xFBAA, 0xFBAB, 0xFBAC, 0xFBAD}, /* HEH DOACHASHMEE */
689         {0x06C0, 0xFBA4, 0xFBA5}, /* HEH WITH YEH ABOVE */
690         {0x06C1, 0xFBA6, 0xFBA7, 0xFBA8, 0xFBA9}, /* HEH GOAL */
691         {0x06C5, 0xFBE0, 0xFBE1}, /* KIRGHIZ OE */
692         {0x06C6, 0xFBD9, 0xFBDA}, /* OE */
693         {0x06C7, 0xFBD7, 0xFBD8}, /* U */
694         {0x06C8, 0xFBDB, 0xFBDC}, /* YU */
695         {0x06C9, 0xFBE2, 0xFBE3}, /* KIRGHIZ YU */
696         {0x06CB, 0xFBDE, 0xFBDF}, /* VE */
697         {0x06CC, 0xFBFC, 0xFBFD, 0xFBFE, 0xFBFF}, /* FARSI YEH */
698         {0x06D0, 0xFBE4, 0xFBE5, 0xFBE6, 0xFBE7}, /* E */
699         {0x06D2, 0xFBAE, 0xFBAF}, /* YEH BARREE */
700         {0x06D3, 0xFBB0, 0xFBB1} /* YEH BARREE WITH HAMZA ABOVE */
701         };
702
// Shaping option flags, combined with bitwise OR into the `level` argument.
// ar_nothing: no option bit set.
// ar_novowel: copycstostring() omits the mark and the vowel of each cluster.
// ar_composedtashkeel: doublelig() merges a shadda next to a vowel mark into one character.
// ar_lig: doublelig() replaces selected shaped letter pairs by ligature characters.
703         public static final int ar_nothing = 0x0;
704         public static final int ar_novowel = 0x1;
705         public static final int ar_composedtashkeel = 0x4;
706         public static final int ar_lig = 0x8;
707         /**
708          * Digit shaping option: Replace European digits (U+0030...U+0039) by Arabic-Indic digits.
709          */

710         public static final int DIGITS_EN2AN = 0x20;
711         
712         /**
713          * Digit shaping option: Replace Arabic-Indic digits by European digits (U+0030...U+0039).
714          */

715         public static final int DIGITS_AN2EN = 0x40;
716         
717         /**
718          * Digit shaping option:
719          * Replace European digits (U+0030...U+0039) by Arabic-Indic digits
720          * if the most recent strongly directional character
721          * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC).
722          * The initial state at the start of the text is assumed not to be an Arabic
723          * letter, so European digits at the start of the text will not change.
724          * Compare to DIGITS_EN2AN_INIT_AL.
725          */

726         public static final int DIGITS_EN2AN_INIT_LR = 0x60;
727         
728         /**
729          * Digit shaping option:
730          * Replace European digits (U+0030...U+0039) by Arabic-Indic digits
731          * if the most recent strongly directional character
732          * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC).
733          * The initial state at the start of the text is assumed to be an Arabic
734          * letter, so European digits at the start of the text will change.
735          * Compare to DIGITS_EN2AN_INIT_LR.
736          */

737         public static final int DIGITS_EN2AN_INIT_AL = 0x80;
738         
739         /** Not a valid option value. Not referenced by the code visible in this class. */
740         private static final int DIGITS_RESERVED = 0xa0;
741         
742         /**
743          * Bit mask for digit shaping options.
744          */

745         public static final int DIGITS_MASK = 0xe0;
746         
747         /**
748          * Digit type option: Use Arabic-Indic digits (U+0660...U+0669).
749          */

750         public static final int DIGIT_TYPE_AN = 0;
751         
752         /**
753          * Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9).
754          */

755         public static final int DIGIT_TYPE_AN_EXTENDED = 0x100;
756
757         /**
758          * Bit mask for digit type options.
759          */

760         public static final int DIGIT_TYPE_MASK = 0x0100; // 0x3f00?
761

// One output cluster while shaping: a base letter plus an optional mark and
// an optional vowel. ligature() fills it in, shape() picks the contextual form
// of basechar, and copycstostring() writes it to the output buffer.
762         static class charstruct {
763             char basechar; /* base letter or ligature; 0 means the cluster is empty */
764             char mark1; /* has to be initialized to zero; set by ligature() to a shadda or hamza mark */
765             char vowel; /* vowel mark stored by ligature(); 0 if none */
766             int lignum; /* is a ligature with lignum additional characters */
767             int numshapes = 1; /* number of presentation forms of basechar; see shapecount() */
768         };
769
770
771 }
772
Popular Tags