KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > java > awt > font > NumericShaper


1 /*
2  * @(#)NumericShaper.java 1.10 03/12/19
3  *
4  * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
5  * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
6  */

7
8 package java.awt.font;
9
/**
 * The <code>NumericShaper</code> class is used to convert Latin-1 (European)
 * digits to other Unicode decimal digits.  Users of this class will
 * primarily be people who wish to present data using
 * national digit shapes, but find it more convenient to represent the
 * data internally using Latin-1 (European) digits.  This does not
 * interpret the deprecated numeric shape selector character (U+206E).
 * <p>
 * Instances of <code>NumericShaper</code> are typically applied
 * as attributes to text with the
 * {@link TextAttribute#NUMERIC_SHAPING NUMERIC_SHAPING} attribute
 * of the <code>TextAttribute</code> class.
 * For example, this code snippet causes a <code>TextLayout</code> to
 * shape European digits to Arabic in an Arabic context:<br>
 * <blockquote><pre>
 * Map map = new HashMap();
 * map.put(TextAttribute.NUMERIC_SHAPING,
 *     NumericShaper.getContextualShaper(NumericShaper.ARABIC));
 * FontRenderContext frc = ...;
 * TextLayout layout = new TextLayout(text, map, frc);
 * layout.draw(g2d, x, y);
 * </pre></blockquote>
 * <br>
 * It is also possible to perform numeric shaping explicitly using instances
 * of <code>NumericShaper</code>, as this code snippet demonstrates:<br>
 * <blockquote><pre>
 * char[] text = ...;
 * // shape all EUROPEAN digits to ARABIC digits
 * NumericShaper shaper = NumericShaper.getShaper(NumericShaper.ARABIC);
 * shaper.shape(text, start, count);
 *
 * // shape European digits to ARABIC digits if preceding text is Arabic, or
 * // shape European digits to TAMIL digits if preceding text is Tamil, or
 * // leave European digits alone if there is no preceding text, or
 * // preceding text is neither Arabic nor Tamil
 * NumericShaper shaper =
 *     NumericShaper.getContextualShaper(NumericShaper.ARABIC |
 *                                       NumericShaper.TAMIL,
 *                                       NumericShaper.EUROPEAN);
 * shaper.shape(text, start, count);
 * </pre></blockquote>
 *
 * @since 1.4
 */
public final class NumericShaper implements java.io.Serializable {
    // NOTE: key and mask are intentionally left non-final and no
    // serialVersionUID is declared, so the default serialized form and
    // stream identifier of this class stay exactly as they were.

    /**
     * Index (0 to 18) into <code>bases</code>/<code>keyNames</code>.  For a
     * contextual shaper this is the starting context; for a non-contextual
     * shaper it is the single range that digits are shaped to.
     */
    private int key;

    /** flag indicating whether to shape contextually (high bit) and which
     *  digit ranges to shape (bits 0-18)
     */
    private int mask;

    /** Identifies the Latin-1 (European) and extended range, and
     *  Latin-1 (European) decimal base.
     */
    public static final int EUROPEAN = 1<<0;

    /** Identifies the ARABIC range and decimal base. */
    public static final int ARABIC = 1<<1;

    /** Identifies the ARABIC range and ARABIC_EXTENDED decimal base. */
    public static final int EASTERN_ARABIC = 1<<2;

    /** Identifies the DEVANAGARI range and decimal base. */
    public static final int DEVANAGARI = 1<<3;

    /** Identifies the BENGALI range and decimal base. */
    public static final int BENGALI = 1<<4;

    /** Identifies the GURMUKHI range and decimal base. */
    public static final int GURMUKHI = 1<<5;

    /** Identifies the GUJARATI range and decimal base. */
    public static final int GUJARATI = 1<<6;

    /** Identifies the ORIYA range and decimal base. */
    public static final int ORIYA = 1<<7;

    /** Identifies the TAMIL range and decimal base.  Tamil does not have a
     *  decimal digit 0 so Latin-1 (European) 0 is used.
     */
    public static final int TAMIL = 1<<8;

    /** Identifies the TELUGU range and decimal base. */
    public static final int TELUGU = 1<<9;

    /** Identifies the KANNADA range and decimal base. */
    public static final int KANNADA = 1<<10;

    /** Identifies the MALAYALAM range and decimal base. */
    public static final int MALAYALAM = 1<<11;

    /** Identifies the THAI range and decimal base. */
    public static final int THAI = 1<<12;

    /** Identifies the LAO range and decimal base. */
    public static final int LAO = 1<<13;

    /** Identifies the TIBETAN range and decimal base. */
    public static final int TIBETAN = 1<<14;

    /** Identifies the MYANMAR range and decimal base. */
    public static final int MYANMAR = 1<<15;

    /** Identifies the ETHIOPIC range and decimal base. */
    public static final int ETHIOPIC = 1<<16;

    /** Identifies the KHMER range and decimal base. */
    public static final int KHMER = 1<<17;

    /** Identifies the MONGOLIAN range and decimal base. */
    public static final int MONGOLIAN = 1<<18;

    /** Identifies all ranges, for full contextual shaping. */
    public static final int ALL_RANGES = 0x0007ffff;

    // Key n corresponds to range bit (1 << n); keys index bases and keyNames.
    private static final int EUROPEAN_KEY = 0;
    private static final int ARABIC_KEY = 1;
    private static final int EASTERN_ARABIC_KEY = 2;
    private static final int DEVANAGARI_KEY = 3;
    private static final int BENGALI_KEY = 4;
    private static final int GURMUKHI_KEY = 5;
    private static final int GUJARATI_KEY = 6;
    private static final int ORIYA_KEY = 7;
    private static final int TAMIL_KEY = 8;
    private static final int TELUGU_KEY = 9;
    private static final int KANNADA_KEY = 10;
    private static final int MALAYALAM_KEY = 11;
    private static final int THAI_KEY = 12;
    private static final int LAO_KEY = 13;
    private static final int TIBETAN_KEY = 14;
    private static final int MYANMAR_KEY = 15;
    private static final int ETHIOPIC_KEY = 16;
    private static final int KHMER_KEY = 17;
    private static final int MONGOLIAN_KEY = 18;

    private static final int NUM_KEYS = 19;

    /** Display names of the ranges, indexed by key; used by toString. */
    private static final String[] keyNames = {
        "EUROPEAN",
        "ARABIC",
        "EASTERN_ARABIC",
        "DEVANAGARI",
        "BENGALI",
        "GURMUKHI",
        "GUJARATI",
        "ORIYA",
        "TAMIL",
        "TELUGU",
        "KANNADA",
        "MALAYALAM",
        "THAI",
        "LAO",
        "TIBETAN",
        "MYANMAR",
        "ETHIOPIC",
        "KHMER",
        "MONGOLIAN"
    };

    /** High bit of <code>mask</code>: set when the shaper is contextual. */
    private static final int CONTEXTUAL_MASK = 1<<31;

    /**
     * Per-key offset added to a European digit to obtain the shaped digit:
     * (code point of the range's digit zero) - (code point of '0').
     */
    private static final char[] bases = {
        '\u0030' - '\u0030', // EUROPEAN
        '\u0660' - '\u0030', // ARABIC
        '\u06f0' - '\u0030', // EASTERN_ARABIC
        '\u0966' - '\u0030', // DEVANAGARI
        '\u09e6' - '\u0030', // BENGALI
        '\u0a66' - '\u0030', // GURMUKHI
        '\u0ae6' - '\u0030', // GUJARATI
        '\u0b66' - '\u0030', // ORIYA
        // TAMIL: zero is never shaped (see minDigitFor), but the base must
        // still be aligned on U+0BE6 so that '1' maps to TAMIL DIGIT ONE
        // (U+0BE7) and '9' to TAMIL DIGIT NINE (U+0BEF).
        '\u0be6' - '\u0030', // TAMIL
        '\u0c66' - '\u0030', // TELUGU
        '\u0ce6' - '\u0030', // KANNADA
        '\u0d66' - '\u0030', // MALAYALAM
        '\u0e50' - '\u0030', // THAI
        '\u0ed0' - '\u0030', // LAO
        '\u0f20' - '\u0030', // TIBETAN
        '\u1040' - '\u0030', // MYANMAR
        '\u1369' - '\u0030', // ETHIOPIC
        '\u17e0' - '\u0030', // KHMER
        '\u1810' - '\u0030', // MONGOLIAN
    };

    // Pairs of [start, limit) for each key, in key order, plus a trailing
    // sentinel.  Entry 2*k is the start of the range for key k, entry 2*k+1
    // its exclusive limit.
    // some ranges adjoin or overlap, rethink if we want to do a binary search on this
    private static final char[] contexts = {
        '\u0000', '\u0300', // 'EUROPEAN' (really latin-1 and extended)
        '\u0600', '\u0700', // ARABIC
        '\u0600', '\u0700', // EASTERN_ARABIC -- note overlap with arabic
        '\u0900', '\u0980', // DEVANAGARI
        '\u0980', '\u0a00', // BENGALI
        '\u0a00', '\u0a80', // GURMUKHI
        '\u0a80', '\u0b00', // GUJARATI
        '\u0b00', '\u0b80', // ORIYA
        '\u0b80', '\u0c00', // TAMIL - note missing zero
        '\u0c00', '\u0c80', // TELUGU
        '\u0c80', '\u0d00', // KANNADA
        '\u0d00', '\u0d80', // MALAYALAM
        '\u0e00', '\u0e80', // THAI
        '\u0e80', '\u0f00', // LAO
        '\u0f00', '\u1000', // TIBETAN
        '\u1000', '\u1080', // MYANMAR
        '\u1200', '\u1380', // ETHIOPIC
        '\u1780', '\u1800', // KHMER
        '\u1800', '\u1900', // MONGOLIAN
        '\uffff',
    };

    /**
     * Guards the static lookup caches (ctCache, stCache).  The caches are
     * shared by every instance, so an instance-level lock is not enough.
     */
    private static final Object CACHE_LOCK = new Object();

    // assume most characters are near each other so probing the cache is infrequent,
    // and a linear probe is ok.
    private static int ctCache = 0;
    private static final int ctCacheLimit = contexts.length - 2;

    /**
     * Returns the key of the range in <code>contexts</code> that contains
     * <code>c</code>, or EUROPEAN_KEY when <code>c</code> lies in a gap
     * between ranges.  Because the Arabic block is listed twice, the result
     * for an Arabic character is ARABIC_KEY when the cache moves up to it and
     * EASTERN_ARABIC_KEY when the cache moves down to it; callers must treat
     * the two as the same block.
     * <p>
     * Modifies ctCache; callers must hold CACHE_LOCK.
     */
    private static int getContextKey(char c) {
        if (c < contexts[ctCache]) {
            while (ctCache > 0 && c < contexts[ctCache]) --ctCache;
        } else if (c >= contexts[ctCache + 1]) {
            while (ctCache < ctCacheLimit && c >= contexts[ctCache + 1]) ++ctCache;
        }

        // even index: inside a range; odd index: in a gap after a range.
        // if we're not in a known range, then return EUROPEAN as the range key
        return (ctCache & 0x1) == 0 ? (ctCache / 2) : EUROPEAN_KEY;
    }

    /*
     * A range table of strong directional characters (types L, R, AL).
     * Even (left) indexes are starts of ranges of non-strong-directional (or undefined)
     * characters, odd (right) indexes are starts of ranges of strong directional
     * characters.
     */
    private static final char[] strongTable = {
        '\u0000', '\u0041',
        '\u005b', '\u0061',
        '\u007b', '\u00aa',
        '\u00ab', '\u00b5',
        '\u00b6', '\u00ba',
        '\u00bb', '\u00c0',
        '\u00d7', '\u00d8',
        '\u00f7', '\u00f8',
        '\u0220', '\u0222',
        '\u0234', '\u0250',
        '\u02ae', '\u02b0',
        '\u02b9', '\u02bb',
        '\u02c2', '\u02d0',
        '\u02d2', '\u02e0',
        '\u02e5', '\u02ee',
        '\u02ef', '\u037a',
        '\u037b', '\u0386',
        '\u0387', '\u0388',
        '\u038b', '\u038c',
        '\u038d', '\u038e',
        '\u03a2', '\u03a3',
        '\u03cf', '\u03d0',
        '\u03d8', '\u03da',
        '\u03f4', '\u0400',
        '\u0483', '\u048c',
        '\u04c5', '\u04c7',
        '\u04c9', '\u04cb',
        '\u04cd', '\u04d0',
        '\u04f6', '\u04f8',
        '\u04fa', '\u0531',
        '\u0557', '\u0559',
        '\u0560', '\u0561',
        '\u0588', '\u0589',
        '\u058a', '\u05be',
        '\u05bf', '\u05c0',
        '\u05c1', '\u05c3',
        '\u05c4', '\u05d0',
        '\u05eb', '\u05f0',
        '\u05f5', '\u061b',
        '\u061c', '\u061f',
        '\u0620', '\u0621',
        '\u063b', '\u0640',
        '\u064b', '\u066d',
        '\u066e', '\u0671',
        '\u06d6', '\u06e5',
        '\u06e7', '\u06fa',
        '\u06ff', '\u0700',
        '\u070e', '\u0710',
        '\u0711', '\u0712',
        '\u072d', '\u0780',
        '\u07a6', '\u0903',
        '\u0904', '\u0905',
        '\u093a', '\u093d',
        '\u0941', '\u0949',
        '\u094d', '\u0950',
        '\u0951', '\u0958',
        '\u0962', '\u0964',
        '\u0971', '\u0982',
        '\u0984', '\u0985',
        '\u098d', '\u098f',
        '\u0991', '\u0993',
        '\u09a9', '\u09aa',
        '\u09b1', '\u09b2',
        '\u09b3', '\u09b6',
        '\u09ba', '\u09be',
        '\u09c1', '\u09c7',
        '\u09c9', '\u09cb',
        '\u09cd', '\u09d7',
        '\u09d8', '\u09dc',
        '\u09de', '\u09df',
        '\u09e2', '\u09e6',
        '\u09f2', '\u09f4',
        '\u09fb', '\u0a05',
        '\u0a0b', '\u0a0f',
        '\u0a11', '\u0a13',
        '\u0a29', '\u0a2a',
        '\u0a31', '\u0a32',
        '\u0a34', '\u0a35',
        '\u0a37', '\u0a38',
        '\u0a3a', '\u0a3e',
        '\u0a41', '\u0a59',
        '\u0a5d', '\u0a5e',
        '\u0a5f', '\u0a66',
        '\u0a70', '\u0a72',
        '\u0a75', '\u0a83',
        '\u0a84', '\u0a85',
        '\u0a8c', '\u0a8d',
        '\u0a8e', '\u0a8f',
        '\u0a92', '\u0a93',
        '\u0aa9', '\u0aaa',
        '\u0ab1', '\u0ab2',
        '\u0ab4', '\u0ab5',
        '\u0aba', '\u0abd',
        '\u0ac1', '\u0ac9',
        '\u0aca', '\u0acb',
        '\u0acd', '\u0ad0',
        '\u0ad1', '\u0ae0',
        '\u0ae1', '\u0ae6',
        '\u0af0', '\u0b02',
        '\u0b04', '\u0b05',
        '\u0b0d', '\u0b0f',
        '\u0b11', '\u0b13',
        '\u0b29', '\u0b2a',
        '\u0b31', '\u0b32',
        '\u0b34', '\u0b36',
        '\u0b3a', '\u0b3d',
        '\u0b3f', '\u0b40',
        '\u0b41', '\u0b47',
        '\u0b49', '\u0b4b',
        '\u0b4d', '\u0b57',
        '\u0b58', '\u0b5c',
        '\u0b5e', '\u0b5f',
        '\u0b62', '\u0b66',
        '\u0b71', '\u0b83',
        '\u0b84', '\u0b85',
        '\u0b8b', '\u0b8e',
        '\u0b91', '\u0b92',
        '\u0b96', '\u0b99',
        '\u0b9b', '\u0b9c',
        '\u0b9d', '\u0b9e',
        '\u0ba0', '\u0ba3',
        '\u0ba5', '\u0ba8',
        '\u0bab', '\u0bae',
        '\u0bb6', '\u0bb7',
        '\u0bba', '\u0bbe',
        '\u0bc0', '\u0bc1',
        '\u0bc3', '\u0bc6',
        '\u0bc9', '\u0bca',
        '\u0bcd', '\u0bd7',
        '\u0bd8', '\u0be7',
        '\u0bf3', '\u0c01',
        '\u0c04', '\u0c05',
        '\u0c0d', '\u0c0e',
        '\u0c11', '\u0c12',
        '\u0c29', '\u0c2a',
        '\u0c34', '\u0c35',
        '\u0c3a', '\u0c41',
        '\u0c45', '\u0c60',
        '\u0c62', '\u0c66',
        '\u0c70', '\u0c82',
        '\u0c84', '\u0c85',
        '\u0c8d', '\u0c8e',
        '\u0c91', '\u0c92',
        '\u0ca9', '\u0caa',
        '\u0cb4', '\u0cb5',
        '\u0cba', '\u0cbe',
        '\u0cbf', '\u0cc0',
        '\u0cc5', '\u0cc7',
        '\u0cc9', '\u0cca',
        '\u0ccc', '\u0cd5',
        '\u0cd7', '\u0cde',
        '\u0cdf', '\u0ce0',
        '\u0ce2', '\u0ce6',
        '\u0cf0', '\u0d02',
        '\u0d04', '\u0d05',
        '\u0d0d', '\u0d0e',
        '\u0d11', '\u0d12',
        '\u0d29', '\u0d2a',
        '\u0d3a', '\u0d3e',
        '\u0d41', '\u0d46',
        '\u0d49', '\u0d4a',
        '\u0d4d', '\u0d57',
        '\u0d58', '\u0d60',
        '\u0d62', '\u0d66',
        '\u0d70', '\u0d82',
        '\u0d84', '\u0d85',
        '\u0d97', '\u0d9a',
        '\u0db2', '\u0db3',
        '\u0dbc', '\u0dbd',
        '\u0dbe', '\u0dc0',
        '\u0dc7', '\u0dcf',
        '\u0dd2', '\u0dd8',
        '\u0de0', '\u0df2',
        '\u0df5', '\u0e01',
        '\u0e31', '\u0e32',
        '\u0e34', '\u0e40',
        '\u0e47', '\u0e4f',
        '\u0e5c', '\u0e81',
        '\u0e83', '\u0e84',
        '\u0e85', '\u0e87',
        '\u0e89', '\u0e8a',
        '\u0e8b', '\u0e8d',
        '\u0e8e', '\u0e94',
        '\u0e98', '\u0e99',
        '\u0ea0', '\u0ea1',
        '\u0ea4', '\u0ea5',
        '\u0ea6', '\u0ea7',
        '\u0ea8', '\u0eaa',
        '\u0eac', '\u0ead',
        '\u0eb1', '\u0eb2',
        '\u0eb4', '\u0ebd',
        '\u0ebe', '\u0ec0',
        '\u0ec5', '\u0ec6',
        '\u0ec7', '\u0ed0',
        '\u0eda', '\u0edc',
        '\u0ede', '\u0f00',
        '\u0f18', '\u0f1a',
        '\u0f35', '\u0f36',
        '\u0f37', '\u0f38',
        '\u0f39', '\u0f3e',
        '\u0f48', '\u0f49',
        '\u0f6b', '\u0f7f',
        '\u0f80', '\u0f85',
        '\u0f86', '\u0f88',
        '\u0f8c', '\u0fbe',
        '\u0fc6', '\u0fc7',
        '\u0fcd', '\u0fcf',
        '\u0fd0', '\u1000',
        '\u1022', '\u1023',
        '\u1028', '\u1029',
        '\u102b', '\u102c',
        '\u102d', '\u1031',
        '\u1032', '\u1038',
        '\u1039', '\u1040',
        '\u1058', '\u10a0',
        '\u10c6', '\u10d0',
        '\u10f7', '\u10fb',
        '\u10fc', '\u1100',
        '\u115a', '\u115f',
        '\u11a3', '\u11a8',
        '\u11fa', '\u1200',
        '\u1207', '\u1208',
        '\u1247', '\u1248',
        '\u1249', '\u124a',
        '\u124e', '\u1250',
        '\u1257', '\u1258',
        '\u1259', '\u125a',
        '\u125e', '\u1260',
        '\u1287', '\u1288',
        '\u1289', '\u128a',
        '\u128e', '\u1290',
        '\u12af', '\u12b0',
        '\u12b1', '\u12b2',
        '\u12b6', '\u12b8',
        '\u12bf', '\u12c0',
        '\u12c1', '\u12c2',
        '\u12c6', '\u12c8',
        '\u12cf', '\u12d0',
        '\u12d7', '\u12d8',
        '\u12ef', '\u12f0',
        '\u130f', '\u1310',
        '\u1311', '\u1312',
        '\u1316', '\u1318',
        '\u131f', '\u1320',
        '\u1347', '\u1348',
        '\u135b', '\u1361',
        '\u137d', '\u13a0',
        '\u13f5', '\u1401',
        '\u1677', '\u1681',
        '\u169b', '\u16a0',
        '\u16f1', '\u1780',
        '\u17b7', '\u17be',
        '\u17c6', '\u17c7',
        '\u17c9', '\u17d4',
        '\u17db', '\u17dc',
        '\u17dd', '\u17e0',
        '\u17ea', '\u1810',
        '\u181a', '\u1820',
        '\u1878', '\u1880',
        '\u18a9', '\u1e00',
        '\u1e9c', '\u1ea0',
        '\u1efa', '\u1f00',
        '\u1f16', '\u1f18',
        '\u1f1e', '\u1f20',
        '\u1f46', '\u1f48',
        '\u1f4e', '\u1f50',
        '\u1f58', '\u1f59',
        '\u1f5a', '\u1f5b',
        '\u1f5c', '\u1f5d',
        '\u1f5e', '\u1f5f',
        '\u1f7e', '\u1f80',
        '\u1fb5', '\u1fb6',
        '\u1fbd', '\u1fbe',
        '\u1fbf', '\u1fc2',
        '\u1fc5', '\u1fc6',
        '\u1fcd', '\u1fd0',
        '\u1fd4', '\u1fd6',
        '\u1fdc', '\u1fe0',
        '\u1fed', '\u1ff2',
        '\u1ff5', '\u1ff6',
        '\u1ffd', '\u200e',
        '\u2010', '\u207f',
        '\u2080', '\u2102',
        '\u2103', '\u2107',
        '\u2108', '\u210a',
        '\u2114', '\u2115',
        '\u2116', '\u2119',
        '\u211e', '\u2124',
        '\u2125', '\u2126',
        '\u2127', '\u2128',
        '\u2129', '\u212a',
        '\u212e', '\u212f',
        '\u2132', '\u2133',
        '\u213a', '\u2160',
        '\u2184', '\u2336',
        '\u237b', '\u2395',
        '\u2396', '\u249c',
        '\u24ea', '\u3005',
        '\u3008', '\u3021',
        '\u302a', '\u3031',
        '\u3036', '\u3038',
        '\u303b', '\u3041',
        '\u3095', '\u309d',
        '\u309f', '\u30a1',
        '\u30fb', '\u30fc',
        '\u30ff', '\u3105',
        '\u312d', '\u3131',
        '\u318f', '\u3190',
        '\u31b8', '\u3200',
        '\u321d', '\u3220',
        '\u3244', '\u3260',
        '\u327c', '\u327f',
        '\u32b1', '\u32c0',
        '\u32cc', '\u32d0',
        '\u32ff', '\u3300',
        '\u3377', '\u337b',
        '\u33de', '\u33e0',
        '\u33ff', '\u3400',
        '\u4db6', '\u4e00',
        '\u9fa6', '\ua000',
        '\ua48d', '\uac00',
        '\ud7a4', '\uf900',
        '\ufa2e', '\ufb00',
        '\ufb07', '\ufb13',
        '\ufb18', '\ufb1d',
        '\ufb1e', '\ufb1f',
        '\ufb29', '\ufb2a',
        '\ufb37', '\ufb38',
        '\ufb3d', '\ufb3e',
        '\ufb3f', '\ufb40',
        '\ufb42', '\ufb43',
        '\ufb45', '\ufb46',
        '\ufbb2', '\ufbd3',
        '\ufd3e', '\ufd50',
        '\ufd90', '\ufd92',
        '\ufdc8', '\ufdf0',
        '\ufdfc', '\ufe70',
        '\ufe73', '\ufe74',
        '\ufe75', '\ufe76',
        '\ufefd', '\uff21',
        '\uff3b', '\uff41',
        '\uff5b', '\uff66',
        '\uffbf', '\uffc2',
        '\uffc8', '\uffca',
        '\uffd0', '\uffd2',
        '\uffd8', '\uffda',
        '\uffdd', '\uffff' // last entry is a sentinel; only the char U+FFFF itself reaches it
    };

    // use a binary search with a cache
    private static int stCache = 0;

    /**
     * Returns whether <code>c</code> falls in a strong-directional range of
     * <code>strongTable</code> (an odd table index).
     * <p>
     * Modifies stCache; callers must hold CACHE_LOCK.
     */
    private static boolean isStrongDirectional(char c) {
        if (c < strongTable[stCache]) {
            stCache = search(c, strongTable, 0, stCache);
        } else if (stCache + 1 < strongTable.length && c >= strongTable[stCache + 1]) {
            // The bounds check matters once stCache sits on the final
            // sentinel entry (reached by the char U+FFFF): there is no
            // following entry to compare against.
            stCache = search(c, strongTable, stCache + 1, strongTable.length - stCache - 1);
        }
        return (stCache & 0x1) == 1;
    }

    /**
     * Converts a single-range mask into its key (bit index).
     * @throws IllegalArgumentException if <code>mask</code> does not have
     * exactly one of the 19 range bits set, or has any other bit set
     */
    private static int getKeyFromMask(int mask) {
        int key = 0;
        while (key < NUM_KEYS && ((mask & (1<<key)) == 0)) {
            ++key;
        }
        if (key == NUM_KEYS || ((mask & ~(1<<key)) != 0)) {
            throw new IllegalArgumentException("invalid shaper: " + Integer.toHexString(mask));
        }
        return key;
    }

    /**
     * Returns the lowest European digit that is shaped for the given key.
     * Tamil doesn't use decimal zero, so for Tamil shaping starts at '1'.
     */
    private static char minDigitFor(int key) {
        return key == TAMIL_KEY ? '\u0031' : '\u0030';
    }

    /**
     * Returns a shaper for the provided unicode range.  All
     * Latin-1 (EUROPEAN) digits are converted
     * to the corresponding decimal unicode digits.
     * @param singleRange the specified Unicode range
     * @return a non-contextual numeric shaper
     * @throws IllegalArgumentException if the range is not a single range
     */
    public static NumericShaper getShaper(int singleRange) {
        int key = getKeyFromMask(singleRange);
        return new NumericShaper(key, singleRange);
    }

    /**
     * Returns a contextual shaper for the provided unicode range(s).
     * Latin-1 (EUROPEAN) digits are converted to the decimal digits
     * corresponding to the range of the preceding text, if the
     * range is one of the provided ranges.  Multiple ranges are
     * represented by or-ing the values together, such as,
     * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>.  The
     * shaper assumes EUROPEAN as the starting context, that is, if
     * EUROPEAN digits are encountered before any strong directional
     * text in the string, the context is presumed to be EUROPEAN, and
     * so the digits will not shape.
     * @param ranges the specified Unicode ranges
     * @return a shaper for the specified ranges
     */
    public static NumericShaper getContextualShaper(int ranges) {
        ranges |= CONTEXTUAL_MASK;
        return new NumericShaper(EUROPEAN_KEY, ranges);
    }

    /**
     * Returns a contextual shaper for the provided unicode range(s).
     * Latin-1 (EUROPEAN) digits will be converted to the decimal digits
     * corresponding to the range of the preceding text, if the
     * range is one of the provided ranges.  Multiple ranges are
     * represented by or-ing the values together, for example,
     * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>.  The
     * shaper uses defaultContext as the starting context.
     * @param ranges the specified Unicode ranges
     * @param defaultContext the starting context, such as
     * <code>NumericShaper.EUROPEAN</code>
     * @return a shaper for the specified Unicode ranges.
     * @throws IllegalArgumentException if <code>defaultContext</code> is
     * not a single range
     */
    public static NumericShaper getContextualShaper(int ranges, int defaultContext) {
        int key = getKeyFromMask(defaultContext);
        ranges |= CONTEXTUAL_MASK;
        return new NumericShaper(key, ranges);
    }

    /**
     * Private constructor.
     */
    private NumericShaper(int key, int mask) {
        this.key = key;
        this.mask = mask;
    }

    /**
     * Converts the digits in the text that occur between start and
     * start + count.
     * @param text an array of characters to convert
     * @param start the index into <code>text</code> to start
     * converting
     * @param count the number of characters in <code>text</code>
     * to convert
     */
    public void shape(char[] text, int start, int count) {
        if (isContextual()) {
            shapeContextually(text, start, count, key);
        } else {
            shapeNonContextually(text, start, count);
        }
    }

    /**
     * Converts the digits in the text that occur between start and
     * start + count, using the provided context.
     * Context is ignored if the shaper is not a contextual shaper.
     * @param text an array of characters
     * @param start the index into <code>text</code> to start
     * converting
     * @param count the number of characters in <code>text</code>
     * to convert
     * @param context the context to which to convert the
     * characters, such as <code>NumericShaper.EUROPEAN</code>
     * @throws IllegalArgumentException if this shaper is contextual and
     * <code>context</code> is not a single range
     */
    public void shape(char[] text, int start, int count, int context) {
        if (isContextual()) {
            int ctxKey = getKeyFromMask(context);
            shapeContextually(text, start, count, ctxKey);
        } else {
            shapeNonContextually(text, start, count);
        }
    }

    /**
     * Returns a <code>boolean</code> indicating whether or not
     * this shaper shapes contextually.
     * @return <code>true</code> if this shaper is contextual;
     * <code>false</code> otherwise.
     */
    public boolean isContextual() {
        return (mask & CONTEXTUAL_MASK) != 0;
    }

    /**
     * Returns an <code>int</code> that ORs together the values for
     * all the ranges that will be shaped.
     * <p>
     * For example, to check if a shaper shapes to Arabic, you would use the
     * following:
     * <blockquote>
     * <code>if ((shaper.getRanges() &amp; NumericShaper.ARABIC) != 0) { ... </code>
     * </blockquote>
     * @return the values for all the ranges to be shaped.
     */
    public int getRanges() {
        return mask & ~CONTEXTUAL_MASK;
    }

    /**
     * Perform non-contextual shaping: every European digit in the span
     * (except zero for Tamil) is shifted by the base of this shaper's key.
     */
    private void shapeNonContextually(char[] text, int start, int count) {
        int base = bases[key];
        char minDigit = minDigitFor(key);
        for (int i = start, e = start + count; i < e; ++i) {
            char c = text[i];
            if (c >= minDigit && c <= '\u0039') {
                text[i] = (char)(c + base);
            }
        }
    }

    /**
     * Perform contextual shaping.  Digits are shaped according to the range
     * of the most recent strong directional character, starting from
     * <code>ctxKey</code>; ranges that are not in this shaper's mask leave
     * digits European.
     * <p>
     * Runs under CACHE_LOCK to protect the static caches used in
     * getContextKey and isStrongDirectional, which are shared by all
     * instances.
     */
    private void shapeContextually(char[] text, int start, int count, int ctxKey) {
        synchronized (CACHE_LOCK) {
            // if we don't support this context, then don't shape
            if ((mask & (1<<ctxKey)) == 0) {
                ctxKey = EUROPEAN_KEY;
            }
            int lastkey = ctxKey;

            int base = bases[ctxKey];
            char minDigit = minDigitFor(ctxKey);

            for (int i = start, e = start + count; i < e; ++i) {
                char c = text[i];
                if (c >= minDigit && c <= '\u0039') {
                    text[i] = (char)(c + base);
                }

                if (isStrongDirectional(c)) {
                    int newkey = getContextKey(c);
                    if (newkey != lastkey) {
                        lastkey = newkey;

                        ctxKey = newkey;
                        // ARABIC and EASTERN_ARABIC share one Unicode block,
                        // and getContextKey may report either key for it, so
                        // resolve the block against the mask rather than
                        // testing the reported key's own bit.
                        boolean arabicBlock =
                            ctxKey == ARABIC_KEY || ctxKey == EASTERN_ARABIC_KEY;
                        if (arabicBlock && (mask & EASTERN_ARABIC) != 0) {
                            ctxKey = EASTERN_ARABIC_KEY;
                        } else if (arabicBlock && (mask & ARABIC) != 0) {
                            ctxKey = ARABIC_KEY;
                        } else if ((mask & (1<<ctxKey)) == 0) {
                            ctxKey = EUROPEAN_KEY;
                        }

                        base = bases[ctxKey];
                        minDigit = minDigitFor(ctxKey);
                    }
                }
            }
        }
    }

    /**
     * Returns a hash code for this shaper.
     * @return this shaper's hash code.
     * @see java.lang.Object#hashCode
     */
    public int hashCode() {
        return mask;
    }

    /**
     * Returns true if the specified object is an instance of
     * <code>NumericShaper</code> and shapes identically to this one.
     * @param o the specified object to compare to this
     * <code>NumericShaper</code>
     * @return <code>true</code> if <code>o</code> is an instance
     * of <code>NumericShaper</code> and shapes in the same way;
     * <code>false</code> otherwise.
     * @see java.lang.Object#equals(java.lang.Object)
     */
    public boolean equals(Object o) {
        // instanceof is false for null, so no separate null check is needed
        if (!(o instanceof NumericShaper)) {
            return false;
        }
        NumericShaper rhs = (NumericShaper)o;
        return rhs.mask == mask && rhs.key == key;
    }

    /**
     * Returns a <code>String</code> that describes this shaper. This method
     * is used for debugging purposes only.
     * @return a <code>String</code> describing this shaper.
     */
    public String toString() {
        StringBuffer buf = new StringBuffer(super.toString());

        buf.append("[contextual:" + isContextual());

        if (isContextual()) {
            buf.append(", context:" + keyNames[key]);
        }

        buf.append(", range(s): ");
        boolean first = true;
        for (int i = 0; i < NUM_KEYS; ++i) {
            if ((mask & (1 << i)) != 0) {
                if (first) {
                    first = false;
                } else {
                    buf.append(", ");
                }
                buf.append(keyNames[i]);
            }
        }
        buf.append(']');

        return buf.toString();
    }

    /**
     * Returns the index of the highest set bit in value, i.e. the exponent
     * of the largest power of 2 that is less than or equal to value.
     * Returns -32 if value is not positive.
     */
    private static int getHighBit(int value) {
        if (value <= 0) {
            return -32;
        }

        int bit = 0;

        if (value >= 1 << 16) {
            value >>= 16;
            bit += 16;
        }

        if (value >= 1 << 8) {
            value >>= 8;
            bit += 8;
        }

        if (value >= 1 << 4) {
            value >>= 4;
            bit += 4;
        }

        if (value >= 1 << 2) {
            value >>= 2;
            bit += 2;
        }

        if (value >= 1 << 1) {
            value >>= 1;
            bit += 1;
        }

        return bit;
    }

    /**
     * Fast binary search over the subrange
     * <code>array[start .. start + length - 1]</code> of a sorted array.
     * Returns the largest index in the subrange whose element is less than
     * or equal to <code>value</code>, or <code>start</code> if there is none.
     * <code>length</code> must be at least 1.
     */
    private static int search(char value, char[] array, int start, int length)
    {
        // Split the subrange into a leading remainder of 'extra' elements
        // and a power-of-two sized tail, then halve the probe each step.
        int power = 1 << getHighBit(length);
        int extra = length - power;
        int probe = power;
        int index = start;

        if (value >= array[index + extra]) {
            index += extra;
        }

        while (probe > 1) {
            probe >>= 1;

            if (value >= array[index + probe]) {
                index += probe;
            }
        }

        return index;
    }
}
924
925
Popular Tags