// KickJava - Java API By Example, From Geeks To Geeks.
//
// Java > Open Source Codes > com > ibm > icu > impl > ImplicitCEGenerator


/**
 *******************************************************************************
 * Copyright (C) 2004, International Business Machines Corporation and         *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */

package com.ibm.icu.impl;

9 /**
10  * For generation of Implicit CEs
11  * @author Mark Davis
12  *
13  * Cleaned up so that changes can be made more easily.
14  * Old values:
15 # First Implicit: E26A792D
16 # Last Implicit: E3DC70C0
17 # First CJK: E0030300
18 # Last CJK: E0A9DD00
19 # First CJK_A: E0A9DF00
20 # Last CJK_A: E0DE3100
21 @internal
22  */

23 public class ImplicitCEGenerator {
24     
25     /**
26      * constants
27      */

28     static final boolean DEBUG = false;
29     
30     static final long topByte = 0xFF000000L;
31     static final long bottomByte = 0xFFL;
32     static final long fourBytes = 0xFFFFFFFFL;
33     
34     static final int MAX_INPUT = 0x220001; // 2 * Unicode range + 2
35

36     public static final int
37         CJK_BASE = 0x4E00,
38         CJK_LIMIT = 0x9FFF+1,
39         CJK_COMPAT_USED_BASE = 0xFA0E,
40         CJK_COMPAT_USED_LIMIT = 0xFA2F+1,
41         CJK_A_BASE = 0x3400,
42         CJK_A_LIMIT = 0x4DBF+1,
43         CJK_B_BASE = 0x20000,
44         CJK_B_LIMIT = 0x2A6DF+1;
45     
46     private void throwError(String JavaDoc title, int cp) {
47         throw new IllegalArgumentException JavaDoc(title + "\t" + Utility.hex(cp, 6) + "\t" +
48                                            Utility.hex(getImplicitFromRaw(cp) & fourBytes));
49     }
50
51     private void throwError(String JavaDoc title, long ce) {
52         throw new IllegalArgumentException JavaDoc(title + "\t" + Utility.hex(ce & fourBytes));
53     }
54
55     private void show(int i) {
56         if (i >= 0 && i <= MAX_INPUT) {
57             System.out.println(Utility.hex(i) + "\t" + Utility.hex(getImplicitFromRaw(i) & fourBytes));
58         }
59     }
60     
61     /**
62      * Precomputed by constructor
63      */

64     int final3Multiplier;
65     int final4Multiplier;
66     int final3Count;
67     int final4Count;
68     int medialCount;
69     int min3Primary;
70     int min4Primary;
71     int max4Primary;
72     int minTrail;
73     int maxTrail;
74     int max3Trail;
75     int max4Trail;
76     int min4Boundary;
77     
78     public int getGap4() {
79         return final4Multiplier - 1;
80     }
81     
82     public int getGap3() {
83         return final3Multiplier - 1;
84     }
85     
86     // old comment
87
// we must skip all 00, 01, 02, FF bytes, so most bytes have 252 values
88
// we must leave a gap of 01 between all values of the last byte, so the last byte has 126 values (3 byte case)
89
// we shift so that HAN all has the same first primary, for compression.
90
// for the 4 byte case, we make the gap as large as we can fit.
91

92     /**
93      * Supply parameters for generating implicit CEs
94      */

95     public ImplicitCEGenerator(int minPrimary, int maxPrimary) {
96         // 13 is the largest 4-byte gap we can use without getting 2 four-byte forms.
97
this(minPrimary, maxPrimary, 0x04, 0xFE, 1, 1);
98     }
99     
100     /**
101      * Set up to generate implicits.
102      * @param minPrimary
103      * @param maxPrimary
104      * @param minTrail final byte
105      * @param maxTrail final byte
106      * @param gap3 the gap we leave for tailoring for 3-byte forms
107      * @param primaries3count number of 3-byte primarys we can use (normally 1)
108      */

109     public ImplicitCEGenerator(int minPrimary, int maxPrimary, int minTrail, int maxTrail, int gap3, int primaries3count) {
110         // some simple parameter checks
111
if (minPrimary < 0 || minPrimary >= maxPrimary || maxPrimary > 0xFF) {
112             throw new IllegalArgumentException JavaDoc("bad lead bytes");
113         }
114         if (minTrail < 0 || minTrail >= maxTrail || maxTrail > 0xFF) {
115             throw new IllegalArgumentException JavaDoc("bad trail bytes");
116         }
117         if (primaries3count < 1) {
118             throw new IllegalArgumentException JavaDoc("bad three-byte primaries");
119         }
120         
121         this.minTrail = minTrail;
122         this.maxTrail = maxTrail;
123         
124         min3Primary = minPrimary;
125         max4Primary = maxPrimary;
126         // compute constants for use later.
127
// number of values we can use in trailing bytes
128
// leave room for empty values between AND above, e.g. if gap = 2
129
// range 3..7 => +3 -4 -5 -6 -7: so 1 value
130
// range 3..8 => +3 -4 -5 +6 -7 -8: so 2 values
131
// range 3..9 => +3 -4 -5 +6 -7 -8 -9: so 2 values
132
final3Multiplier = gap3 + 1;
133         final3Count = (maxTrail - minTrail + 1) / final3Multiplier;
134         max3Trail = minTrail + (final3Count - 1) * final3Multiplier;
135         
136         // medials can use full range
137
medialCount = (maxTrail - minTrail + 1);
138         // find out how many values fit in each form
139
int threeByteCount = medialCount * final3Count;
140         // now determine where the 3/4 boundary is.
141
// we use 3 bytes below the boundary, and 4 above
142
int primariesAvailable = maxPrimary - minPrimary + 1;
143         int primaries4count = primariesAvailable - primaries3count;
144         
145         int min3ByteCoverage = primaries3count * threeByteCount;
146         min4Primary = minPrimary + primaries3count;
147         min4Boundary = min3ByteCoverage;
148         // Now expand out the multiplier for the 4 bytes, and redo.
149

150         int totalNeeded = MAX_INPUT - min4Boundary;
151         int neededPerPrimaryByte = divideAndRoundUp(totalNeeded, primaries4count);
152         if (DEBUG) System.out.println("neededPerPrimaryByte: " + neededPerPrimaryByte);
153         
154         int neededPerFinalByte = divideAndRoundUp(neededPerPrimaryByte, medialCount * medialCount);
155         if (DEBUG) System.out.println("neededPerFinalByte: " + neededPerFinalByte);
156         
157         int gap4 = (maxTrail - minTrail - 1) / neededPerFinalByte;
158         if (DEBUG) System.out.println("expandedGap: " + gap4);
159         if (gap4 < 1) throw new IllegalArgumentException JavaDoc("must have larger gap4s");
160         
161         final4Multiplier = gap4 + 1;
162         final4Count = neededPerFinalByte;
163         max4Trail = minTrail + (final4Count - 1) * final4Multiplier;
164         
165         if (primaries4count * medialCount * medialCount * final4Count < MAX_INPUT) {
166             throw new IllegalArgumentException JavaDoc("internal error");
167         }
168         if (DEBUG) {
169             System.out.println("final4Count: " + final4Count);
170             for (int counter = 0; counter < final4Count; ++counter) {
171                 int value = minTrail + (1 + counter)*final4Multiplier;
172                 System.out.println(counter + "\t" + value + "\t" + Utility.hex(value));
173             }
174         }
175     }
176     
177     static public int divideAndRoundUp(int a, int b) {
178         return 1 + (a-1)/b;
179     }
180
181     /**
182      * Converts implicit CE into raw integer
183      * @param implicit
184      * @return -1 if illegal format
185      */

186     public int getRawFromImplicit(int implicit) {
187         int result;
188         int b3 = implicit & 0xFF;
189         implicit >>= 8;
190         int b2 = implicit & 0xFF;
191         implicit >>= 8;
192         int b1 = implicit & 0xFF;
193         implicit >>= 8;
194         int b0 = implicit & 0xFF;
195
196         // simple parameter checks
197
if (b0 < min3Primary || b0 > max4Primary
198             || b1 < minTrail || b1 > maxTrail) return -1;
199         // normal offsets
200
b1 -= minTrail;
201
202         // take care of the final values, and compose
203
if (b0 < min4Primary) {
204             if (b2 < minTrail || b2 > max3Trail || b3 != 0) return -1;
205             b2 -= minTrail;
206             int remainder = b2 % final3Multiplier;
207             if (remainder != 0) return -1;
208             b0 -= min3Primary;
209             b2 /= final3Multiplier;
210             result = ((b0 * medialCount) + b1) * final3Count + b2;
211         } else {
212             if (b2 < minTrail || b2 > maxTrail
213                 || b3 < minTrail || b3 > max4Trail) return -1;
214             b2 -= minTrail;
215             b3 -= minTrail;
216             int remainder = b3 % final4Multiplier;
217             if (remainder != 0) return -1;
218             b3 /= final4Multiplier;
219             b0 -= min4Primary;
220             result = (((b0 * medialCount) + b1) * medialCount + b2) * final4Count + b3 + min4Boundary;
221         }
222         // final check
223
if (result < 0 || result > MAX_INPUT) return -1;
224         return result;
225     }
226     
227     /**
228      * Generate the implicit CE, from raw integer.
229      * Left shifted to put the first byte at the top of an int.
230      * @param cp code point
231      * @return Primary implicit weight
232      */

233     public int getImplicitFromRaw(int cp) {
234         if (cp < 0 || cp > MAX_INPUT) {
235             throw new IllegalArgumentException JavaDoc("Code point out of range " + Utility.hex(cp));
236         }
237         int last0 = cp - min4Boundary;
238         if (last0 < 0) {
239             int last1 = cp / final3Count;
240             last0 = cp % final3Count;
241                         
242             int last2 = last1 / medialCount;
243             last1 %= medialCount;
244             
245             last0 = minTrail + last0*final3Multiplier; // spread out, leaving gap at start
246
last1 = minTrail + last1; // offset
247
last2 = min3Primary + last2; // offset
248

249             if (last2 >= min4Primary) {
250                 throw new IllegalArgumentException JavaDoc("4-byte out of range: " +
251                                                    Utility.hex(cp) + ", " + Utility.hex(last2));
252             }
253             
254             return (last2 << 24) + (last1 << 16) + (last0 << 8);
255         } else {
256             int last1 = last0 / final4Count;
257             last0 %= final4Count;
258             
259             int last2 = last1 / medialCount;
260             last1 %= medialCount;
261             
262             int last3 = last2 / medialCount;
263             last2 %= medialCount;
264             
265             last0 = minTrail + last0*final4Multiplier; // spread out, leaving gap at start
266
last1 = minTrail + last1; // offset
267
last2 = minTrail + last2; // offset
268
last3 = min4Primary + last3; // offset
269

270             if (last3 > max4Primary) {
271                 throw new IllegalArgumentException JavaDoc("4-byte out of range: " +
272                                                    Utility.hex(cp) + ", " + Utility.hex(last3));
273             }
274             
275             return (last3 << 24) + (last2 << 16) + (last1 << 8) + last0;
276         }
277     }
278
279     /**
280      * Gets an Implicit from a code point. Internally,
281      * swaps (which produces a raw value 0..220000,
282      * then converts raw to implicit.
283      * @param cp
284      * @return Primary implicit weight
285      */

286     public int getImplicitFromCodePoint(int cp) {
287         if (DEBUG) System.out.println("Incoming: " + Utility.hex(cp));
288         
289         // Produce Raw value
290
// note, we add 1 so that the first value is always empty!!
291
cp = ImplicitCEGenerator.swapCJK(cp) + 1;
292         // we now have a range of numbers from 0 to 220000.
293

294         if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp));
295             
296         return getImplicitFromRaw(cp);
297     }
298
299     /**
300      * Function used to:
301      * a) collapse the 2 different Han ranges from UCA into one (in the right order), and
302      * b) bump any non-CJK characters by 10FFFF.
303      * The relevant blocks are:
304      * A: 4E00..9FFF; CJK Unified Ideographs
305      * F900..FAFF; CJK Compatibility Ideographs
306      * B: 3400..4DBF; CJK Unified Ideographs Extension A
307      * 20000..XX; CJK Unified Ideographs Extension B (and others later on)
308      * As long as
309      * no new B characters are allocated between 4E00 and FAFF, and
310      * no new A characters are outside of this range,
311      * (very high probability) this simple code will work.
312      * The reordered blocks are:
313      * Block1 is CJK
314      * Block2 is CJK_COMPAT_USED
315      * Block3 is CJK_A
316      * (all contiguous)
317      * Any other CJK gets its normal code point
318      * Any non-CJK gets +10FFFF
319      * When we reorder Block1, we make sure that it is at the very start,
320      * so that it will use a 3-byte form.
321      * Warning: the we only pick up the compatibility characters that are
322      * NOT decomposed, so that block is smaller!
323      */

324     
325     static int NON_CJK_OFFSET = 0x110000;
326         
327     static int swapCJK(int i) {
328         
329         if (i >= CJK_BASE) {
330             if (i < CJK_LIMIT) return i - CJK_BASE;
331             
332             if (i < CJK_COMPAT_USED_BASE) return i + NON_CJK_OFFSET;
333             
334             if (i < CJK_COMPAT_USED_LIMIT) return i - CJK_COMPAT_USED_BASE
335                                                 + (CJK_LIMIT - CJK_BASE);
336             if (i < CJK_B_BASE) return i + NON_CJK_OFFSET;
337             
338             if (i < CJK_B_LIMIT) return i; // non-BMP-CJK
339

340             return i + NON_CJK_OFFSET; // non-CJK
341
}
342         if (i < CJK_A_BASE) return i + NON_CJK_OFFSET;
343         
344         if (i < CJK_A_LIMIT) return i - CJK_A_BASE
345                                                 + (CJK_LIMIT - CJK_BASE)
346                                                 + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
347         return i + NON_CJK_OFFSET; // non-CJK
348
}
349     
350
351     /**
352      * @return Minimal trail value
353      */

354     public int getMinTrail() {
355         return minTrail;
356     }
357
358     /**
359      * @return Maximal trail value
360      */

361     public int getMaxTrail() {
362         return maxTrail;
363     }
364     
365     public int getCodePointFromRaw(int i) {
366         i--;
367         int result = 0;
368         if(i >= NON_CJK_OFFSET) {
369             result = i - NON_CJK_OFFSET;
370         } else if(i >= CJK_B_BASE) {
371             result = i;
372         } else if(i < CJK_A_LIMIT + (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) {
373             // rest of CJKs, compacted
374
if(i < CJK_LIMIT - CJK_BASE) {
375                 result = i + CJK_BASE;
376             } else if(i < (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) {
377                 result = i + CJK_COMPAT_USED_BASE - (CJK_LIMIT - CJK_BASE);
378             } else {
379                 result = i + CJK_A_BASE - (CJK_LIMIT - CJK_BASE) - (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
380             }
381         } else {
382             result = -1;
383         }
384         return result;
385     }
386
387     public int getRawFromCodePoint(int i) {
388         return swapCJK(i)+1;
389     }
390 }
// Popular Tags