KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > java > text > RBCollationTables


/*
 * @(#)RBCollationTables.java 1.9 03/12/19
 *
 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
 * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
 */

/*
 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
 * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
 *
 * The original version of this source code and documentation is copyrighted
 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
 * materials are provided under terms of a License Agreement between Taligent
 * and Sun. This technology is protected by multiple US and International
 * patents. This notice and attribution to Taligent may not be removed.
 * Taligent is a registered trademark of Taligent, Inc.
 *
 */

package java.text;

import java.util.Vector;

import sun.text.IntHashtable;
import sun.text.UCompactIntArray;

27 /**
28  * This class contains the static state of a RuleBasedCollator: The various
29  * tables that are used by the collation routines. Several RuleBasedCollators
30  * can share a single RBCollationTables object, easing memory requirements and
31  * improving performance.
32  */

33 final class RBCollationTables {
34     //===========================================================================================
35
// The following diagram shows the data structure of the RBCollationTables object.
36
// Suppose we have the rule, where 'o-umlaut' is the unicode char 0x00F6.
37
// "a, A < b, B < c, C, ch, cH, Ch, CH < d, D ... < o, O; 'o-umlaut'/E, 'O-umlaut'/E ...".
38
// What the rule says is, sorts 'ch'ligatures and 'c' only with tertiary difference and
39
// sorts 'o-umlaut' as if it's always expanded with 'e'.
40
//
41
// mapping table contracting list expanding list
42
// (contains all unicode char
43
// entries) ___ ____________ _________________________
44
// ________ +>|_*_|->|'c' |v('c') | +>|v('o')|v('umlaut')|v('e')|
45
// |_\u0001_|-> v('\u0001') | |_:_| |------------| | |-------------------------|
46
// |_\u0002_|-> v('\u0002') | |_:_| |'ch'|v('ch')| | | : |
47
// |____:___| | |_:_| |------------| | |-------------------------|
48
// |____:___| | |'cH'|v('cH')| | | : |
49
// |__'a'___|-> v('a') | |------------| | |-------------------------|
50
// |__'b'___|-> v('b') | |'Ch'|v('Ch')| | | : |
51
// |____:___| | |------------| | |-------------------------|
52
// |____:___| | |'CH'|v('CH')| | | : |
53
// |___'c'__|---------------- ------------ | |-------------------------|
54
// |____:___| | | : |
55
// |o-umlaut|---------------------------------------- |_________________________|
56
// |____:___|
57
//
58
// Noted by Helena Shih on 6/23/97
59
//============================================================================================
60

61     public RBCollationTables(String JavaDoc rules, int decmp) throws ParseException JavaDoc {
62         this.rules = rules;
63
64         RBTableBuilder JavaDoc builder = new RBTableBuilder JavaDoc(new BuildAPI());
65         builder.build(rules, decmp); // this object is filled in through
66
// the BuildAPI object
67
}
68
69     final class BuildAPI {
70         /**
71          * Private constructor. Prevents anyone else besides RBTableBuilder
72          * from gaining direct access to the internals of this class.
73          */

74         private BuildAPI() {
75         }
76
77         /**
78          * This function is used by RBTableBuilder to fill in all the members of this
79          * object. (Effectively, the builder class functions as a "friend" of this
80          * class, but to avoid changing too much of the logic, it carries around "shadow"
81          * copies of all these variables until the end of the build process and then
82          * copies them en masse into the actual tables object once all the construction
83          * logic is complete. This function does that "copying en masse".
84          * @param f2ary The value for frenchSec (the French-secondary flag)
85      * @param swap The value for SE Asian swapping rule
86          * @param map The collator's character-mapping table (the value for mapping)
87          * @param cTbl The collator's contracting-character table (the value for contractTable)
88          * @param eTbl The collator's expanding-character table (the value for expandTable)
89          * @param cFlgs The hash table of characters that participate in contracting-
90          * character sequences (the value for contractFlags)
91          * @param mso The value for maxSecOrder
92          * @param mto The value for maxTerOrder
93          */

94         void fillInTables(boolean f2ary,
95               boolean swap,
96                           UCompactIntArray map,
97                           Vector JavaDoc cTbl,
98                           Vector JavaDoc eTbl,
99                           IntHashtable cFlgs,
100                           short mso,
101                           short mto) {
102             frenchSec = f2ary;
103         seAsianSwapping = swap;
104             mapping = map;
105             contractTable = cTbl;
106             expandTable = eTbl;
107             contractFlags = cFlgs;
108             maxSecOrder = mso;
109             maxTerOrder = mto;
110         }
111     }
112
113     /**
114      * Gets the table-based rules for the collation object.
115      * @return returns the collation rules that the table collation object
116      * was created from.
117      */

118     public String JavaDoc getRules()
119     {
120         return rules;
121     }
122
123     public boolean isFrenchSec() {
124         return frenchSec;
125     }
126     
127     public boolean isSEAsianSwapping() {
128         return seAsianSwapping;
129     }
130     
131     // ==============================================================
132
// internal (for use by CollationElementIterator)
133
// ==============================================================
134

135     /**
136      * Get the entry of hash table of the contracting string in the collation
137      * table.
138      * @param ch the starting character of the contracting string
139      */

140     Vector JavaDoc getContractValues(int ch)
141     {
142         int index = mapping.elementAt(ch);
143         return getContractValuesImpl(index - CONTRACTCHARINDEX);
144     }
145
146     //get contract values from contractTable by index
147
private Vector JavaDoc getContractValuesImpl(int index)
148     {
149         if (index >= 0)
150         {
151             return (Vector JavaDoc)contractTable.elementAt(index);
152         }
153         else // not found
154
{
155             return null;
156         }
157     }
158
159     /**
160      * Returns true if this character appears anywhere in a contracting
161      * character sequence. (Used by CollationElementIterator.setOffset().)
162      */

163     boolean usedInContractSeq(int c) {
164         return contractFlags.get(c) == 1;
165     }
166
167     /**
168       * Return the maximum length of any expansion sequences that end
169       * with the specified comparison order.
170       *
171       * @param order a collation order returned by previous or next.
172       * @return the maximum length of any expansion seuences ending
173       * with the specified order.
174       *
175       * @see CollationElementIterator#getMaxExpansion
176       */

177     int getMaxExpansion(int order)
178     {
179         int result = 1;
180
181         if (expandTable != null) {
182             // Right now this does a linear search through the entire
183
// expandsion table. If a collator had a large number of expansions,
184
// this could cause a performance problem, but in practise that
185
// rarely happens
186
for (int i = 0; i < expandTable.size(); i++) {
187                 int[] valueList = (int [])expandTable.elementAt(i);
188                 int length = valueList.length;
189
190                 if (length > result && valueList[length-1] == order) {
191                     result = length;
192                 }
193             }
194         }
195
196         return result;
197     }
198
199     /**
200      * Get the entry of hash table of the expanding string in the collation
201      * table.
202      * @param idx the index of the expanding string value list
203      */

204     final int[] getExpandValueList(int order) {
205         return (int[])expandTable.elementAt(order - EXPANDCHARINDEX);
206     }
207
208     /**
209      * Get the comarison order of a character from the collation table.
210      * @return the comparison order of a character.
211      */

212     int getUnicodeOrder(int ch)
213     {
214         return mapping.elementAt(ch);
215     }
216
217     short getMaxSecOrder() {
218         return maxSecOrder;
219     }
220
221     short getMaxTerOrder() {
222         return maxTerOrder;
223     }
224
225     /**
226      * Reverse a string.
227      */

228     //shemran/Note: this is used for secondary order value reverse, no
229
// need to consider supplementary pair.
230
static void reverse (StringBuffer JavaDoc result, int from, int to)
231     {
232         int i = from;
233         char swap;
234
235         int j = to - 1;
236         while (i < j) {
237             swap = result.charAt(i);
238             result.setCharAt(i, result.charAt(j));
239             result.setCharAt(j, swap);
240             i++;
241             j--;
242         }
243     }
244
245     final static int getEntry(Vector JavaDoc list, String JavaDoc name, boolean fwd) {
246         for (int i = 0; i < list.size(); i++) {
247             EntryPair JavaDoc pair = (EntryPair JavaDoc)list.elementAt(i);
248             if (pair.fwd == fwd && pair.entryName.equals(name)) {
249                 return i;
250             }
251         }
252         return UNMAPPED;
253     }
254
255     // ==============================================================
256
// constants
257
// ==============================================================
258
//sherman/Todo: is the value big enough?????
259
final static int EXPANDCHARINDEX = 0x7E000000; // Expand index follows
260
final static int CONTRACTCHARINDEX = 0x7F000000; // contract indexes follow
261
final static int UNMAPPED = 0xFFFFFFFF;
262
263     final static int PRIMARYORDERMASK = 0xffff0000;
264     final static int SECONDARYORDERMASK = 0x0000ff00;
265     final static int TERTIARYORDERMASK = 0x000000ff;
266     final static int PRIMARYDIFFERENCEONLY = 0xffff0000;
267     final static int SECONDARYDIFFERENCEONLY = 0xffffff00;
268     final static int PRIMARYORDERSHIFT = 16;
269     final static int SECONDARYORDERSHIFT = 8;
270
271     // ==============================================================
272
// instance variables
273
// ==============================================================
274
private String JavaDoc rules = null;
275     private boolean frenchSec = false;
276     private boolean seAsianSwapping = false;
277
278     private UCompactIntArray mapping = null;
279     private Vector JavaDoc contractTable = null;
280     private Vector JavaDoc expandTable = null;
281     private IntHashtable contractFlags = null;
282
283     private short maxSecOrder = 0;
284     private short maxTerOrder = 0;
285 }
286
Popular Tags