CollationParsedRuleBuilder


1   /**
2   *******************************************************************************
3   * Copyright (C) 1996-2006, International Business Machines Corporation and    *
4   * others. All Rights Reserved.                                                *
5   *******************************************************************************
6   */
7   package com.ibm.icu.text;
8    
9   import java.io.IOException  ;
10  import java.text.ParseException  ;
11  import java.util.Hashtable  ;
12  import java.util.Vector  ;
13  import java.util.Arrays  ;
14  import java.util.Enumeration  ;
15  
16  import com.ibm.icu.impl.TrieBuilder;
17  import com.ibm.icu.impl.IntTrieBuilder;
18  import com.ibm.icu.impl.TrieIterator;
19  import com.ibm.icu.impl.Utility;
20  import com.ibm.icu.impl.UCharacterProperty;
21  import com.ibm.icu.lang.UCharacter;
22  import com.ibm.icu.lang.UCharacterCategory;
23  import com.ibm.icu.impl.NormalizerImpl;
24  import com.ibm.icu.util.RangeValueIterator;
25  import com.ibm.icu.util.VersionInfo;
26  
27  /**
28  * Class for building a collator from a list of collation rules.
* This class uses CollationRuleParser to parse the rules.
30  * @author Syn Wee Quek
31  * @since release 2.2, June 11 2002
32  * @draft 2.2
33  */
34  final class CollationParsedRuleBuilder
35  {     
36      // package private constructors ------------------------------------------
37  
38      /**
39       * Constructor
40       * @param rules collation rules
41       * @exception ParseException thrown when argument rules have an invalid 
42       *            syntax 
43       */
44      CollationParsedRuleBuilder(String   rules) throws ParseException  
45      {
46          m_parser_ = new CollationRuleParser(rules);
47          m_parser_.assembleTokenList();
48          m_utilColEIter_ = RuleBasedCollator.UCA_.getCollationElementIterator(
49                                           "");
50      }
51      
52      // package private inner classes -----------------------------------------
53      
54      /** 
55       * Inverse UCA wrapper
56       */
57      static class InverseUCA 
58      {
59          // package private constructor ---------------------------------------
60          
61          InverseUCA() 
62          {
63          }
64          
65          // package private data member ---------------------------------------
66          
67          /**
68           * Array list of characters
69           */
70          int m_table_[];
71          /**
72           * Array list of continuation characters
73           */
74          char m_continuations_[];
75          
76          /**
77           * UCA version of inverse UCA table
78           */
79          VersionInfo m_UCA_version_;
80          
81          // package private method --------------------------------------------
82          
83          /**
84       * Returns the previous inverse ces of the argument ces
85       * @param ce ce to test
86       * @param contce continuation ce to test
87       * @param strength collation strength
88       * @param prevresult an array to store the return results previous 
89           *                   inverse ce and previous inverse continuation ce
90           * @return result of the inverse ce 
91       */
92      final int getInversePrevCE(int ce, int contce, int strength, 
93                     int prevresult[]) 
94      {
95          int result = findInverseCE(ce, contce);
96          
97          if (result < 0) {
98          prevresult[0] = CollationElementIterator.NULLORDER;
99          return -1;
100         }
101         
102         ce &= STRENGTH_MASK_[strength];
103         contce &= STRENGTH_MASK_[strength];
104         
105             prevresult[0] = ce;
106         prevresult[1] = contce;
107         
108         while ((prevresult[0]  & STRENGTH_MASK_[strength]) == ce 
109            && (prevresult[1]  & STRENGTH_MASK_[strength])== contce
110            && result > 0) { 
111                         // this condition should prevent falling off the edge of the 
112                         // world 
113                 // here, we end up in a singularity - zero
114                 prevresult[0] = m_table_[3 * (-- result)];
115                 prevresult[1] = m_table_[3 * result + 1];
116            }
117            return result;
118         }
119         
120         final int getCEStrengthDifference(int CE, int contCE, 
121                 int prevCE, int prevContCE) {
122             int strength = Collator.TERTIARY;
123             while(
124             ((prevCE & STRENGTH_MASK_[strength]) != (CE & STRENGTH_MASK_[strength]) 
125             || (prevContCE & STRENGTH_MASK_[strength]) != (contCE & STRENGTH_MASK_[strength]))
126             && (strength != 0)) {
127                 strength--;
128             }
129             return strength;                
130         }
131 
132         private int compareCEs(int source0, int source1, int target0, int target1) {
133             int s1 = source0, s2, t1 = target0, t2;
134             if(RuleBasedCollator.isContinuation(source1)) {
135                 s2 = source1;
136             } else {
137                 s2 = 0;
138             }
139             if(RuleBasedCollator.isContinuation(target1)) {
140                 t2 = target1;
141             } else {
142                 t2 = 0;
143             }
144             
145             int s = 0, t = 0;
146             if(s1 == t1 && s2 == t2) {
147                 return 0;
148             }
149             s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16); 
150             t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16);
151             if(s == t) {
152                 s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00)>>8;
153                 t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00)>>8;
154                 if(s == t) {
155                     s = (s1 & 0x000000FF)<<8 | (s2 & 0x000000FF);
156                     t = (t1 & 0x000000FF)<<8 | (t2 & 0x000000FF);
157                     return Utility.compareUnsigned(s, t);
158                 } else { 
159                     return Utility.compareUnsigned(s, t);
160                 }
161             } else {
162                 return Utility.compareUnsigned(s, t);                
163             }
164         }
165         
166         /**
167          * Finding the inverse CE of the argument CEs
168          * @param ce CE to be tested
169          * @param contce continuation CE
170          * @return inverse CE
171          */
172         int findInverseCE(int ce, int contce) 
173         {
174             int bottom = 0;
175             int top = m_table_.length / 3;
176             int result = 0;
177             
178             while (bottom < top - 1) {
179                 result = (top + bottom) >> 1;
180                 int first = m_table_[3 * result];
181                 int second = m_table_[3 * result + 1];
182                 int comparison = compareCEs(first, second, ce, contce);
183                 if (comparison > 0) {
184                     top = result;
185                 } 
186                 else if (comparison < 0) {
187                     bottom = result;
188                 } 
189                 else { 
190                     break;
191                 }
192             }
193             
194             return result;
195         }
196     
197     /**
198      * Getting gap offsets in the inverse UCA
199      * @param listheader parsed token lists
200      * @exception Exception thrown when error occurs while finding the 
201      *            collation gaps
202      */
203     void getInverseGapPositions(CollationRuleParser.TokenListHeader 
204                     listheader)
205         throws Exception   
206     {
207         // reset all the gaps
208         CollationRuleParser.Token token = listheader.m_first_;
209         int tokenstrength = token.m_strength_;
210     
211         for (int i = 0; i < 3; i ++) {
212         listheader.m_gapsHi_[3 * i] = 0;
213         listheader.m_gapsHi_[3 * i + 1] = 0;
214         listheader.m_gapsHi_[3 * i + 2] = 0;
215         listheader.m_gapsLo_[3 * i] = 0;
216         listheader.m_gapsLo_[3 * i + 1] = 0;
217         listheader.m_gapsLo_[3 * i + 2] = 0;
218         listheader.m_numStr_[i] = 0;
219         listheader.m_fStrToken_[i] = null;
220         listheader.m_lStrToken_[i] = null;
221         listheader.m_pos_[i] = -1;
222         }
223     
224         if ((listheader.m_baseCE_ >>> 24) 
225                 >= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_
226         && (listheader.m_baseCE_ >>> 24)
227                 <= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_) 
228         { 
229                 // implicits -
230             listheader.m_pos_[0] = 0;
231             int t1 = listheader.m_baseCE_;
232             int t2 = listheader.m_baseContCE_;
233             listheader.m_gapsLo_[0] = mergeCE(t1, t2, 
234                                                   Collator.PRIMARY);
235             listheader.m_gapsLo_[1] = mergeCE(t1, t2, 
236                                                   Collator.SECONDARY);
237             listheader.m_gapsLo_[2] = mergeCE(t1, t2, 
238                                                   Collator.TERTIARY);
239             int primaryCE = t1 & RuleBasedCollator.CE_PRIMARY_MASK_ | (t2 & RuleBasedCollator.CE_PRIMARY_MASK_) >>> 16;
240             primaryCE = RuleBasedCollator.impCEGen_.getImplicitFromRaw(RuleBasedCollator.impCEGen_.getRawFromImplicit(primaryCE)+1);
241 
242             t1 = primaryCE & RuleBasedCollator.CE_PRIMARY_MASK_ | 0x0505;
243             t2 = (primaryCE << 16) & RuleBasedCollator.CE_PRIMARY_MASK_ | RuleBasedCollator.CE_CONTINUATION_MARKER_;
244                 
245             //                if (listheader.m_baseCE_ < 0xEF000000) {
246             //                    // first implicits have three byte primaries, with a gap of
247             //                    // one so we esentially need to add 2 to the top byte in 
248             //                    // listheader.m_baseContCE_
249             //                    t2 += 0x02000000;
250             //                } 
251             //                else {
252             //                    // second implicits have four byte primaries, with a gap of
253             //                    // IMPLICIT_LAST2_MULTIPLIER_
254             //                    // Now, this guy is not really accessible here, so until we 
255             //                    // find a better way to pass it around, assume that the gap is 1
256             //                    t2 += 0x00020000;
257             //                }
258             listheader.m_gapsHi_[0] = mergeCE(t1, t2, 
259                                                   Collator.PRIMARY);
260             listheader.m_gapsHi_[1] = mergeCE(t1, t2, 
261                                                   Collator.SECONDARY);
262             listheader.m_gapsHi_[2] = mergeCE(t1, t2, 
263                                                   Collator.TERTIARY);
264         } 
265         else if (listheader.m_indirect_ == true 
266                      && listheader.m_nextCE_ != 0) {
267         listheader.m_pos_[0] = 0;
268         int t1 = listheader.m_baseCE_;
269         int t2 = listheader.m_baseContCE_;
270         listheader.m_gapsLo_[0] = mergeCE(t1, t2, 
271                           Collator.PRIMARY);
272         listheader.m_gapsLo_[1] = mergeCE(t1, t2, 
273                           Collator.SECONDARY);
274         listheader.m_gapsLo_[2] = mergeCE(t1, t2, 
275                           Collator.TERTIARY);
276         t1 = listheader.m_nextCE_;
277         t2 = listheader.m_nextContCE_;
278         listheader.m_gapsHi_[0] = mergeCE(t1, t2, 
279                           Collator.PRIMARY);
280         listheader.m_gapsHi_[1] = mergeCE(t1, t2, 
281                           Collator.SECONDARY);
282         listheader.m_gapsHi_[2] = mergeCE(t1, t2, 
283                           Collator.TERTIARY);
284         } 
285         else {
286         while (true) {
287             if (tokenstrength < CE_BASIC_STRENGTH_LIMIT_) {
288             listheader.m_pos_[tokenstrength] 
289                 = getInverseNext(listheader, 
290                          tokenstrength);
291             if (listheader.m_pos_[tokenstrength] >= 0) {
292                 listheader.m_fStrToken_[tokenstrength] = token;
293             } 
294             else { 
295                             // The CE must be implicit, since it's not in the 
296                             // table 
297                 // Error
298                 throw new Exception  ("Internal program error");
299             }
300             }
301             
302             while (token != null && token.m_strength_ >= tokenstrength) 
303             {
304                 if (tokenstrength < CE_BASIC_STRENGTH_LIMIT_) {
305                 listheader.m_lStrToken_[tokenstrength] = token;
306                 }
307                 token = token.m_next_;
308             }
309             if (tokenstrength < CE_BASIC_STRENGTH_LIMIT_ - 1) {
310             // check if previous interval is the same and merge the 
311             // intervals if it is so
312             if (listheader.m_pos_[tokenstrength] 
313                 == listheader.m_pos_[tokenstrength + 1]) {
314                 listheader.m_fStrToken_[tokenstrength] 
315                 = listheader.m_fStrToken_[tokenstrength 
316                              + 1];
317                 listheader.m_fStrToken_[tokenstrength + 1] = null;
318                 listheader.m_lStrToken_[tokenstrength + 1] = null;
319                 listheader.m_pos_[tokenstrength + 1] = -1;
320             }
321             }
322             if (token != null) {
323             tokenstrength = token.m_strength_;
324             } 
325             else {
326             break;
327             }
328         }
329         for (int st = 0; st < 3; st ++) {
330             int pos = listheader.m_pos_[st];
331             if (pos >= 0) {
332             int t1 = m_table_[3 * pos];
333             int t2 = m_table_[3 * pos + 1];
334             listheader.m_gapsHi_[3 * st] = mergeCE(t1, t2, 
335                                    Collator.PRIMARY);
336             listheader.m_gapsHi_[3 * st + 1] = mergeCE(t1, t2, 
337                                    Collator.SECONDARY);
338             listheader.m_gapsHi_[3 * st + 2] = (t1 & 0x3f) << 24 
339                 | (t2 & 0x3f) << 16;
340             //pos --;
341             //t1 = m_table_[3 * pos];
342             //t2 = m_table_[3 * pos + 1];
343             t1 = listheader.m_baseCE_;
344             t2 = listheader.m_baseContCE_;
345             
346             listheader.m_gapsLo_[3 * st] = mergeCE(t1, t2, 
347                                    Collator.PRIMARY);
348             listheader.m_gapsLo_[3 * st + 1] = mergeCE(t1, t2, 
349                                    Collator.SECONDARY);
350             listheader.m_gapsLo_[3 * st + 2] = (t1 & 0x3f) << 24 
351                 | (t2 & 0x3f) << 16;
352             }
353         }
354         }
355         }
356         
357     /**
358      * Gets the next CE in the inverse table
359      * @param listheader token list header
360      * @param strength collation strength
361      * @return next ce
362      */
363     private final int getInverseNext(CollationRuleParser.TokenListHeader 
364                      listheader, 
365                      int strength) 
366     {
367         int ce = listheader.m_baseCE_;
368         int secondce = listheader.m_baseContCE_; 
369         int result = findInverseCE(ce, secondce);
370             
371         if (result < 0) {
372         return -1;
373         }
374             
375         ce &= STRENGTH_MASK_[strength];
376         secondce &= STRENGTH_MASK_[strength];
377         
378         int nextce = ce;
379         int nextcontce = secondce;
380         
381         while((nextce & STRENGTH_MASK_[strength]) == ce 
382           && (nextcontce  & STRENGTH_MASK_[strength]) == secondce) {
383         nextce = m_table_[3 * (++ result)];
384         nextcontce = m_table_[3 * result + 1];
385         }
386             
387         listheader.m_nextCE_ = nextce;
388         listheader.m_nextContCE_ = nextcontce;
389         
390         return result;
391     }
392     }
393 
394     // package private data members ------------------------------------------
395     
396     /**
397      * Inverse UCA, instantiate only when required
398      */
399     static final InverseUCA INVERSE_UCA_; 
400     
401     /**
402      * UCA and Inverse UCA version do not match
403      */
404     private static final String   INV_UCA_VERSION_MISMATCH_ =
405     "UCA versions of UCA and inverse UCA should match";
406            
407     /**
     * Error message used when the UCA or the inverse UCA is not available
409      */
410     private static final String   UCA_NOT_INSTANTIATED_ =
411     "UCA is not instantiated!";
412     
413     /**
414      * Initializing the inverse UCA
415      */
416     static {
417         InverseUCA temp = null;
418         try {
419         temp = CollatorReader.getInverseUCA();
420     } catch (IOException   e) {
421     }
422         /*
423       try
424       {
425       String invdat = "/com/ibm/icu/impl/data/invuca.icu";
426       InputStream i = CollationParsedRuleBuilder.class.getResourceAsStream(invdat);
427       BufferedInputStream b = new BufferedInputStream(i, 110000);
428       INVERSE_UCA_ = CollatorReader.readInverseUCA(b);
429       b.close();
430       i.close();
431       }
432       catch (Exception e)
433       {
434       e.printStackTrace();
435       throw new RuntimeException(e.getMessage());
436       }
437         */
438         
439         if(temp != null && RuleBasedCollator.UCA_ != null) {
440             if(!temp.m_UCA_version_.equals(RuleBasedCollator.UCA_.m_UCA_version_)) {
441                 throw new RuntimeException  (INV_UCA_VERSION_MISMATCH_);
442             }
443         } else {
444             throw new RuntimeException  (UCA_NOT_INSTANTIATED_);
445         }
446         
447         INVERSE_UCA_ = temp;
448     }
449     
450     // package private methods -----------------------------------------------
451     
452     /**
453      * Parse and sets the collation rules in the argument collator
454      * @param collator to set
455      * @exception Exception thrown when internal program error occurs
456      */
457     void setRules(RuleBasedCollator collator) throws Exception  
458     {
459         if (m_parser_.m_resultLength_ > 0 || m_parser_.m_removeSet_ != null) { 
460         // we have a set of rules, let's make something of it 
461         assembleTailoringTable(collator);
462     } 
463     else { // no rules, but no error either must be only options
464         // We will init the collator from UCA   
465         collator.setWithUCATables();
466     }
467         // And set only the options
468         m_parser_.setDefaultOptionsInCollator(collator);
469     }
470     
471     private void copyRangeFromUCA(BuildTable t, int start, int end) {
472         int u = 0;
473         for (u = start; u <= end; u ++) {
474             // if ((CE = ucmpe32_get(t.m_mapping, u)) == UCOL_NOT_FOUND
475             int CE = t.m_mapping_.getValue(u);
476             if (CE == CE_NOT_FOUND_ 
477                 // this test is for contractions that are missing the starting 
478                 // element. Looks like latin-1 should be done before 
479                 // assembling the table, even if it results in more false 
480                 // closure elements
481                 || (isContractionTableElement(CE) 
482             && getCE(t.m_contractions_, CE, 0) == CE_NOT_FOUND_)) {
483                 //m_utilElement_.m_uchars_ = str.toString();
484                 m_utilElement_.m_uchars_ = UCharacter.toString(u);
485                 m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
486                 m_utilElement_.m_prefix_ = 0;
487                 m_utilElement_.m_CELength_ = 0;
488                 m_utilColEIter_.setText(m_utilElement_.m_uchars_);
489                 while (CE != CollationElementIterator.NULLORDER) {
490                     CE = m_utilColEIter_.next();
491                     if (CE != CollationElementIterator.NULLORDER) {
492                         m_utilElement_.m_CEs_[m_utilElement_.m_CELength_ ++] 
493                 = CE;
494                     }
495                 }
496                 addAnElement(t, m_utilElement_);
497             }
498         }
499     }
500             
501     /**
502      * 2.  Eliminate the negative lists by doing the following for each 
503      * non-null negative list: 
504      * o   if previousCE(baseCE, strongestN) != some ListHeader X's baseCE, 
505      * create new ListHeader X 
506      * o   reverse the list, add to the end of X's positive list. Reset the 
507      * strength of the first item you add, based on the stronger strength 
508      * levels of the two lists. 
509      * 
510      * 3.  For each ListHeader with a non-null positive list: 
511      * o   Find all character strings with CEs between the baseCE and the 
512      * next/previous CE, at the strength of the first token. Add these to the 
513      * tailoring. 
514      *     ? That is, if UCA has ...  x <<< X << x' <<< X' < y ..., and the 
515      *       tailoring has & x < z... 
516      *     ? Then we change the tailoring to & x  <<< X << x' <<< X' < z ... 
517      * 
518      * It is possible that this part should be done even while constructing list
519      * The problem is that it is unknown what is going to be the strongest 
520      * weight.
521      * So we might as well do it here
522      * o   Allocate CEs for each token in the list, based on the total number N 
523      * of the largest level difference, and the gap G between baseCE and nextCE 
524      * at that level. The relation * between the last item and nextCE is the 
525      * same as the strongest strength. 
526      * o   Example: baseCE < a << b <<< q << c < d < e * nextCE(X,1) 
527      *     ? There are 3 primary items: a, d, e. Fit them into the primary gap. 
528      *     Then fit b and c into the secondary gap between a and d, then fit q 
529      *     into the tertiary gap between b and c. 
530      * o   Example: baseCE << b <<< q << c * nextCE(X,2) 
531      *     ? There are 2 secondary items: b, c. Fit them into the secondary gap. 
532      *       Then fit q into the tertiary gap between b and c. 
533      * o   When incrementing primary values, we will not cross high byte 
534      *     boundaries except where there is only a single-byte primary. That is 
535      *     to ensure that the script reordering will continue to work. 
536      * @param collator the rule based collator to update
537      * @exception Exception thrown when internal program error occurs
538      */
539     void assembleTailoringTable(RuleBasedCollator collator) throws Exception  
540     {
541         
542     for (int i = 0; i < m_parser_.m_resultLength_; i ++) {
543         // now we need to generate the CEs  
544         // We stuff the initial value in the buffers, and increase the 
545             // appropriate buffer according to strength
546             if  (m_parser_.m_listHeader_[i].m_first_ != null) { 
547                 // if there are any elements
548                 // due to the way parser works, subsequent tailorings
549                 // may remove all the elements from a sequence, therefore
550                 // leaving an empty tailoring sequence.
551         initBuffers(m_parser_.m_listHeader_[i]);
552             }
553     }
554         
555         if (m_parser_.m_variableTop_ != null) { 
556             // stuff the variable top value
557         m_parser_.m_options_.m_variableTopValue_ 
558         = m_parser_.m_variableTop_.m_CE_[0] >>> 16;
559         // remove it from the list
560         if (m_parser_.m_variableTop_.m_listHeader_.m_first_ 
561                 == m_parser_.m_variableTop_) { // first in list
562         m_parser_.m_variableTop_.m_listHeader_.m_first_ 
563             = m_parser_.m_variableTop_.m_next_;
564         }
565         if (m_parser_.m_variableTop_.m_listHeader_.m_last_ 
566         == m_parser_.m_variableTop_) { 
567                 // first in list
568         m_parser_.m_variableTop_.m_listHeader_.m_last_ 
569             = m_parser_.m_variableTop_.m_previous_;    
570         }
571         if (m_parser_.m_variableTop_.m_next_ != null) {
572         m_parser_.m_variableTop_.m_next_.m_previous_ 
573             = m_parser_.m_variableTop_.m_previous_;
574         }
575         if (m_parser_.m_variableTop_.m_previous_ != null) {
576         m_parser_.m_variableTop_.m_previous_.m_next_ 
577             = m_parser_.m_variableTop_.m_next_;
578         }
579     }
580         
581     BuildTable t = new BuildTable(m_parser_);
582         
583         // After this, we have assigned CE values to all regular CEs now we 
584     // will go through list once more and resolve expansions, make 
585     // UCAElements structs and add them to table               
586     for (int i = 0; i < m_parser_.m_resultLength_; i ++) {
587         // now we need to generate the CEs 
588         // We stuff the initial value in the buffers, and increase the 
589         // appropriate buffer according to strength                                                          */
590         createElements(t, m_parser_.m_listHeader_[i]);
591     }
592         
593         m_utilElement_.clear();
594         StringBuffer   str = new StringBuffer  ();
595         
596     // add latin-1 stuff
597         copyRangeFromUCA(t, 0, 0xFF);
598     
599         // add stuff for copying 
600         if(m_parser_.m_copySet_ != null) {
601             int i = 0;
602             for(i = 0; i < m_parser_.m_copySet_.getRangeCount(); i++) {
603                 copyRangeFromUCA(t, m_parser_.m_copySet_.getRangeStart(i), 
604                                  m_parser_.m_copySet_.getRangeEnd(i));
605             }
606         }
607         
608         // copy contractions from the UCA - this is felt mostly for cyrillic
609     char conts[] = RuleBasedCollator.UCA_CONTRACTIONS_;
610         int offset = 0;
611     while (conts[offset] != 0) {
612         // tailoredCE = ucmpe32_get(t.m_mapping, *conts);
613             int tailoredCE = t.m_mapping_.getValue(conts[offset]);
614         if (tailoredCE != CE_NOT_FOUND_) {         
615         boolean needToAdd = true;
616         if (isContractionTableElement(tailoredCE)) {
617                     if (isTailored(t.m_contractions_, tailoredCE, 
618                                    conts, offset + 1) == true) {
619             needToAdd = false;
620             }
621         }
622                 if(m_parser_.m_removeSet_ != null && m_parser_.m_removeSet_.contains(conts[offset])) {
623                     needToAdd = false;
624                 }
625 
626                 
627                 if (needToAdd == true) { 
628                     // we need to add if this contraction is not tailored.
629             m_utilElement_.m_prefix_ = 0;
630             m_utilElement_.m_prefixChars_ = null;
631             m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
632                     str.delete(0, str.length());
633                     str.append(conts[offset]);
634                     str.append(conts[offset + 1]);
635             if (conts[offset + 2] != 0) {
636             str.append(conts[offset + 2]);
637             } 
638                     m_utilElement_.m_uchars_ = str.toString();
639                     m_utilElement_.m_CELength_ = 0;
640                     m_utilColEIter_.setText(m_utilElement_.m_uchars_);
641                     while (true) {
642                         int CE = m_utilColEIter_.next();
643                         if (CE != CollationElementIterator.NULLORDER) {
644                             m_utilElement_.m_CEs_[m_utilElement_.m_CELength_ 
645                          ++] = CE;
646                         }
647                         else {
648                             break;
649                         }
650                     }
651                     addAnElement(t, m_utilElement_);
652                 }
653         } else if(m_parser_.m_removeSet_ != null && m_parser_.m_removeSet_.contains(conts[offset])) {
654                 copyRangeFromUCA(t, conts[offset], conts[offset]);
655             }
656             
657         offset += 3;
658     }
659         
660         // Add completely ignorable elements
661         processUCACompleteIgnorables(t);
662         
663         // canonical closure 
664         canonicalClosure(t);
665   
666     // still need to produce compatibility closure
667     assembleTable(t, collator);  
668     }
669     
670     // private inner classes -------------------------------------------------
671     
672     private static class CEGenerator 
673     {
674         // package private data members --------------------------------------
675         
676     WeightRange m_ranges_[];
677     int m_rangesLength_;
678     int m_byteSize_; 
679         int m_start_; 
680         int m_limit_;
681     int m_maxCount_;
682     int m_count_;
683     int m_current_;
684     int m_fLow_; // forbidden Low 
685     int m_fHigh_; // forbidden High 
686         
687         // package private constructor ---------------------------------------
688         
689         CEGenerator() 
690         {
691             m_ranges_ = new WeightRange[7];      
692             for (int i = 6; i >= 0; i --) {
693                 m_ranges_[i] = new WeightRange();
694             }
695         }
696     }
697 
698     private static class WeightRange implements Comparable  
699     {
700         // public methods ----------------------------------------------------
701         
702         /**
703          * Compares this object with target
704          * @param target object to compare with
705          * @return 0 if equals, 1 if this is > target, -1 otherwise
706          */
707         public int compareTo(Object   target) 
708         {
709             if (this == target) {
710                 return 0;
711             }
712             int tstart = ((WeightRange)target).m_start_;   
713             if (m_start_ == tstart) {
714                 return 0;
715             }
716             if (m_start_ > tstart) {
717                 return 1;
718             }
719             return -1;
720         }
721         
722         /**
723          * Initialize 
724          */
725         public void clear()
726         {
727             m_start_ = 0;
728             m_end_ = 0;
729             m_length_ = 0; 
730             m_count_ = 0;
731             m_length2_ = 0;
732             m_count2_ = 0;
733         }
734         
735         // package private data members --------------------------------------
736         
737     int m_start_;
738         int m_end_;
739     int m_length_; 
740         int m_count_;
741     int m_length2_;
742     int m_count2_;
743         
744         // package private constructor ---------------------------------------
745         
746         WeightRange()
747         {
748             clear();
749         }
750         
751         /**
752          * Copy constructor.
753          * Cloneable is troublesome, needs to check for exception
754          * @param source to clone
755          */
756         WeightRange(WeightRange source)
757         {
758             m_start_ = source.m_start_;
759             m_end_ = source.m_end_;
760             m_length_ = source.m_length_; 
761             m_count_ = source.m_count_;
762             m_length2_ = source.m_length2_;
763             m_count2_ = source.m_count2_;
764         }
765     }
766     
767     private static class MaxJamoExpansionTable
768     {
769     // package private data members --------------------------------------
770         
771     Vector   m_endExpansionCE_;
772     // vector of booleans
773     Vector   m_isV_;
774     byte m_maxLSize_;
775     byte m_maxVSize_;
776     byte m_maxTSize_;
777         
778     // package private constructor ---------------------------------------
779         
780     MaxJamoExpansionTable()
781     {
782         m_endExpansionCE_ = new Vector  ();
783         m_isV_ = new Vector  ();
784         m_endExpansionCE_.add(new Integer  (0));
785         m_isV_.add(new Boolean  (false));
786             m_maxLSize_ = 1;
787             m_maxVSize_ = 1;
788             m_maxTSize_ = 1;
789     }
790         
791         MaxJamoExpansionTable(MaxJamoExpansionTable table)
792         {
793             m_endExpansionCE_ = (Vector  )table.m_endExpansionCE_.clone();
794             m_isV_ = (Vector  )table.m_isV_.clone();
795             m_maxLSize_ = table.m_maxLSize_;
796             m_maxVSize_ = table.m_maxVSize_;
797             m_maxTSize_ = table.m_maxTSize_;
798         }
799     }
800     
801     private static class MaxExpansionTable 
802     {
803     // package private constructor --------------------------------------
804         
805     MaxExpansionTable() 
806     {
807         m_endExpansionCE_ = new Vector  ();
808         m_expansionCESize_ = new Vector  ();
809         m_endExpansionCE_.add(new Integer  (0));
810         m_expansionCESize_.add(new Byte  ((byte)0));
811     }
812         
813         MaxExpansionTable(MaxExpansionTable table) 
814         {
815             m_endExpansionCE_ = (Vector  )table.m_endExpansionCE_.clone();
816             m_expansionCESize_ = (Vector  )table.m_expansionCESize_.clone();
817         }
818         
819     // package private data member --------------------------------------
820         
821     Vector   m_endExpansionCE_;
822     Vector   m_expansionCESize_;
823     }
824     
825     private static class BasicContractionTable 
826     {
827     // package private constructors -------------------------------------
828         
829     BasicContractionTable()
830     {
831         m_CEs_ = new Vector  ();
832         m_codePoints_ = new StringBuffer  ();
833     }
834         
835     // package private data members -------------------------------------
836         
837     StringBuffer   m_codePoints_;
838     Vector   m_CEs_;
839     }
840     
841     private static class ContractionTable 
842     {
843     // package private constructor --------------------------------------
844         
845     /**
846      * Builds a contraction table
847      * @param mapping
848      */
849     ContractionTable(IntTrieBuilder mapping) 
850     {
851         m_mapping_ = mapping;
852         m_elements_ = new Vector  ();
853         m_CEs_ = new Vector  ();
854         m_codePoints_ = new StringBuffer  ();
855         m_offsets_ = new Vector  ();
856         m_currentTag_ = CE_NOT_FOUND_TAG_;
857     }
858         
859         /**
860          * Copies a contraction table.
861          * Not all data will be copied into their own object.
862          * @param table
863          */
864         ContractionTable(ContractionTable table) 
865         {
866             m_mapping_ = table.m_mapping_;
867             m_elements_ = (Vector  )table.m_elements_.clone();
868             m_codePoints_ = new StringBuffer  (table.m_codePoints_.toString());
869             m_CEs_ = (Vector  )table.m_CEs_.clone();
870             m_offsets_ = (Vector  )table.m_offsets_.clone();
871             m_currentTag_ = table.m_currentTag_;
872         }
873     
874     // package private data members ------------------------------------
875         
876     /**
877      * Vector of BasicContractionTable
878      */
879         Vector   m_elements_;
880         IntTrieBuilder m_mapping_;
881         StringBuffer   m_codePoints_;
882     Vector   m_CEs_;
883     Vector   m_offsets_;
884     int m_currentTag_;
885     }
886 
887     private static final class BuildTable implements TrieBuilder.DataManipulate
888     {
889     // package private methods ------------------------------------------
890         
891         /**
892      * For construction of the Trie tables.
893      * Has to be labeled public
894      * @param cp
895      * @param offset
896      * @return data offset or 0 
897      * @draft 2.2
898      */
899     public int getFoldedValue(int cp, int offset)
900     {
901         int limit = cp + 0x400;
902         while (cp < limit) {
903         int value = m_mapping_.getValue(cp);
904         boolean inBlockZero = m_mapping_.isInZeroBlock(cp);
905         int tag = getCETag(value);
906         if (inBlockZero == true) {
907             cp += TrieBuilder.DATA_BLOCK_LENGTH;
908         } 
909         else if (!(isSpecial(value) && (tag == CE_IMPLICIT_TAG_ 
910                         || tag == CE_NOT_FOUND_TAG_))) {
911                     // These are values that are starting in either UCA 
912                     // (IMPLICIT_TAG) or in the tailorings (NOT_FOUND_TAG). 
913                     // Presence of these tags means that there is nothing in 
914                     // this position and that it should be skipped.
915             return RuleBasedCollator.CE_SPECIAL_FLAG_ 
916             | (CE_SURROGATE_TAG_ << 24) | offset;
917         } 
918         else {
919             ++ cp;
920         }
921         }
922         return 0;
923     }
924     
925     // package private constructor --------------------------------------
926         
927     /**
928      * Returns a table
929      */
930     BuildTable(CollationRuleParser parser) 
931     {
932         m_collator_ = new RuleBasedCollator();
933         m_collator_.setWithUCAData();
934         MaxExpansionTable maxet = new MaxExpansionTable();
935         MaxJamoExpansionTable maxjet = new MaxJamoExpansionTable();
936         m_options_ = parser.m_options_;
937         m_expansions_ = new Vector  ();
938         // Do your own mallocs for the structure, array and have linear 
939         // Latin 1
940             int trieinitialvalue = RuleBasedCollator.CE_SPECIAL_FLAG_
941         | (CE_NOT_FOUND_TAG_ << 24);
942         // temporary fix for jb3822, 0x100000 -> 30000
943         m_mapping_ = new IntTrieBuilder(null, 0x30000, trieinitialvalue, 
944                                             trieinitialvalue, true); 
945         m_prefixLookup_ = new Hashtable  ();
946         // uhash_open(prefixLookupHash, prefixLookupComp);
947         m_contractions_ = new ContractionTable(m_mapping_);
948         // copy UCA's maxexpansion and merge as we go along
949         m_maxExpansions_ = maxet;
950         // adding an extra initial value for easier manipulation 
951         for (int i = 0; 
952          i < RuleBasedCollator.UCA_.m_expansionEndCE_.length; i ++) {
953         maxet.m_endExpansionCE_.add(new Integer  (
954                             RuleBasedCollator.UCA_.m_expansionEndCE_[i]));
955         maxet.m_expansionCESize_.add(new Byte  (
956                               RuleBasedCollator.UCA_.m_expansionEndCEMaxSize_[i]));
957         }
958         m_maxJamoExpansions_ = maxjet;
959         
960         m_unsafeCP_ = new byte[UNSAFECP_TABLE_SIZE_];
961         m_contrEndCP_ = new byte[UNSAFECP_TABLE_SIZE_];
962         Arrays.fill(m_unsafeCP_, (byte)0);
963         Arrays.fill(m_contrEndCP_, (byte)0);
964     }
965     
966         /**
967          * Duplicating a BuildTable.
968          * Not all data will be duplicated into their own object.
969          * @param table to clone
970          */
971         BuildTable(BuildTable table) 
972         {
973             m_collator_ = table.m_collator_;
974             m_mapping_ = new IntTrieBuilder(table.m_mapping_);
975             m_expansions_ = (Vector  )table.m_expansions_.clone();
976             m_contractions_ = new ContractionTable(table.m_contractions_);
977             m_contractions_.m_mapping_ = m_mapping_;
978             m_options_ = table.m_options_;
979             m_maxExpansions_ = new MaxExpansionTable(table.m_maxExpansions_);
980             m_maxJamoExpansions_ 
981         = new MaxJamoExpansionTable(table.m_maxJamoExpansions_);
982             m_unsafeCP_ = new byte[table.m_unsafeCP_.length];
983             System.arraycopy(table.m_unsafeCP_, 0, m_unsafeCP_, 0,
984                              m_unsafeCP_.length);
985             m_contrEndCP_ = new byte[table.m_contrEndCP_.length];
986             System.arraycopy(table.m_contrEndCP_, 0, m_contrEndCP_, 0,
987                              m_contrEndCP_.length);
988         }
989         
990     // package private data members -------------------------------------
991         
992     RuleBasedCollator m_collator_;
993         IntTrieBuilder m_mapping_; 
994         Vector   m_expansions_; 
995         ContractionTable m_contractions_;
996     // UCATableHeader image;
997     CollationRuleParser.OptionSet m_options_;
998     MaxExpansionTable m_maxExpansions_;
999     MaxJamoExpansionTable m_maxJamoExpansions_;
1000    byte m_unsafeCP_[];
1001    byte m_contrEndCP_[];
1002    Hashtable   m_prefixLookup_;
1003    } 
1004    
1005    private static class Elements
1006    {
1007    // package private data members -------------------------------------
1008        
1009    String   m_prefixChars_;
1010    int m_prefix_;
1011    String   m_uchars_;
1012    /**
1013     * Working string
1014     */
1015    String   m_cPoints_;    
1016    /**
1017     * Offset to the working string
1018     */
1019    int m_cPointsOffset_;
1020    /** 
1021     * These are collation elements - there could be more than one - in 
1022     * case of expansion 
1023     */    
1024    int m_CEs_[];      
1025        int m_CELength_;
1026    /** 
1027     * This is the value element maps in original table   
1028     */
1029    int m_mapCE_;         
1030    int m_sizePrim_[];
1031    int m_sizeSec_[];
1032    int m_sizeTer_[];
1033    boolean m_variableTop_;
1034    boolean m_caseBit_;
1035        
1036    // package private constructors -------------------------------------
1037        
1038    /**
1039     * Package private constructor
1040     */
1041    Elements()
1042    {
1043        m_sizePrim_ = new int[128];    
1044        m_sizeSec_ = new int[128];    
1045        m_sizeTer_ = new int[128];    
1046            m_CEs_ = new int[256];
1047            m_CELength_ = 0;
1048    }
1049
1050        /**
1051     * Package private constructor
1052     */
1053    Elements(Elements element)
1054    {
1055            m_prefixChars_ = element.m_prefixChars_;
1056            m_prefix_ = element.m_prefix_;
1057            m_uchars_ = element.m_uchars_;
1058            m_cPoints_ = element.m_cPoints_;    
1059            m_cPointsOffset_ = element.m_cPointsOffset_;    
1060            m_CEs_ = element.m_CEs_;
1061            m_CELength_ = element.m_CELength_;
1062            m_mapCE_ = element.m_mapCE_;
1063        m_sizePrim_ = element.m_sizePrim_;
1064        m_sizeSec_ = element.m_sizeSec_;
1065        m_sizeTer_ = element.m_sizeTer_;
1066        m_variableTop_ = element.m_variableTop_;
1067        m_caseBit_ = element.m_caseBit_;
1068    }
1069
1070        // package private methods -------------------------------------------
1071        
1072        /**
1073         * Initializing the elements
1074         */
1075        public void clear()
1076        {
1077            m_prefixChars_ = null;
1078            m_prefix_ = 0;
1079            m_uchars_ = null;
1080            m_cPoints_ = null;    
1081            m_cPointsOffset_ = 0;  
1082            m_CELength_ = 0;
1083            m_mapCE_ = 0;
1084            Arrays.fill(m_sizePrim_, 0);
1085            Arrays.fill(m_sizeSec_, 0);
1086            Arrays.fill(m_sizeTer_, 0);
1087            m_variableTop_ = false;
1088            m_caseBit_ = false;
1089        }
1090
1091        
1092        /**
1093         * Hashcode calculation for token
1094         * @return the hashcode
1095         */
1096        public int hashCode()
1097        {
1098        String   str = m_cPoints_.substring(m_cPointsOffset_);
1099        return str.hashCode();
1100    }
1101        
1102    /**
1103     * Equals calculation
1104     * @param target object to compare
1105     * @return true if target is the same as this object
1106     */
1107    public boolean equals(Object   target)
1108    {
1109        if (target == this) {
1110        return true;
1111        }
1112        if (target instanceof Elements) {
1113        Elements t = (Elements)target;
1114        int size = m_cPoints_.length() - m_cPointsOffset_;
1115        if (size == t.m_cPoints_.length() - t.m_cPointsOffset_) {
1116            return t.m_cPoints_.regionMatches(t.m_cPointsOffset_, 
1117                              m_cPoints_, 
1118                              m_cPointsOffset_, size);
1119        }
1120            }
1121            return false;
1122    }
1123    }
1124
1125    // private data member ---------------------------------------------------
1126    
    /**
     * Maximum strength used in CE building.
     * Also the length of the CE parts buffer m_utilCEBuffer_.
     */
    private static final int CE_BASIC_STRENGTH_LIMIT_ = 3;
    /**
     * Maximum collation strength.
     * Also the length of the counter buffer m_utilIntBuffer_.
     */
    private static final int CE_STRENGTH_LIMIT_ = 16;
    /**
     * Strength mask array, used in inverse UCA
     */
    private static final int STRENGTH_MASK_[] = {0xFFFF0000, 0xFFFFFF00, 
                                                 0xFFFFFFFF};
    /**
     * CE value for not found. This is a complete collation element, as 
     * opposed to the tag constants below.
     * NOTE(review): presumably the special flag combined with 
     * CE_NOT_FOUND_TAG_ - confirm against RuleBasedCollator.CE_SPECIAL_FLAG_
     */
    private static final int CE_NOT_FOUND_ = 0xF0000000;
    /**
     * CE tag for not found.
     * Tags are shifted left by 24 bits and combined with 
     * RuleBasedCollator.CE_SPECIAL_FLAG_ to form a special CE.
     */
    private static final int CE_NOT_FOUND_TAG_ = 0;
    /**
     * This code point results in an expansion 
     */
    private static final int CE_EXPANSION_TAG_ = 1;
    /** 
     * Start of a contraction 
     */
    private static final int CE_CONTRACTION_TAG_ = 2;
    /** 
     * Thai character - do the reordering 
     */
    private static final int CE_THAI_TAG_ = 3;            
    /** 
     * Charset processing, not yet implemented
     */
    private static final int CE_CHARSET_TAG_ = 4;         
    /** 
     * Lead surrogate that is tailored and doesn't start a contraction 
     */
    private static final int CE_SURROGATE_TAG_ = 5;
    /** 
     * AC00-D7AF
     */
    private static final int CE_HANGUL_SYLLABLE_TAG_ = 6;
    /** 
     * D800-DBFF
     */
    private static final int CE_LEAD_SURROGATE_TAG_ = 7;
    /** 
     * DC00-DFFF
     */
    private static final int CE_TRAIL_SURROGATE_TAG_ = 8; 
    /** 
     * 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D
     */    
    private static final int CE_CJK_IMPLICIT_TAG_ = 9;
    /**
     * Tag of the implicit values starting in the UCA. Like the not found 
     * tag it marks a position holding nothing when trie values are folded.
     */
    private static final int CE_IMPLICIT_TAG_ = 10;
    /**
     * NOTE(review): undocumented in this file, the name suggests CEs that 
     * need special processing - confirm against the collator implementation
     */
    private static final int CE_SPEC_PROC_TAG_ = 11;
    /** 
     * This is a three byte primary with starting secondaries and tertiaries.
     * It fits in a single 32 bit CE and is used instead of expansion to save
     * space without affecting the performance (hopefully) 
     */
    private static final int CE_LONG_PRIMARY_TAG_ = 12;  
    /** 
     * Unsafe UChar hash table size. Size is 32 bytes for 1 bit for each 
     * latin 1 char + some power of two for hashing the rest of the chars. 
     * Size in bytes. Both the unsafe code point table and the contraction 
     * end table of a BuildTable are allocated with this length.
     */
    private static final int UNSAFECP_TABLE_SIZE_ = 1056;
    /** 
     * Mask value down to "some power of two" -1. Number of bits, not num of 
     * bytes.       
     */
    private static final int UNSAFECP_TABLE_MASK_ = 0x1fff;
    /**
     * Case values. Upper and mixed case together cover the bits 0xC0, 
     * which is the mask doCE applies when copying case bits into a CE.
     */
    private static final int UPPER_CASE_ = 0x80;
    private static final int MIXED_CASE_ = 0x40;
    private static final int LOWER_CASE_ = 0x00;
    /**
     * Initial table size
     */
    private static final int INIT_TABLE_SIZE_ = 1028;
    /**
     * Header size, copied from ICU4C, to be changed when that value changes
     */
    private static final int HEADER_SIZE_ = 0xC4;
    /**
     * Contraction table new element indicator
     */
    private static final int CONTRACTION_TABLE_NEW_ELEMENT_ = 0xFFFFFF;
    /**
     * Parser for the rules. Created from the rule string in the 
     * constructor, which also has it assemble its token list.
     */
    private CollationRuleParser m_parser_;
    /**
     * Utility UCA collation element iterator, obtained from 
     * RuleBasedCollator.UCA_ with an empty source string
     */
    private CollationElementIterator m_utilColEIter_;
    /**
     * Utility data members. These are scratch objects reused between 
     * calls, so their contents are only meaningful within the method 
     * currently using them.
     */
    // weight generators, indexed by Collator.PRIMARY, Collator.SECONDARY 
    // and Collator.TERTIARY
    private CEGenerator m_utilGens_[] = {new CEGenerator(), new CEGenerator(),
                                         new CEGenerator()};
    // primary, secondary and tertiary CE parts of the token being built, 
    // handed to doCE
    private int m_utilCEBuffer_[] = new int[CE_BASIC_STRENGTH_LIMIT_];
    // per strength counters in initBuffers; doCE reuses the first three 
    // slots for the byte counts of the CE parts
    private int m_utilIntBuffer_[] = new int[CE_STRENGTH_LIMIT_];
    private Elements m_utilElement_ = new Elements();
    private Elements m_utilElement2_ = new Elements();
    private CollationRuleParser.Token m_utilToken_ 
    = new CollationRuleParser.Token();
    private int m_utilCountBuffer_[] = new int[6];     
    private long m_utilLongBuffer_[] = new long[5];
    // two sets of five preallocated weight ranges
    private WeightRange m_utilLowerWeightRange_[] = 
    {new WeightRange(), new WeightRange(), 
     new WeightRange(), new WeightRange(), 
     new WeightRange()}; 
    private WeightRange m_utilUpperWeightRange_[] = 
    {new WeightRange(), new WeightRange(), 
     new WeightRange(), new WeightRange(), 
     new WeightRange()}; 
    private WeightRange m_utilWeightRange_ = new WeightRange();
    private char m_utilCharBuffer_[] = new char[256];
    private CanonicalIterator m_utilCanIter_ = new CanonicalIterator("");
    private StringBuffer   m_utilStringBuffer_ = new StringBuffer  ("");
1254    
1255    // private methods -------------------------------------------------------
1256    
1257    /**
1258     * @param listheader parsed rule tokens
1259     * @exception Exception thrown when internal error occurs
1260     */
1261    private void initBuffers(CollationRuleParser.TokenListHeader listheader) 
1262    throws Exception  
1263    {
1264        CollationRuleParser.Token token = listheader.m_last_;
1265        Arrays.fill(m_utilIntBuffer_, 0, CE_STRENGTH_LIMIT_, 0);
1266        
1267    token.m_toInsert_ = 1;
1268    m_utilIntBuffer_[token.m_strength_] = 1;
1269    while (token.m_previous_ != null) {
1270        if (token.m_previous_.m_strength_ < token.m_strength_) { 
1271                // going up
1272        m_utilIntBuffer_[token.m_strength_] = 0;
1273        m_utilIntBuffer_[token.m_previous_.m_strength_] ++;
1274        } 
1275            else if (token.m_previous_.m_strength_ > token.m_strength_) { 
1276                // going down
1277        m_utilIntBuffer_[token.m_previous_.m_strength_] = 1;
1278        } 
1279            else {
1280        m_utilIntBuffer_[token.m_strength_] ++;
1281        }
1282        token = token.m_previous_;
1283        token.m_toInsert_ = m_utilIntBuffer_[token.m_strength_];
1284    } 
1285        
1286    token.m_toInsert_ = m_utilIntBuffer_[token.m_strength_];
1287    INVERSE_UCA_.getInverseGapPositions(listheader);
1288        
1289    token = listheader.m_first_;
1290    int fstrength = Collator.IDENTICAL;
1291    int initstrength = Collator.IDENTICAL;
1292        
1293    m_utilCEBuffer_[Collator.PRIMARY] = mergeCE(listheader.m_baseCE_, 
1294                            listheader.m_baseContCE_,
1295                            Collator.PRIMARY);
1296    m_utilCEBuffer_[Collator.SECONDARY] = mergeCE(listheader.m_baseCE_, 
1297                              listheader.m_baseContCE_,
1298                              Collator.SECONDARY);
1299    m_utilCEBuffer_[Collator.TERTIARY] = mergeCE(listheader.m_baseCE_, 
1300                             listheader.m_baseContCE_,
1301                             Collator.TERTIARY);
1302    while (token != null) {
1303        fstrength = token.m_strength_;
1304        if (fstrength < initstrength) {
1305        initstrength = fstrength;
1306        if (listheader.m_pos_[fstrength] == -1) {
1307            while (listheader.m_pos_[fstrength] == -1 && fstrength > 0) 
1308            {
1309                fstrength--;
1310            }
1311            if (listheader.m_pos_[fstrength] == -1) {
1312            throw new Exception  ("Internal program error");
1313            }
1314        }
1315        if (initstrength == Collator.TERTIARY) { 
1316                    // starting with tertiary
1317            m_utilCEBuffer_[Collator.PRIMARY] 
1318            = listheader.m_gapsLo_[fstrength * 3];
1319            m_utilCEBuffer_[Collator.SECONDARY] 
1320            = listheader.m_gapsLo_[fstrength * 3 + 1];
1321            m_utilCEBuffer_[Collator.TERTIARY] = getCEGenerator(
1322                                    m_utilGens_[Collator.TERTIARY], 
1323                                    listheader.m_gapsLo_, 
1324                                    listheader.m_gapsHi_, 
1325                                    token, fstrength); 
1326        } 
1327                else if (initstrength == Collator.SECONDARY) { 
1328                    // secondaries
1329            m_utilCEBuffer_[Collator.PRIMARY] 
1330            = listheader.m_gapsLo_[fstrength * 3];
1331            m_utilCEBuffer_[Collator.SECONDARY] 
1332            = getCEGenerator(
1333                                         m_utilGens_[Collator.SECONDARY], 
1334                                         listheader.m_gapsLo_, 
1335                                         listheader.m_gapsHi_, 
1336                                         token, fstrength);
1337            m_utilCEBuffer_[Collator.TERTIARY] 
1338            = getSimpleCEGenerator(
1339                           m_utilGens_[Collator.TERTIARY], 
1340                           token, Collator.TERTIARY);
1341        } 
1342                else { 
1343                    // primaries 
1344            m_utilCEBuffer_[Collator.PRIMARY] 
1345            = getCEGenerator(
1346                     m_utilGens_[Collator.PRIMARY], 
1347                     listheader.m_gapsLo_, 
1348                     listheader.m_gapsHi_, 
1349                     token, fstrength);
1350            m_utilCEBuffer_[Collator.SECONDARY] 
1351            = getSimpleCEGenerator(
1352                                               m_utilGens_[Collator.SECONDARY], 
1353                                               token, Collator.SECONDARY);
1354            m_utilCEBuffer_[Collator.TERTIARY] 
1355            = getSimpleCEGenerator(
1356                                               m_utilGens_[Collator.TERTIARY], 
1357                                               token, Collator.TERTIARY);
1358        }
1359        } 
1360            else {
1361        if (token.m_strength_ == Collator.TERTIARY) {
1362            m_utilCEBuffer_[Collator.TERTIARY] 
1363            = getNextGenerated(m_utilGens_[Collator.TERTIARY]);
1364        } 
1365                else if (token.m_strength_ == Collator.SECONDARY) {
1366            m_utilCEBuffer_[Collator.SECONDARY] 
1367            = getNextGenerated(m_utilGens_[Collator.SECONDARY]);
1368            m_utilCEBuffer_[Collator.TERTIARY] 
1369            = getSimpleCEGenerator(
1370                           m_utilGens_[Collator.TERTIARY], 
1371                           token, Collator.TERTIARY);
1372        } 
1373                else if (token.m_strength_ == Collator.PRIMARY) {
1374            m_utilCEBuffer_[Collator.PRIMARY] 
1375            = getNextGenerated(
1376                       m_utilGens_[Collator.PRIMARY]);
1377            m_utilCEBuffer_[Collator.SECONDARY] 
1378            = getSimpleCEGenerator(
1379                           m_utilGens_[Collator.SECONDARY], 
1380                           token, Collator.SECONDARY);
1381            m_utilCEBuffer_[Collator.TERTIARY] 
1382            = getSimpleCEGenerator(
1383                           m_utilGens_[Collator.TERTIARY], 
1384                           token, Collator.TERTIARY);
1385        }
1386        }
1387        doCE(m_utilCEBuffer_, token);
1388        token = token.m_next_;
1389    }
1390    }
1391
1392    /**
1393     * Get the next generated ce
1394     * @param g ce generator
1395     * @return next generated ce 
1396     */
1397    private int getNextGenerated(CEGenerator g) 
1398    {
1399        g.m_current_ = nextWeight(g);
1400        return g.m_current_;
1401    }
1402
1403    /**
1404     * @param g CEGenerator
1405     * @param token rule token
1406     * @param strength 
1407     * @return ce generator
1408     * @exception Exception thrown when internal error occurs
1409     */
1410    private int getSimpleCEGenerator(CEGenerator g, 
1411                                     CollationRuleParser.Token token, 
1412                                     int strength) throws Exception  
1413    {
1414        int high, low, count = 1;
1415        int maxbyte = (strength == Collator.TERTIARY) ? 0x3F : 0xFF;
1416
1417    if (strength == Collator.SECONDARY) {
1418        low = RuleBasedCollator.COMMON_TOP_2_ << 24;
1419        high = 0xFFFFFFFF;
1420        count = 0xFF - RuleBasedCollator.COMMON_TOP_2_;
1421    } 
1422        else {
1423        low = RuleBasedCollator.BYTE_COMMON_ << 24; //0x05000000;
1424        high = 0x40000000;
1425        count = 0x40 - RuleBasedCollator.BYTE_COMMON_;
1426    }
1427    
1428    if (token.m_next_ != null && token.m_next_.m_strength_ == strength) {
1429        count = token.m_next_.m_toInsert_;
1430    } 
1431    
1432    g.m_rangesLength_ = allocateWeights(low, high, count, maxbyte, 
1433                                            g.m_ranges_);
1434    g.m_current_ = RuleBasedCollator.BYTE_COMMON_ << 24;
1435    
1436    if (g.m_rangesLength_ == 0) {
1437        throw new Exception  ("Internal program error");
1438    }
1439    return g.m_current_;
1440    }
1441
1442    /**
1443     * Combines 2 ce into one with respect to the argument strength
1444     * @param ce1 first ce
1445     * @param ce2 second ce
1446     * @param strength strength to use
1447     * @return combined ce
1448     */
1449    private static int mergeCE(int ce1, int ce2, int strength) 
1450    {
1451        int mask = RuleBasedCollator.CE_TERTIARY_MASK_;
1452        if (strength == Collator.SECONDARY) {
1453            mask = RuleBasedCollator.CE_SECONDARY_MASK_;
1454        }
1455        else if (strength == Collator.PRIMARY) {
1456            mask = RuleBasedCollator.CE_PRIMARY_MASK_;
1457        }
1458        ce1 &= mask;
1459        ce2 &= mask;
1460        switch (strength) 
1461        {
1462            case Collator.PRIMARY:
1463                return ce1 | ce2 >>> 16;
1464            case Collator.SECONDARY:
1465                return ce1 << 16 | ce2 << 8;
1466            default:
1467                return ce1 << 24 | ce2 << 16;
1468        }
1469    }
1470    
1471    /**
1472     * @param g CEGenerator
1473     * @param lows low gap array
1474     * @param highs high gap array
1475     * @param token rule token
1476     * @param fstrength 
1477     * @exception Exception thrown when internal error occurs
1478     */
1479    private int getCEGenerator(CEGenerator g, int lows[], int highs[], 
1480                               CollationRuleParser.Token token, int fstrength) 
1481    throws Exception  
1482    {
1483    int strength = token.m_strength_;
1484    int low = lows[fstrength * 3 + strength];
1485    int high = highs[fstrength * 3 + strength];
1486    int maxbyte = 0;
1487    if(strength == Collator.TERTIARY) {
1488        maxbyte = 0x3F;
1489    } else if(strength == Collator.PRIMARY) {
1490        maxbyte = 0xFE;
1491    } else {
1492        maxbyte = 0xFF;
1493    }
1494    
1495    int count = token.m_toInsert_;
1496    
1497    if (Utility.compareUnsigned(low, high) >= 0 
1498            && strength > Collator.PRIMARY) {
1499        int s = strength;
1500        while (true) {
1501        s --;
1502        if (lows[fstrength * 3 + s] != highs[fstrength * 3 + s]) {
1503            if (strength == Collator.SECONDARY) {
1504            low = RuleBasedCollator.COMMON_TOP_2_ << 24;
1505            high = 0xFFFFFFFF;
1506            } 
1507                    else {
1508                        // low = 0x02000000; 
1509                        // This needs to be checked - what if low is
1510                        // not good...
1511            high = 0x40000000;
1512            }
1513            break;
1514        }
1515        if (s < 0) {
1516            throw new Exception  ("Internal program error");
1517        }
1518        }
1519    } 
1520    if (low == 0) {
1521        low = 0x01000000;
1522    }
1523    if (strength == Collator.SECONDARY) { // similar as simple 
1524        if (Utility.compareUnsigned(low, 
1525                    RuleBasedCollator.COMMON_BOTTOM_2_ << 24) >= 0
1526                && Utility.compareUnsigned(low, 
1527                       RuleBasedCollator.COMMON_TOP_2_ << 24) < 0) {
1528        low = RuleBasedCollator.COMMON_TOP_2_ << 24;
1529            }
1530        if (Utility.compareUnsigned(high, 
1531                    RuleBasedCollator.COMMON_BOTTOM_2_ << 24) > 0 
1532                && Utility.compareUnsigned(high, 
1533                       RuleBasedCollator.COMMON_TOP_2_ << 24) < 0) {
1534        high = RuleBasedCollator.COMMON_TOP_2_ << 24;
1535        } 
1536        if (Utility.compareUnsigned(low, 
1537                    RuleBasedCollator.COMMON_BOTTOM_2_ << 24) < 0) {
1538        g.m_rangesLength_ = allocateWeights(
1539                            RuleBasedCollator.BYTE_UNSHIFTED_MIN_ << 24, 
1540                            high, count, maxbyte, g.m_ranges_);
1541        g.m_current_ = nextWeight(g);
1542        //g.m_current_ = RuleBasedCollator.COMMON_BOTTOM_2_ << 24;
1543        return g.m_current_;
1544        }
1545    } 
1546    
1547    g.m_rangesLength_ = allocateWeights(low, high, count, maxbyte, 
1548                                            g.m_ranges_);
1549    if (g.m_rangesLength_ == 0) {
1550        throw new Exception  ("Internal program error");
1551    }
1552    g.m_current_ = nextWeight(g);
1553    return g.m_current_;
1554    }
1555
1556    /**
1557     * @param ceparts list of collation elements parts
1558     * @param token rule token
1559     * @exception Exception thrown when forming case bits for expansions fails
1560     */
1561    private void doCE(int ceparts[], CollationRuleParser.Token token) 
1562    throws Exception  
1563    {
1564        // this one makes the table and stuff
1565    // int noofbytes[] = new int[3];
1566    for (int i = 0; i < 3; i ++) {
1567        // noofbytes[i] = countBytes(ceparts[i]);
1568            m_utilIntBuffer_[i] = countBytes(ceparts[i]);
1569    }
1570    
1571    // Here we have to pack CEs from parts
1572    int cei = 0;
1573    int value = 0;
1574    
1575    while ((cei << 1) < m_utilIntBuffer_[0] || cei < m_utilIntBuffer_[1] 
1576               || cei < m_utilIntBuffer_[2]) {
1577        if (cei > 0) {
1578        value = RuleBasedCollator.CE_CONTINUATION_MARKER_;
1579        } else {
1580        value = 0;
1581        }
1582        
1583        if ((cei << 1) < m_utilIntBuffer_[0]) {
1584        value |= ((ceparts[0] >> (32 - ((cei + 1) << 4))) & 0xFFFF) 
1585                      << 16;
1586        }
1587        if (cei < m_utilIntBuffer_[1]) {
1588        value |= ((ceparts[1] >> (32 - ((cei + 1) << 3))) & 0xFF) << 8;
1589        }
1590            
1591        if (cei < m_utilIntBuffer_[2]) {
1592        value |= ((ceparts[2] >> (32 - ((cei+1) << 3))) & 0x3F);
1593        }
1594        token.m_CE_[cei] = value;
1595        cei ++;
1596    }
1597    if (cei == 0) { // totally ignorable
1598        token.m_CELength_ = 1;
1599        token.m_CE_[0] = 0;
1600    } 
1601    else { // there is at least something
1602        token.m_CELength_ = cei;
1603    }
1604          
1605    // Case bits handling for expansion
1606    if(token.m_CE_[0] != 0) { // case bits should be set only for non-ignorables
1607        int startoftokenrule = token.m_source_ & 0xFF;
1608        if ((token.m_source_ >>> 24) > 1) {
1609            // Do it manually
1610            int length = token.m_source_ >>> 24;
1611            String   tokenstr = token.m_rules_.substring(startoftokenrule, 
1612                                   startoftokenrule + length);
1613            token.m_CE_[0] |= getCaseBits(tokenstr);
1614        } 
1615        else {
1616            // Copy it from the UCA
1617            int caseCE 
1618            = getFirstCE(token.m_rules_.charAt(startoftokenrule));
1619            token.m_CE_[0] |= (caseCE & 0xC0);
1620        }
1621    }
1622    }
1623
1624    /**
1625     * Count the number of non-zero bytes used in the ce
1626     * @param ce 
1627     * @return number of non-zero bytes used in ce
1628     */
1629    private static final int countBytes(int ce)   
1630    {                               
1631    int mask = 0xFFFFFFFF;   
1632    int result = 0;              
1633    while (mask != 0) {            
1634        if ((ce & mask) != 0) { 
1635        result ++;            
1636        }                           
1637        mask >>>= 8;                 
1638    }   
1639        return result;                          
1640    }
1641    
1642    /**
1643     * We are ready to create collation elements
1644     * @param t build table to insert
1645     * @param lh rule token list header
1646     */
1647    private void createElements(BuildTable t, 
1648                CollationRuleParser.TokenListHeader lh)
1649    {
1650    CollationRuleParser.Token tok = lh.m_first_;
1651    m_utilElement_.clear();
1652    while (tok != null) {
1653        // first, check if there are any expansions
1654        // if there are expansions, we need to do a little bit more 
1655        // processing since parts of expansion can be tailored, while 
1656        // others are not
1657        if (tok.m_expansion_ != 0) {
1658        int len = tok.m_expansion_ >>> 24;
1659        int currentSequenceLen = len;
1660        int expOffset = tok.m_expansion_ & 0x00FFFFFF;
1661        m_utilToken_.m_source_ = currentSequenceLen | expOffset;
1662        m_utilToken_.m_rules_ = m_parser_.m_source_;
1663    
1664        while (len > 0) {
1665            currentSequenceLen = len;
1666            while (currentSequenceLen > 0) {
1667            m_utilToken_.m_source_ = (currentSequenceLen << 24) 
1668                | expOffset;
1669            CollationRuleParser.Token expt = 
1670                (CollationRuleParser.Token)
1671                m_parser_.m_hashTable_.get(m_utilToken_);
1672            if (expt != null 
1673                && expt.m_strength_ 
1674                != CollationRuleParser.TOKEN_RESET_) { 
1675                // expansion is tailored
1676                int noOfCEsToCopy = expt.m_CELength_;
1677                for (int j = 0; j < noOfCEsToCopy; j ++) {
1678                tok.m_expCE_[tok.m_expCELength_ + j] 
1679                    = expt.m_CE_[j];
1680                }
1681                tok.m_expCELength_ += noOfCEsToCopy;
1682                // never try to add codepoints and CEs.
1683                // For some odd reason, it won't work.
1684                expOffset += currentSequenceLen; //noOfCEsToCopy;
1685                len -= currentSequenceLen; //noOfCEsToCopy;
1686                break;
1687            } 
1688            else {
1689                currentSequenceLen --;
1690            }
1691            }
1692            if (currentSequenceLen == 0) { 
1693            // couldn't find any tailored subsequence, will have to 
1694            // get one from UCA. first, get the UChars from the 
1695            // rules then pick CEs out until there is no more and 
1696            // stuff them into expansion
1697            m_utilColEIter_.setText(m_parser_.m_source_.substring(
1698                                          expOffset, expOffset + 1));
1699            while (true) {
1700                int order = m_utilColEIter_.next();
1701                if (order == CollationElementIterator.NULLORDER) {
1702                break;
1703                }
1704                tok.m_expCE_[tok.m_expCELength_ ++] = order;
1705            }
1706            expOffset ++;
1707            len --;
1708            }
1709        }
1710        } 
1711        else {
1712        tok.m_expCELength_ = 0;
1713        }
1714    
1715        // set the ucaelement with obtained values
1716            m_utilElement_.m_CELength_ = tok.m_CELength_ + tok.m_expCELength_;
1717            
1718        // copy CEs
1719        System.arraycopy(tok.m_CE_, 0, m_utilElement_.m_CEs_, 0, 
1720                             tok.m_CELength_);
1721        System.arraycopy(tok.m_expCE_, 0, m_utilElement_.m_CEs_, 
1722                             tok.m_CELength_, tok.m_expCELength_);
1723    
1724        // copy UChars 
1725        // We kept prefix and source kind of together, as it is a kind of a 
1726        // contraction. 
1727        // However, now we have to slice the prefix off the main thing - 
1728        m_utilElement_.m_prefix_ = 0;// el.m_prefixChars_;
1729        m_utilElement_.m_cPointsOffset_ = 0; //el.m_uchars_;
1730        if (tok.m_prefix_ != 0) { 
1731        // we will just copy the prefix here, and adjust accordingly in 
1732        // the addPrefix function in ucol_elm. The reason is that we 
1733        // need to add both composed AND decomposed elements to the 
1734        // unsafe table.
1735        int size = tok.m_prefix_ >> 24;
1736        int offset = tok.m_prefix_ & 0x00FFFFFF;
1737        m_utilElement_.m_prefixChars_ 
1738            = m_parser_.m_source_.substring(offset, offset + size);
1739        size = (tok.m_source_ >> 24) - (tok.m_prefix_ >> 24); 
1740        offset = (tok.m_source_ & 0x00FFFFFF) + (tok.m_prefix_ >> 24);
1741        m_utilElement_.m_uchars_ 
1742            = m_parser_.m_source_.substring(offset, offset + size);
1743        } 
1744        else {
1745        m_utilElement_.m_prefixChars_ = null;
1746        int offset = tok.m_source_ & 0x00FFFFFF;
1747        int size = tok.m_source_ >>> 24;
1748        m_utilElement_.m_uchars_ = m_parser_.m_source_.substring(offset, 
1749                                     offset + size);
1750        }
1751        m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
1752        for (int i = 0; i < m_utilElement_.m_cPoints_.length() 
1753             - m_utilElement_.m_cPointsOffset_; i ++) {
1754        if (isJamo(m_utilElement_.m_cPoints_.charAt(i))) {
1755            t.m_collator_.m_isJamoSpecial_ = true;
1756            break;
1757        }
1758        }
1759            
1760            /***
1761    
1762            // Case bits handling 
1763            m_utilElement_.m_CEs_[0] &= 0xFFFFFF3F; 
1764        // Clean the case bits field
1765            if (m_utilElement_.m_cPoints_.length() 
1766                - m_utilElement_.m_cPointsOffset_ > 1) {
1767        // Do it manually
1768        m_utilElement_.m_CEs_[0] 
1769        |= getCaseBits(m_utilElement_.m_cPoints_);
1770            } 
1771            else {
1772        // Copy it from the UCA
1773        int caseCE = getFirstCE(m_utilElement_.m_cPoints_.charAt(0));
1774        m_utilElement_.m_CEs_[0] |= (caseCE & 0xC0);
1775            }
1776    
1777            ***/
1778        // and then, add it
1779        addAnElement(t, m_utilElement_);
1780        tok = tok.m_next_;
1781    }   
1782    }
1783    
1784    /**
1785     * Testing if the string argument has case
1786     * @param src string
1787     * @return the case for this char array
1788     * @exception Exception thrown when internal program error occurs
1789     */
1790    private final int getCaseBits(String   src) throws Exception  
1791    {
1792    int uCount = 0; 
1793    int lCount = 0;
1794    src = Normalizer.decompose(src, true);
1795    m_utilColEIter_.setText(src);
1796    for (int i = 0; i < src.length(); i++) {
1797        m_utilColEIter_.setText(src.substring(i, i + 1));
1798        int order = m_utilColEIter_.next();
1799        if (RuleBasedCollator.isContinuation(order)) {
1800        throw new Exception  ("Internal program error");
1801        }
1802        if ((order & RuleBasedCollator.CE_CASE_BIT_MASK_)
1803        == UPPER_CASE_) {
1804        uCount ++;
1805        } 
1806        else {
1807        char ch = src.charAt(i);
1808        if (UCharacter.isLowerCase(ch)) {
1809            lCount ++;
1810        } 
1811        else {
1812            if (toSmallKana(ch) == ch && toLargeKana(ch) != ch) {
1813            lCount ++;
1814            }
1815        }
1816        }
1817    }
1818        
1819    if (uCount != 0 && lCount != 0) {
1820        return MIXED_CASE_;
1821    } 
1822    else if (uCount != 0) {
1823        return UPPER_CASE_;
1824    } 
1825    else {
1826        return LOWER_CASE_;
1827    }
1828    }
1829    
1830    /**
1831     * Converts a char to the uppercase Kana
1832     * @param ch character to convert
1833     * @return the converted Kana character
1834     */
1835    private static final char toLargeKana(char ch) 
1836    {
1837    if (0x3042 < ch && ch < 0x30ef) { // Kana range 
1838        switch (ch - 0x3000) {
1839        case 0x41: 
1840        case 0x43: 
1841        case 0x45: 
1842        case 0x47: 
1843        case 0x49: 
1844        case 0x63: 
1845        case 0x83: 
1846        case 0x85: 
1847        case 0x8E:
1848        case 0xA1: 
1849        case 0xA3: 
1850        case 0xA5: 
1851        case 0xA7: 
1852        case 0xA9: 
1853        case 0xC3: 
1854        case 0xE3: 
1855        case 0xE5: 
1856        case 0xEE:
1857        ch ++;
1858        break;
1859        case 0xF5:
1860        ch = 0x30AB;
1861        break;
1862        case 0xF6:
1863        ch = 0x30B1;
1864        break;
1865        }
1866    }
1867    return ch;
1868    }
1869    
1870    /**
1871     * Converts a char to the lowercase Kana
1872     * @param ch character to convert
1873     * @return the converted Kana character
1874     */
1875    private static final char toSmallKana(char ch) 
1876    {
1877    if (0x3042 < ch && ch < 0x30ef) { // Kana range
1878        switch (ch - 0x3000) {
1879        case 0x42: 
1880        case 0x44: 
1881        case 0x46: 
1882        case 0x48: 
1883        case 0x4A: 
1884        case 0x64: 
1885        case 0x84: 
1886        case 0x86: 
1887        case 0x8F:
1888        case 0xA2: 
1889        case 0xA4: 
1890        case 0xA6: 
1891        case 0xA8: 
1892        case 0xAA: 
1893        case 0xC4: 
1894        case 0xE4: 
1895        case 0xE6: 
1896        case 0xEF:
1897        ch --;
1898        break;
1899        case 0xAB:
1900        ch = 0x30F5;
1901        break;
1902        case 0xB1:
1903        ch = 0x30F6;
1904        break;
1905        }
1906    }
1907    return ch;
1908    }
1909
1910    /**
1911     * This should be connected to special Jamo handling.
1912     */
1913    private int getFirstCE(char ch) 
1914    {
1915        m_utilColEIter_.setText(UCharacter.toString(ch));
1916    return m_utilColEIter_.next();
1917    }
1918    
1919    /** 
1920     * This adds a read element, while testing for existence 
1921     * @param t build table
1922     * @param element 
1923     * @return ce
1924     */
1925    private int addAnElement(BuildTable t, Elements element) 
1926    {
1927        Vector   expansions = t.m_expansions_;
1928        element.m_mapCE_ = 0;
1929        
1930        if (element.m_CELength_ == 1) {
1931            element.m_mapCE_ = element.m_CEs_[0];
1932
1933        } else {     
1934        // unfortunately, it looks like we have to look for a long primary 
1935        // here since in canonical closure we are going to hit some long 
1936        // primaries from the first phase, and they will come back as 
1937        // continuations/expansions destroying the effect of the previous 
1938        // opitimization. A long primary is a three byte primary with 
1939        // starting secondaries and tertiaries. It can appear in long runs 
1940        // of only primary differences (like east Asian tailorings) also, 
1941        // it should not be an expansion, as expansions would break with 
1942        // this
1943        if (element.m_CELength_ == 2 // a two CE expansion 
1944        && RuleBasedCollator.isContinuation(element.m_CEs_[1]) 
1945        && (element.m_CEs_[1] 
1946            & (~(0xFF << 24 | RuleBasedCollator.CE_CONTINUATION_MARKER_))) 
1947        == 0 // that has only primaries in continuation
1948        && (((element.m_CEs_[0] >> 8) & 0xFF) 
1949            == RuleBasedCollator.BYTE_COMMON_) 
1950        // a common secondary
1951        && ((element.m_CEs_[0] & 0xFF) 
1952            == RuleBasedCollator.BYTE_COMMON_) // and a common tertiary
1953        ) {
1954        element.m_mapCE_ = RuleBasedCollator.CE_SPECIAL_FLAG_ 
1955            // a long primary special
1956            | (CE_LONG_PRIMARY_TAG_ << 24) 
1957            // first and second byte of primary
1958            | ((element.m_CEs_[0] >> 8) & 0xFFFF00) 
1959            // third byte of primary
1960            | ((element.m_CEs_[1] >> 24) & 0xFF);   
1961        } 
1962        else {
1963                // omitting expansion offset in builder
1964                // (HEADER_SIZE_ >> 2)
1965        int expansion = RuleBasedCollator.CE_SPECIAL_FLAG_ 
1966            | (CE_EXPANSION_TAG_ 
1967               << RuleBasedCollator.CE_TAG_SHIFT_) 
1968            | (addExpansion(expansions, element.m_CEs_[0])
1969               << 4) & 0xFFFFF0;
1970    
1971        for (int i = 1; i < element.m_CELength_; i ++) {
1972            addExpansion(expansions, element.m_CEs_[i]);
1973        }
1974        if (element.m_CELength_ <= 0xF) {
1975            expansion |= element.m_CELength_;
1976        } 
1977        else {
1978            addExpansion(expansions, 0);
1979        }
1980        element.m_mapCE_ = expansion;
1981        setMaxExpansion(element.m_CEs_[element.m_CELength_ - 1],
1982                (byte)element.m_CELength_, 
1983                t.m_maxExpansions_);
1984        if (isJamo(element.m_cPoints_.charAt(0))){
1985            t.m_collator_.m_isJamoSpecial_ = true;
1986            setMaxJamoExpansion(element.m_cPoints_.charAt(0),
1987                    element.m_CEs_[element.m_CELength_ 
1988                              - 1],
1989                    (byte)element.m_CELength_,
1990                    t.m_maxJamoExpansions_);
1991        }
1992        }
1993    }
1994        
1995        // We treat digits differently - they are "uber special" and should be
1996        // processed differently if numeric collation is on. 
1997        int uniChar = 0;
1998        if ((element.m_uchars_.length() == 2) 
1999            && UTF16.isLeadSurrogate(element.m_uchars_.charAt(0))) {
2000            uniChar = UCharacterProperty.getRawSupplementary(
2001                                 element.m_uchars_.charAt(0), 
2002                                 element.m_uchars_.charAt(1));      
2003        } 
2004        else if (element.m_uchars_.length() == 1) {
2005            uniChar = element.m_uchars_.charAt(0);
2006        }
2007        
2008        // Here, we either have one normal CE OR mapCE is set. Therefore, we 
2009        // stuff only one element to the expansion buffer. When we encounter a 
2010        // digit and we don't do numeric collation, we will just pick the CE 
2011        // we have and break out of case (see ucol.cpp ucol_prv_getSpecialCE 
2012        // && ucol_prv_getSpecialPrevCE). If we picked a special, further 
2013        // processing will occur. If it's a simple CE, we'll return due
2014        // to how the loop is constructed.
2015        if (uniChar != 0 && UCharacter.isDigit(uniChar)) {
2016            // prepare the element
2017            int expansion = RuleBasedCollator.CE_SPECIAL_FLAG_ 
2018        | (CollationElementIterator.CE_DIGIT_TAG_
2019           << RuleBasedCollator.CE_TAG_SHIFT_) | 1; 
2020            if (element.m_mapCE_ != 0) { 
2021                // if there is an expansion, we'll pick it here
2022                expansion |= (addExpansion(expansions, element.m_mapCE_) << 4);
2023            } 
2024            else {
2025                expansion |= (addExpansion(expansions, element.m_CEs_[0]) << 4);
2026            }
2027            element.m_mapCE_ = expansion;
2028        }
2029    
2030    // here we want to add the prefix structure.
2031    // I will try to process it as a reverse contraction, if possible.
2032    // prefix buffer is already reversed.
2033    
2034    if (element.m_prefixChars_ != null &&
2035            element.m_prefixChars_.length() - element.m_prefix_ > 0) {
2036        // We keep the seen prefix starter elements in a hashtable we need 
2037            // it to be able to distinguish between the simple codepoints and 
2038            // prefix starters. Also, we need to use it for canonical closure.
2039        m_utilElement2_.m_caseBit_ = element.m_caseBit_;
2040            m_utilElement2_.m_CELength_ = element.m_CELength_;
2041            m_utilElement2_.m_CEs_ = element.m_CEs_;
2042            m_utilElement2_.m_mapCE_ = element.m_mapCE_;
2043            //m_utilElement2_.m_prefixChars_ = element.m_prefixChars_;
2044            m_utilElement2_.m_sizePrim_ = element.m_sizePrim_;
2045            m_utilElement2_.m_sizeSec_ = element.m_sizeSec_;
2046            m_utilElement2_.m_sizeTer_ = element.m_sizeTer_;
2047            m_utilElement2_.m_variableTop_ = element.m_variableTop_;
2048            m_utilElement2_.m_prefix_ = element.m_prefix_;
2049            m_utilElement2_.m_prefixChars_ = Normalizer.compose(element.m_prefixChars_, false);
2050            m_utilElement2_.m_uchars_ = element.m_uchars_;
2051            m_utilElement2_.m_cPoints_ = element.m_cPoints_;
2052            m_utilElement2_.m_cPointsOffset_ = 0;
2053            
2054        if (t.m_prefixLookup_ != null) {
2055        Elements uCE = (Elements)t.m_prefixLookup_.get(element);
2056        if (uCE != null) { 
2057                    // there is already a set of code points here
2058            element.m_mapCE_ = addPrefix(t, uCE.m_mapCE_, element);
2059        } 
2060                else { // no code points, so this spot is clean
2061            element.m_mapCE_ = addPrefix(t, CE_NOT_FOUND_, element);
2062            uCE = new Elements(element);
2063            uCE.m_cPoints_ = uCE.m_uchars_;
2064            t.m_prefixLookup_.put(uCE, uCE);
2065        }
2066        if (m_utilElement2_.m_prefixChars_.length() 
2067            != element.m_prefixChars_.length() - element.m_prefix_
2068                    || !m_utilElement2_.m_prefixChars_.regionMatches(0,
2069                                     element.m_prefixChars_, element.m_prefix_,
2070                                     m_utilElement2_.m_prefixChars_.length())) {
2071            // do it!
2072                    m_utilElement2_.m_mapCE_ = addPrefix(t, element.m_mapCE_, 
2073                                                         m_utilElement2_);
2074        }
2075        }
2076    }
2077    
2078    // We need to use the canonical iterator here
2079    // the way we do it is to generate the canonically equivalent strings 
2080    // for the contraction and then add the sequences that pass FCD check
2081    if (element.m_cPoints_.length() - element.m_cPointsOffset_ > 1 
2082        && !(element.m_cPoints_.length() - element.m_cPointsOffset_ == 2 
2083         && UTF16.isLeadSurrogate(element.m_cPoints_.charAt(0)) 
2084         && UTF16.isTrailSurrogate(element.m_cPoints_.charAt(1)))) { 
2085            // this is a contraction, we should check whether a composed form 
2086            // should also be included
2087        m_utilCanIter_.setSource(element.m_cPoints_);
2088        String   source = m_utilCanIter_.next();
2089        while (source != null && source.length() > 0) {
2090        if (Normalizer.quickCheck(source, Normalizer.FCD,0) 
2091                    != Normalizer.NO) {
2092            element.m_uchars_ = source;
2093            element.m_cPoints_ = element.m_uchars_;
2094            finalizeAddition(t, element);
2095        }
2096        source = m_utilCanIter_.next();
2097        }
2098        
2099        return element.m_mapCE_;
2100    } 
2101        else {
2102        return finalizeAddition(t, element);  
2103    }
2104    }
2105    
2106    /**
2107     * Adds an expansion ce to the expansion vector
2108     * @param expansions vector to add to
2109     * @param value of the expansion
2110     * @return the current position of the new element
2111     */
2112    private static final int addExpansion(Vector   expansions, int value) 
2113    {
2114    expansions.add(new Integer  (value));
2115    return expansions.size() - 1;
2116    }
2117    
2118    /**
2119     * Looks for the maximum length of all expansion sequences ending with the 
2120     * same collation element. The size required for maxexpansion and maxsize 
2121     * is returned if the arrays are too small.
2122     * @param endexpansion the last expansion collation element to be added
2123     * @param expansionsize size of the expansion
2124     * @param maxexpansion data structure to store the maximum expansion data.
2125     * @returns size of the maxexpansion and maxsize used.
2126     */
2127    private static int setMaxExpansion(int endexpansion, byte expansionsize,
2128                       MaxExpansionTable maxexpansion)
2129    {
2130    int start = 0;
2131    int limit = maxexpansion.m_endExpansionCE_.size();
2132        long unsigned = (long)endexpansion;
2133        unsigned &= 0xFFFFFFFFl;
2134    
2135    // using binary search to determine if last expansion element is 
2136    // already in the array 
2137    int result = -1;
2138    while (start < limit - 1) {                                                
2139        int mid = start + ((limit - start) >> 1);                                    
2140            long unsignedce = ((Integer  )maxexpansion.m_endExpansionCE_.get(
2141                                       mid)).intValue(); 
2142            unsignedce &= 0xFFFFFFFFl;
2143        if (unsigned <= unsignedce) {                                                   
2144        limit = mid;                                                           
2145        }                                                                        
2146        else {                                                                   
2147        start = mid;                                                           
2148        }                                                                        
2149    } 
2150          
2151    if (((Integer  )maxexpansion.m_endExpansionCE_.get(start)).intValue() 
2152        == endexpansion) {                                                     
2153        result = start;  
2154    }                                                                          
2155    else if (((Integer  )maxexpansion.m_endExpansionCE_.get(limit)).intValue() 
2156         == endexpansion) {                                                     
2157        result = limit;      
2158    }                                            
2159    if (result > -1) {
2160        // found the ce in expansion, we'll just modify the size if it 
2161        // is smaller
2162        Object   currentsize = maxexpansion.m_expansionCESize_.get(result);
2163        if (((Byte  )currentsize).byteValue() < expansionsize) {
2164        maxexpansion.m_expansionCESize_.set(result, 
2165                            new Byte  (expansionsize));
2166        }
2167    }
2168    else {
2169        // we'll need to squeeze the value into the array. initial 
2170        // implementation. shifting the subarray down by 1
2171        maxexpansion.m_endExpansionCE_.insertElementAt(
2172                                                   new Integer  (endexpansion),
2173                                                   start + 1);
2174        maxexpansion.m_expansionCESize_.insertElementAt(
2175                                new Byte  (expansionsize),
2176                                start + 1);
2177    }
2178    return maxexpansion.m_endExpansionCE_.size();
2179    }
2180    
2181    /**
2182     * Sets the maximum length of all jamo expansion sequences ending with the 
2183     * same collation element. The size required for maxexpansion and maxsize 
2184     * is returned if the arrays are too small.
2185     * @param ch the jamo codepoint
2186     * @param endexpansion the last expansion collation element to be added
2187     * @param expansionsize size of the expansion
2188     * @param maxexpansion data structure to store the maximum expansion data.
2189     * @returns size of the maxexpansion and maxsize used.
2190     */
2191    private static int setMaxJamoExpansion(char ch, int endexpansion,
2192                       byte expansionsize,
2193                       MaxJamoExpansionTable maxexpansion)
2194    {
2195    boolean isV = true;
2196    if (ch >= 0x1100 && ch <= 0x1112) {
2197        // determines L for Jamo, doesn't need to store this since it is 
2198        // never at the end of a expansion
2199        if (maxexpansion.m_maxLSize_ < expansionsize) {
2200        maxexpansion.m_maxLSize_ = expansionsize;
2201        }
2202        return maxexpansion.m_endExpansionCE_.size();
2203    }
2204    
2205    if (ch >= 0x1161 && ch <= 0x1175) {
2206        // determines V for Jamo
2207        if (maxexpansion.m_maxVSize_ < expansionsize) {
2208        maxexpansion.m_maxVSize_ = expansionsize;
2209        }
2210    }
2211    
2212    if (ch >= 0x11A8 && ch <= 0x11C2) {
2213        isV = false;
2214        // determines T for Jamo
2215        if (maxexpansion.m_maxTSize_ < expansionsize) {
2216        maxexpansion.m_maxTSize_ = expansionsize;
2217        }
2218    }
2219
2220        int pos = maxexpansion.m_endExpansionCE_.size();    
2221    while (pos > 0) {
2222        pos --;
2223        if (((Integer  )maxexpansion.m_endExpansionCE_.get(pos)).intValue() 
2224        == endexpansion) {
2225        return maxexpansion.m_endExpansionCE_.size();
2226        }
2227    }
2228    maxexpansion.m_endExpansionCE_.add(new Integer  (endexpansion));
2229    maxexpansion.m_isV_.add(new Boolean  (isV));
2230          
2231    return maxexpansion.m_endExpansionCE_.size();
2232    }
2233    
2234    /**
2235     * Adds a prefix to the table
2236     * @param t build table to update
2237     * @param CE collation element to add
2238     * @param element rule element to add
2239     * @return modified ce
2240     */
2241    private int addPrefix(BuildTable t, int CE, Elements element) 
2242    {
2243    // currently the longest prefix we're supporting in Japanese is two 
2244    // characters long. Although this table could quite easily mimic 
2245    // complete contraction stuff there is no good reason to make a general 
2246    // solution, as it would require some error prone messing.
2247    ContractionTable contractions = t.m_contractions_;
2248    String   oldCP = element.m_cPoints_;
2249    int oldCPOffset = element.m_cPointsOffset_;
2250        
2251    contractions.m_currentTag_ = CE_SPEC_PROC_TAG_;
2252    // here, we will normalize & add prefix to the table.
2253    int size = element.m_prefixChars_.length() - element.m_prefix_;
2254    for (int j = 1; j < size; j ++) {   
2255        // First add NFD prefix chars to unsafe CP hash table 
2256        // Unless it is a trail surrogate, which is handled algoritmically 
2257        // and shouldn't take up space in the table.
2258        char ch = element.m_prefixChars_.charAt(j + element.m_prefix_);
2259        if (!UTF16.isTrailSurrogate(ch)) {
2260        unsafeCPSet(t.m_unsafeCP_, ch);
2261        }
2262    }
2263        
2264    // StringBuffer reversed = new StringBuffer();
2265        m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
2266    for (int j = 0; j < size; j ++) { 
2267        // prefixes are going to be looked up backwards
2268        // therefore, we will promptly reverse the prefix buffer...
2269        int offset = element.m_prefixChars_.length() - j - 1;
2270        m_utilStringBuffer_.append(element.m_prefixChars_.charAt(offset));
2271    }
2272    element.m_prefixChars_ = m_utilStringBuffer_.toString();
2273    element.m_prefix_ = 0;
2274    
2275    // the first codepoint is also unsafe, as it forms a 'contraction' with 
2276    // the prefix
2277    if (!UTF16.isTrailSurrogate(element.m_cPoints_.charAt(0))) {
2278        unsafeCPSet(t.m_unsafeCP_, element.m_cPoints_.charAt(0));
2279    }
2280        
2281    element.m_cPoints_ = element.m_prefixChars_;
2282    element.m_cPointsOffset_ = element.m_prefix_;
2283    
2284    // Add the last char of the contraction to the contraction-end hash 
2285    // table. unless it is a trail surrogate, which is handled 
2286    // algorithmically and shouldn't be in the table
2287    if (!UTF16.isTrailSurrogate(
2288                    element.m_cPoints_.charAt(element.m_cPoints_.length() - 1))) {
2289        ContrEndCPSet(t.m_contrEndCP_, element.m_cPoints_.charAt(
2290                                     element.m_cPoints_.length() - 1));
2291    }
2292    // First we need to check if contractions starts with a surrogate
2293    // int cp = UTF16.charAt(element.m_cPoints_, element.m_cPointsOffset_);
2294    
2295    // If there are any Jamos in the contraction, we should turn on special 
2296    // processing for Jamos
2297    if (isJamo(element.m_prefixChars_.charAt(element.m_prefix_))) {
2298        t.m_collator_.m_isJamoSpecial_ = true;
2299    }
2300    // then we need to deal with it 
2301    // we could aready have something in table - or we might not 
2302    if (!isPrefix(CE)) { 
2303        // if it wasn't contraction, we wouldn't end up here
2304        int firstContractionOffset = addContraction(contractions, 
2305                            CONTRACTION_TABLE_NEW_ELEMENT_, 
2306                            (char)0, CE);
2307        int newCE = processContraction(contractions, element, 
2308                       CE_NOT_FOUND_);
2309        addContraction(contractions, firstContractionOffset, 
2310                           element.m_prefixChars_.charAt(element.m_prefix_), 
2311                           newCE);
2312        addContraction(contractions, firstContractionOffset, (char)0xFFFF, 
2313                           CE);
2314        CE = constructSpecialCE(CE_SPEC_PROC_TAG_, firstContractionOffset);
2315    } 
2316    else { 
2317        // we are adding to existing contraction 
2318        // there were already some elements in the table, so we need to add 
2319        // a new contraction 
2320        // Two things can happen here: either the codepoint is already in 
2321        // the table, or it is not
2322        char ch = element.m_prefixChars_.charAt(element.m_prefix_);
2323        int position = findCP(contractions, CE, ch);
2324        if (position > 0) {       
2325        // if it is we just continue down the chain 
2326        int eCE = getCE(contractions, CE, position);
2327        int newCE = processContraction(contractions, element, eCE);
2328        setContraction(contractions, CE, position, ch, newCE);
2329        } 
2330        else {                  
2331        // if it isn't, we will have to create a new sequence 
2332        processContraction(contractions, element, CE_NOT_FOUND_);
2333        insertContraction(contractions, CE, ch, element.m_mapCE_);
2334        }
2335    }
2336    
2337    element.m_cPoints_ = oldCP;
2338    element.m_cPointsOffset_ = oldCPOffset;
2339    
2340    return CE;
2341    }
2342    
2343    /**
2344     * Checks if the argument ce is a contraction
2345     * @param CE collation element
2346     * @return true if argument ce is a contraction
2347     */
2348    private static final boolean isContraction(int CE) 
2349    {
2350    return isSpecial(CE) && (getCETag(CE) == CE_CONTRACTION_TAG_);
2351    }
2352    
2353    /**
2354     * Checks if the argument ce has a prefix
2355     * @param CE collation element
2356     * @return true if argument ce has a prefix
2357     */
2358    private static final boolean isPrefix(int CE) 
2359    {
2360    return isSpecial(CE) && (getCETag(CE) == CE_SPEC_PROC_TAG_);
2361    }
2362    
2363    /**
2364     * Checks if the argument ce is special
2365     * @param CE collation element
2366     * @return true if argument ce is special
2367     */
2368    private static final boolean isSpecial(int CE) 
2369    {
2370    return (CE & RuleBasedCollator.CE_SPECIAL_FLAG_) == 0xF0000000;
2371    }
2372    
2373    /**
2374     * Checks if the argument ce has a prefix
2375     * @param CE collation element
2376     * @return true if argument ce has a prefix
2377     */
2378    private static final int getCETag(int CE) 
2379    {
2380    return (CE & RuleBasedCollator.CE_TAG_MASK_) >>> 
2381        RuleBasedCollator.CE_TAG_SHIFT_;
2382    }
2383    
2384    /**
2385     * Gets the ce at position in contraction table
2386     * @param table contraction table
2387     * @param position offset to the contraction table
2388     * @return ce
2389     */
2390    private static final int getCE(ContractionTable table, int element, 
2391                   int position) 
2392    {
2393    element &= 0xFFFFFF;
2394        BasicContractionTable tbl = getBasicContractionTable(table, element);
2395        
2396        if (tbl == null) {
2397            return CE_NOT_FOUND_;
2398        }
2399    if (position > tbl.m_CEs_.size() || position == -1) {
2400        return CE_NOT_FOUND_;
2401    } 
2402    else {
2403        return ((Integer  )tbl.m_CEs_.get(position)).intValue();
2404    }
2405    }
2406    
2407    /**
2408     * Sets the unsafe character
2409     * @param table unsafe table
2410     * @param c character to be added
2411     */
2412    private static final void unsafeCPSet(byte table[], char c) 
2413    {
2414    int hash = c;
2415    if (hash >= (UNSAFECP_TABLE_SIZE_ << 3)) {
2416        if (hash >= 0xd800 && hash <= 0xf8ff) {
2417        // Part of a surrogate, or in private use area. 
2418        // These don't go in the table                            
2419        return;
2420        }
2421        hash = (hash & UNSAFECP_TABLE_MASK_) + 256;
2422    }
2423    table[hash >> 3] |= (1 << (hash & 7));
2424    }
2425    
2426    /**
2427     * Sets the contraction end character
2428     * @param table contraction end table
2429     * @param c character to be added
2430     */
2431    private static final void ContrEndCPSet(byte table[], char c) 
2432    {
2433    int hash = c;
2434    if (hash >= (UNSAFECP_TABLE_SIZE_ << 3)) {
2435        hash = (hash & UNSAFECP_TABLE_MASK_) + 256;
2436    }
2437    table[hash >> 3] |= (1 << (hash & 7));
2438    }
2439    
2440    /** 
2441     * Adds more contractions in table. If element is non existant, it creates 
2442     * on. Returns element handle 
2443     * @param table contraction table
2444     * @param element offset to the contraction table
2445     * @param codePoint codepoint to add
2446     * @param value
2447     * @return collation element
2448     */
2449    private static int addContraction(ContractionTable table, int element, 
2450                                      char codePoint, int value) 
2451    {
2452    BasicContractionTable tbl = getBasicContractionTable(table, element);
2453    if (tbl == null) {
2454        tbl = addAContractionElement(table);
2455        element = table.m_elements_.size() - 1;
2456    } 
2457    
2458    tbl.m_CEs_.add(new Integer  (value));
2459    tbl.m_codePoints_.append(codePoint);
2460    return constructSpecialCE(table.m_currentTag_, element);
2461    }
2462
2463    /**
2464     * Adds a contraction element to the table
2465     * @param table contraction table to update
2466     * @return contraction 
2467     */
2468    private static BasicContractionTable addAContractionElement(
2469                                ContractionTable table) 
2470    {
2471    BasicContractionTable result = new BasicContractionTable();
2472    table.m_elements_.add(result);
2473    return result;
2474    }
2475
2476    /**
2477     * Constructs a special ce
2478     * @param tag special tag
2479     * @param CE collation element 
2480     * @return a contraction ce
2481     */
2482    private static final int constructSpecialCE(int tag, int CE) 
2483    {
2484    return RuleBasedCollator.CE_SPECIAL_FLAG_ 
2485        | (tag << RuleBasedCollator.CE_TAG_SHIFT_) | (CE & 0xFFFFFF);
2486    }
2487    
2488    /**
2489     * Sets and inserts the element that has a contraction
2490     * @param contractions contraction table 
2491     * @param element contracting element
2492     * @param existingCE
2493     * @return contraction ce
2494     */
2495    private static int processContraction(ContractionTable contractions, 
2496                      Elements element, 
2497                      int existingCE) 
2498    {
2499    int firstContractionOffset = 0;
2500    // end of recursion 
2501    if (element.m_cPoints_.length() - element.m_cPointsOffset_ == 1) {
2502        if (isContractionTableElement(existingCE) 
2503        && getCETag(existingCE) == contractions.m_currentTag_) {
2504        changeContraction(contractions, existingCE, (char)0, 
2505                  element.m_mapCE_);
2506        changeContraction(contractions, existingCE, (char)0xFFFF,
2507                                  element.m_mapCE_);
2508        return existingCE;
2509        } 
2510        else {
2511        // can't do just that. existingCe might be a contraction, 
2512        // meaning that we need to do another step
2513        return element.m_mapCE_; 
2514        }
2515    }
2516    
2517    // this recursion currently feeds on the only element we have... 
2518    // We will have to copy it in order to accomodate for both backward 
2519    // and forward cycles
2520    // we encountered either an empty space or a non-contraction element 
2521    // this means we are constructing a new contraction sequence 
2522    element.m_cPointsOffset_ ++;
2523    if (!isContractionTableElement(existingCE)) { 
2524        // if it wasn't contraction, we wouldn't end up here
2525        firstContractionOffset = addContraction(contractions, 
2526                            CONTRACTION_TABLE_NEW_ELEMENT_, 
2527                            (char)0, existingCE);
2528        int newCE = processContraction(contractions, element, 
2529                       CE_NOT_FOUND_);
2530        addContraction(contractions, firstContractionOffset, 
2531               element.m_cPoints_.charAt(element.m_cPointsOffset_), 
2532               newCE);
2533        addContraction(contractions, firstContractionOffset, 
2534               (char)0xFFFF, existingCE);
2535        existingCE = constructSpecialCE(contractions.m_currentTag_, 
2536                        firstContractionOffset);
2537    } 
2538    else { 
2539        // we are adding to existing contraction
2540        // there were already some elements in the table, so we need to add 
2541        // a new contraction 
2542        // Two things can happen here: either the codepoint is already in 
2543        // the table, or it is not
2544        int position = findCP(contractions, existingCE, 
2545                  element.m_cPoints_.charAt(element.m_cPointsOffset_));
2546        if (position > 0) {       
2547        // if it is we just continue down the chain 
2548        int eCE = getCE(contractions, existingCE, position);
2549        int newCE = processContraction(contractions, element, eCE);
2550        setContraction(contractions, existingCE, position, 
2551                           element.m_cPoints_.charAt(element.m_cPointsOffset_), 
2552                   newCE);
2553        } 
2554        else {  
2555        // if it isn't, we will have to create a new sequence 
2556        int newCE = processContraction(contractions, element, 
2557                           CE_NOT_FOUND_);
2558        insertContraction(contractions, existingCE, 
2559                  element.m_cPoints_.charAt(element.m_cPointsOffset_), 
2560                  newCE);
2561        }
2562    }
2563    element.m_cPointsOffset_ --;
2564    return existingCE;
2565    }
2566    
2567    /**
2568     * Checks if CE belongs to the contraction table
2569     * @param CE collation element to test
2570     * @return true if CE belongs to the contraction table
2571     */
2572    private static final boolean isContractionTableElement(int CE) 
2573    { 
2574    return isSpecial(CE) 
2575        && (getCETag(CE) == CE_CONTRACTION_TAG_
2576        || getCETag(CE) == CE_SPEC_PROC_TAG_);
2577    }
2578    
2579    /**
2580     * Gets the codepoint 
2581     * @param table contraction table
2582     * @param element offset to the contraction element in the table
2583     * @param codePoint code point to look for
2584     * @return the offset to the code point
2585     */
2586    private static int findCP(ContractionTable table, int element, 
2587                  char codePoint) 
2588    {
2589    BasicContractionTable tbl = getBasicContractionTable(table, element);
2590    if (tbl == null) {
2591        return -1;
2592    }
2593    
2594    int position = 0;
2595    while (codePoint > tbl.m_codePoints_.charAt(position)) {
2596        position ++;
2597        if (position > tbl.m_codePoints_.length()) {
2598        return -1;
2599        }
2600    }
2601    if (codePoint == tbl.m_codePoints_.charAt(position)) {
2602        return position;
2603    } 
2604    else {
2605        return -1;
2606    }
2607    }
2608
2609    /**
2610     * Gets the contraction element out of the contraction table
2611     * @param table contraction table
2612     * @param offset to the element in the contraction table
2613     * @return basic contraction element at offset in the contraction table
2614     */
2615    private static final BasicContractionTable getBasicContractionTable(
2616                                    ContractionTable table,
2617                                    int offset) 
2618    {
2619        offset &= 0xFFFFFF;
2620        if (offset == 0xFFFFFF) {
2621        return null;
2622        }
2623    return (BasicContractionTable)table.m_elements_.get(offset);
2624    }
2625    
2626    /**
2627     * Changes the contraction element
2628     * @param table contraction table
2629     * @param element offset to the element in the contraction table
2630     * @param codePoint codepoint 
2631     * @param newCE new collation element
2632     * @return basic contraction element at offset in the contraction table
2633     */
2634    private static final int changeContraction(ContractionTable table, 
2635                                               int element, char codePoint, 
2636                                               int newCE) 
2637    {
2638    BasicContractionTable tbl = getBasicContractionTable(table, element);    
2639    if (tbl == null) {
2640        return 0;
2641    }
2642    int position = 0;
2643    while (codePoint > tbl.m_codePoints_.charAt(position)) {
2644        position ++;
2645        if (position > tbl.m_codePoints_.length()) {
2646        return CE_NOT_FOUND_;
2647        }
2648    }
2649    if (codePoint == tbl.m_codePoints_.charAt(position)) {
2650        tbl.m_CEs_.set(position, new Integer  (newCE));
2651        return element & 0xFFFFFF;
2652    } 
2653    else {
2654        return CE_NOT_FOUND_;
2655    }
2656    }
2657    
2658    /** 
2659     * Sets a part of contraction sequence in table. If element is non 
2660     * existant, it creates on. Returns element handle.
2661     * @param table contraction table
2662     * @param element offset to the contraction table
2663     * @param offset
2664     * @param codePoint contraction character
2665     * @param value ce value
2666     * @return new contraction ce
2667     */
2668    private static final int setContraction(ContractionTable table, 
2669                                            int element, int offset, 
2670                                            char codePoint, int value) 
2671    {
2672        element &= 0xFFFFFF;
2673    BasicContractionTable tbl = getBasicContractionTable(table, element);    
2674    if (tbl == null) {
2675        tbl = addAContractionElement(table);
2676        element = table.m_elements_.size() - 1;
2677    }
2678    
2679    tbl.m_CEs_.set(offset, new Integer  (value));
2680    tbl.m_codePoints_.setCharAt(offset, codePoint);
2681    return constructSpecialCE(table.m_currentTag_, element);
2682    }
2683    
2684    /** 
2685     * Inserts a part of contraction sequence in table. Sequences behind the 
2686     * offset are moved back. If element is non existent, it creates on. 
2687     * @param table contraction
2688     * @param element offset to the table contraction
2689     * @param codePoint code point
2690     * @param value collation element value
2691     * @return contraction collation element
2692     */
2693    private static final int insertContraction(ContractionTable table, 
2694                                               int element, char codePoint, 
2695                                               int value) 
2696    {
2697    element &= 0xFFFFFF;
2698    BasicContractionTable tbl = getBasicContractionTable(table, element);
2699    if (tbl == null) {
2700        tbl = addAContractionElement(table);
2701        element = table.m_elements_.size() - 1;
2702    }
2703    
2704    int offset = 0;
2705    while (tbl.m_codePoints_.charAt(offset) < codePoint 
2706           && offset < tbl.m_codePoints_.length()) {
2707        offset ++;
2708    }
2709    
2710    tbl.m_CEs_.insertElementAt(new Integer  (value), offset);
2711    tbl.m_codePoints_.insert(offset, codePoint);
2712    
2713    return constructSpecialCE(table.m_currentTag_, element);
2714    }
2715    
2716    /**
2717     * Finalize addition
2718     * @param t build table
2719     * @param element to add
2720     */
2721    private final static int finalizeAddition(BuildTable t, Elements element) 
2722    {
2723    int CE = CE_NOT_FOUND_;
2724        // This should add a completely ignorable element to the  
2725        // unsafe table, so that backward iteration will skip 
2726        // over it when treating contractions. 
2727        if (element.m_mapCE_ == 0) { 
2728            for (int i = 0; i < element.m_cPoints_.length(); i ++) { 
2729                char ch = element.m_cPoints_.charAt(i);
2730                if (!UTF16.isTrailSurrogate(ch)) { 
2731                    unsafeCPSet(t.m_unsafeCP_, ch); 
2732                } 
2733            } 
2734        } 
2735
2736    if (element.m_cPoints_.length() - element.m_cPointsOffset_ > 1) { 
2737        // we're adding a contraction
2738        int cp = UTF16.charAt(element.m_cPoints_, element.m_cPointsOffset_);
2739        CE = t.m_mapping_.getValue(cp);
2740        CE = addContraction(t, CE, element);
2741    } 
2742    else { 
2743        // easy case
2744        CE = t.m_mapping_.getValue(element.m_cPoints_.charAt(
2745                                 element.m_cPointsOffset_));
2746        
2747        if (CE != CE_NOT_FOUND_) {
2748        if(isContractionTableElement(CE)) { 
2749            // adding a non contraction element (thai, expansion, 
2750            // single) to already existing contraction 
2751            if (!isPrefix(element.m_mapCE_)) { 
2752            // we cannot reenter prefix elements - as we are going 
2753            // to create a dead loop
2754            // Only expansions and regular CEs can go here... 
2755            // Contractions will never happen in this place
2756            setContraction(t.m_contractions_, CE, 0, (char)0, 
2757                       element.m_mapCE_);
2758            // This loop has to change the CE at the end of 
2759            // contraction REDO!
2760            changeLastCE(t.m_contractions_, CE, element.m_mapCE_);
2761            }
2762        } 
2763        else {
2764            t.m_mapping_.setValue(element.m_cPoints_.charAt(
2765                                    element.m_cPointsOffset_), 
2766                      element.m_mapCE_);
2767        }
2768        } 
2769        else {
2770        t.m_mapping_.setValue(element.m_cPoints_.charAt(
2771                                element.m_cPointsOffset_), 
2772                                      element.m_mapCE_);
2773        }
2774    }
2775    return CE;
2776    }
2777    
2778    /** 
2779     * Note regarding surrogate handling: We are interested only in the single
2780     * or leading surrogates in a contraction. If a surrogate is somewhere else
2781     * in the contraction, it is going to be handled as a pair of code units,
2782     * as it doesn't affect the performance AND handling surrogates specially
2783     * would complicate code way too much.
2784     */
2785    private static int addContraction(BuildTable t, int CE, Elements element) 
2786    {
2787    ContractionTable contractions = t.m_contractions_;
2788    contractions.m_currentTag_ = CE_CONTRACTION_TAG_;
2789    
2790    // First we need to check if contractions starts with a surrogate
2791    int cp = UTF16.charAt(element.m_cPoints_, 0);
2792    int cpsize = 1;
2793    if (UCharacter.isSupplementary(cp)) {
2794        cpsize = 2;
2795    }
2796    if (cpsize < element.m_cPoints_.length()) { 
2797        // This is a real contraction, if there are other characters after 
2798        // the first
2799        int size = element.m_cPoints_.length() - element.m_cPointsOffset_;
2800        for (int j = 1; j < size; j ++) {   
2801        // First add contraction chars to unsafe CP hash table 
2802        // Unless it is a trail surrogate, which is handled 
2803        // algoritmically and shouldn't take up space in the table.
2804        if (!UTF16.isTrailSurrogate(element.m_cPoints_.charAt(
2805                                      element.m_cPointsOffset_ + j))) {
2806            unsafeCPSet(t.m_unsafeCP_, 
2807                element.m_cPoints_.charAt(
2808                              element.m_cPointsOffset_ + j));
2809        }
2810        }
2811        // Add the last char of the contraction to the contraction-end 
2812        // hash table. unless it is a trail surrogate, which is handled 
2813        // algorithmically and shouldn't be in the table
2814        if (!UTF16.isTrailSurrogate(element.m_cPoints_.charAt(
2815                                  element.m_cPoints_.length() -1))) {
2816        ContrEndCPSet(t.m_contrEndCP_, 
2817                  element.m_cPoints_.charAt(
2818                            element.m_cPoints_.length() -1));
2819        }
2820    
2821        // If there are any Jamos in the contraction, we should turn on 
2822        // special processing for Jamos
2823        if (isJamo(element.m_cPoints_.charAt(element.m_cPointsOffset_))) {
2824        t.m_collator_.m_isJamoSpecial_ = true;
2825        }
2826        // then we need to deal with it 
2827        // we could aready have something in table - or we might not 
2828        element.m_cPointsOffset_ += cpsize;
2829        if (!isContraction(CE)) { 
2830        // if it wasn't contraction, we wouldn't end up here
2831        int firstContractionOffset = addContraction(contractions, 
2832                                CONTRACTION_TABLE_NEW_ELEMENT_, (char)0, CE);
2833        int newCE = processContraction(contractions, element, 
2834                           CE_NOT_FOUND_);
2835        addContraction(contractions, firstContractionOffset, 
2836                   element.m_cPoints_.charAt(element.m_cPointsOffset_), 
2837                   newCE);
2838        addContraction(contractions, firstContractionOffset, 
2839                               (char)0xFFFF, CE);
2840        CE = constructSpecialCE(CE_CONTRACTION_TAG_, 
2841                    firstContractionOffset);
2842        } 
2843        else { 
2844        // we are adding to existing contraction 
2845        // there were already some elements in the table, so we need to 
2846        // add a new contraction
2847        // Two things can happen here: either the codepoint is already 
2848        // in the table, or it is not 
2849        int position = findCP(contractions, CE, 
2850                      element.m_cPoints_.charAt(element.m_cPointsOffset_));
2851        if (position > 0) {       
2852            // if it is we just continue down the chain
2853            int eCE = getCE(contractions, CE, position);
2854            int newCE = processContraction(contractions, element, eCE);
2855            setContraction(contractions, CE, position, 
2856                   element.m_cPoints_.charAt(element.m_cPointsOffset_), 
2857                   newCE);
2858        } 
2859        else {                  
2860            // if it isn't, we will have to create a new sequence 
2861            int newCE = processContraction(contractions, element, 
2862                           CE_NOT_FOUND_);
2863            insertContraction(contractions, CE, 
2864                      element.m_cPoints_.charAt(element.m_cPointsOffset_), 
2865                                      newCE);
2866        }
2867        }
2868        element.m_cPointsOffset_ -= cpsize;
2869        t.m_mapping_.setValue(cp, CE);
2870    } 
2871    else if (!isContraction(CE)) { 
2872        // this is just a surrogate, and there is no contraction 
2873        t.m_mapping_.setValue(cp, element.m_mapCE_);
2874    } 
2875    else { 
2876        // fill out the first stage of the contraction with the surrogate 
2877        // CE 
2878        changeContraction(contractions, CE, (char)0, element.m_mapCE_);
2879        changeContraction(contractions, CE, (char)0xFFFF, element.m_mapCE_);
2880    }
2881    return CE;
2882    }
2883    
2884    /** 
2885     * this is for adding non contractions 
2886     * @param table contraction table
2887     * @param element offset to the contraction table
2888     * @param value collation element value
2889     * @return new collation element 
2890     */
2891    private static final int changeLastCE(ContractionTable table, int element, 
2892                                          int value) 
2893    {
2894    BasicContractionTable tbl = getBasicContractionTable(table, element);
2895    if (tbl == null) {
2896        return 0;
2897    }
2898    
2899    tbl.m_CEs_.set(tbl.m_CEs_.size() - 1, new Integer  (value));
2900    return constructSpecialCE(table.m_currentTag_, element & 0xFFFFFF);
2901    }
2902    
2903    /**
2904     * Given a set of ranges calculated by allocWeights(), iterate through the 
2905     * weights. Sets the next weight in cegenerator.m_current_.
2906     * @param cegenerator object that contains ranges weight range array and
2907     *        its rangeCount
2908     * @return the next weight
2909     */
2910    private static int nextWeight(CEGenerator cegenerator) 
2911    {
2912        if (cegenerator.m_rangesLength_ > 0) {
2913            // get maxByte from the .count field
2914            int maxByte = cegenerator.m_ranges_[0].m_count_;
2915            // get the next weight 
2916            int weight = cegenerator.m_ranges_[0].m_start_;
2917            if (weight == cegenerator.m_ranges_[0].m_end_) {
2918                // this range is finished, remove it and move the following 
2919                // ones up 
2920                cegenerator.m_rangesLength_ --;
2921                if (cegenerator.m_rangesLength_ > 0) {
2922                    System.arraycopy(cegenerator.m_ranges_, 1, 
2923                                     cegenerator.m_ranges_, 0, 
2924                                     cegenerator.m_rangesLength_);
2925                    cegenerator.m_ranges_[0].m_count_ = maxByte; 
2926                    // keep maxByte in ranges[0]
2927                }
2928            } 
2929            else {
2930                // increment the weight for the next value
2931                cegenerator.m_ranges_[0].m_start_ 
2932            = incWeight(weight, cegenerator.m_ranges_[0].m_length2_, 
2933                maxByte);
2934            }
2935            return weight;
2936        }
2937        return -1;
2938    }
2939    
2940    /**
2941     * Increment the collation weight
2942     * @param weight to increment
2943     * @param length
2944     * @param maxByte
2945     * @return new incremented weight
2946     */
2947    private static final int incWeight(int weight, int length, int maxByte) 
2948    {
2949        while (true) {
2950            int b = getWeightByte(weight, length);
2951            if (b < maxByte) {
2952                return setWeightByte(weight, length, b + 1);
2953            } 
2954            else {
2955                // roll over, set this byte to BYTE_FIRST_TAILORED_ and 
2956                // increment the previous one
2957                weight = setWeightByte(weight, length, 
2958                                       RuleBasedCollator.BYTE_FIRST_TAILORED_);
2959                -- length;
2960            }
2961        }
2962    }
2963    
2964    /**
2965     * Gets the weight byte
2966     * @param weight
2967     * @param index
2968     * @return byte
2969     */
2970    private static final int getWeightByte(int weight, int index) 
2971    {
2972        return (weight >> ((4 - index) << 3)) & 0xff;
2973    }
2974    
2975    /**
2976     * Set the weight byte in table
2977     * @param weight 
2978     * @param index
2979     * @param b byte
2980     */
2981    private static final int setWeightByte(int weight, int index, int b) 
2982    {
2983        index <<= 3;
2984        // 0xffffffff except a 00 "hole" for the index-th byte
2985        int mask = 0xffffffff >>> index;
2986        index = 32 - index;
2987        mask |= 0xffffff00 << index;
2988        return (weight & mask) | (b << index);
2989    }
2990    
2991    /**
2992     * Call getWeightRanges and then determine heuristically which ranges to 
2993     * use for a given number of weights between (excluding) two limits
2994     * @param lowerLimit
2995     * @param upperLimit
2996     * @param n
2997     * @param maxByte
2998     * @param ranges
2999     * @return
3000     */
3001    private int allocateWeights(int lowerLimit, int upperLimit, int n,
3002                                int maxByte, WeightRange ranges[]) 
3003    {
3004        // number of usable byte values 3..maxByte
3005        int countBytes = maxByte - RuleBasedCollator.BYTE_FIRST_TAILORED_ + 1;
3006        // [0] unused, [5] to make index checks unnecessary, m_utilCountBuffer_
3007        // countBytes to the power of index, m_utilLongBuffer_ for unsignedness
3008        // gcc requires explicit initialization 
3009        m_utilLongBuffer_[0] = 1;
3010        m_utilLongBuffer_[1] = countBytes;
3011        m_utilLongBuffer_[2] = m_utilLongBuffer_[1] * countBytes;
3012        m_utilLongBuffer_[3] = m_utilLongBuffer_[2] * countBytes;
3013        m_utilLongBuffer_[4] = m_utilLongBuffer_[3] * countBytes;
3014        int rangeCount = getWeightRanges(lowerLimit, upperLimit, maxByte, 
3015                                         countBytes, ranges);
3016        if (rangeCount <= 0) {
3017            return 0;
3018        }
3019        // what is the maximum number of weights with these ranges?
3020        long maxCount = 0;
3021        for (int i = 0; i < rangeCount; ++ i) {
3022            maxCount += (long)ranges[i].m_count_ 
3023        * m_utilLongBuffer_[4 - ranges[i].m_length_];
3024        }
3025        if (maxCount < n) {
3026            return 0;
3027        }
3028        // set the length2 and count2 fields
3029        for (int i = 0; i < rangeCount; ++ i) {
3030            ranges[i].m_length2_ = ranges[i].m_length_;
3031            ranges[i].m_count2_ = ranges[i].m_count_;
3032        }
3033        // try until we find suitably large ranges
3034        while (true) {
3035            // get the smallest number of bytes in a range
3036            int minLength = ranges[0].m_length2_;
3037            // sum up the number of elements that fit into ranges of each byte 
3038            // length
3039            Arrays.fill(m_utilCountBuffer_, 0);
3040            for (int i = 0; i < rangeCount; ++ i) {
3041                m_utilCountBuffer_[ranges[i].m_length2_] += ranges[i].m_count2_;
3042            }
3043            // now try to allocate n elements in the available short ranges 
3044            if (n <= m_utilCountBuffer_[minLength] 
3045        + m_utilCountBuffer_[minLength + 1]) {
3046                // trivial cases, use the first few ranges
3047                maxCount = 0;
3048                rangeCount = 0;
3049                do {
3050                    maxCount += ranges[rangeCount].m_count2_;
3051                    ++ rangeCount;
3052                } while (n > maxCount);
3053                break;
3054            } 
3055            else if (n <= ranges[0].m_count2_ * countBytes) {
3056                // easy case, just make this one range large enough by 
3057                // lengthening it once more, possibly split it
3058                rangeCount = 1;
3059                // calculate how to split the range between maxLength-1 
3060                // (count1) and maxLength (count2) 
3061                long power_1 
3062            = m_utilLongBuffer_[minLength - ranges[0].m_length_];
3063                long power = power_1 * countBytes;
3064                int count2 = (int)((n + power - 1) / power);
3065                int count1 = ranges[0].m_count_ - count2;
3066                // split the range
3067                if (count1 < 1) {
3068                    // lengthen the entire range to maxLength 
3069                    lengthenRange(ranges, 0, maxByte, countBytes);
3070                } 
3071                else {
3072                    // really split the range
3073                    // create a new range with the end and initial and current 
3074                    // length of the old one
3075                    rangeCount = 2;
3076                    ranges[1].m_end_ = ranges[0].m_end_;
3077                    ranges[1].m_length_ = ranges[0].m_length_;
3078                    ranges[1].m_length2_ = minLength;
3079                    // set the end of the first range according to count1
3080                    int i = ranges[0].m_length_;
3081                    int b = getWeightByte(ranges[0].m_start_, i) + count1 - 1;
3082                    // ranges[0].count and count1 may be >countBytes from 
3083                    // merging adjacent ranges; b > maxByte is possible
3084                    if (b <= maxByte) {
3085                        ranges[0].m_end_ = setWeightByte(ranges[0].m_start_, i, 
3086                                                         b);
3087                    } 
3088                    else {
3089                        ranges[0].m_end_ = setWeightByte(
3090                             incWeight(ranges[0].m_start_, i - 1, 
3091                                   maxByte), 
3092                             i, b - countBytes);
3093                    }
3094                    // set the bytes in the end weight at length + 1..length2 
3095                    // to maxByte
3096                    b = (maxByte << 24) | (maxByte << 16) | (maxByte << 8)
3097                        | maxByte; // this used to be 0xffffffff 
3098                    ranges[0].m_end_ = truncateWeight(ranges[0].m_end_, i) 
3099            | (b >>> (i << 3)) 
3100            & (b << ((4 - minLength) << 3));
3101                    // set the start of the second range to immediately follow 
3102                    // the end of the first one
3103                    ranges[1].m_start_ = incWeight(ranges[0].m_end_, minLength, 
3104                                                   maxByte);
3105                    // set the count values (informational)
3106                    ranges[0].m_count_ = count1;
3107                    ranges[1].m_count_ = count2;
3108    
3109                    ranges[0].m_count2_ = (int)(count1 * power_1);
3110                    // will be *countBytes when lengthened 
3111                    ranges[1].m_count2_ = (int)(count2 * power_1); 
3112    
3113                    // lengthen the second range to maxLength
3114                    lengthenRange(ranges, 1, maxByte, countBytes);
3115                }
3116                break;
3117            }
3118            // no good match, lengthen all minLength ranges and iterate 
3119            for (int i=0; ranges[i].m_length2_ == minLength; ++ i) {
3120                lengthenRange(ranges, i, maxByte, countBytes);
3121            }
3122        }
3123    
3124        if (rangeCount > 1) {
3125            // sort the ranges by weight values 
3126            Arrays.sort(ranges, 0, rangeCount);
3127        }
3128    
3129        // set maxByte in ranges[0] for ucol_nextWeight()
3130        ranges[0].m_count_ = maxByte;
3131    
3132        return rangeCount;
3133    }
3134    
3135    /**
3136     * Updates the range length
3137     * @param range weight range array
3138     * @param offset to weight range array
3139     * @param maxByte
3140     * @param countBytes
3141     * @return new length
3142     */
3143    private static final int lengthenRange(WeightRange range[], int offset, 
3144                                           int maxByte, int countBytes) 
3145    {
3146        int length = range[offset].m_length2_ + 1;
3147        range[offset].m_start_ = setWeightTrail(range[offset].m_start_, length, 
3148                        RuleBasedCollator.BYTE_FIRST_TAILORED_);
3149        range[offset].m_end_ = setWeightTrail(range[offset].m_end_, length, 
3150                                              maxByte);
3151        range[offset].m_count2_ *= countBytes;
3152        range[offset].m_length2_ = length;
3153        return length;
3154    }
3155    
3156    /**
3157     * Gets the weight 
3158     * @param weight
3159     * @param length
3160     * @param trail
3161     * @return new weight
3162     */
3163    private static final int setWeightTrail(int weight, int length, int trail) 
3164    {
3165        length = (4 - length) << 3;
3166        return (weight & (0xffffff00 << length)) | (trail << length);
3167    }
3168    
3169    /**
3170     * take two CE weights and calculate the
3171     * possible ranges of weights between the two limits, excluding them
3172     * for weights with up to 4 bytes there are up to 2*4-1=7 ranges
3173     * @param lowerLimit
3174     * @param upperLimit
3175     * @param maxByte
3176     * @param countBytes
3177     * @param ranges
3178     * @return weight ranges
3179     */
3180    private int getWeightRanges(int lowerLimit, int upperLimit, int maxByte, 
3181                                int countBytes, WeightRange ranges[]) 
3182    {
3183        // assume that both lowerLimit & upperLimit are not 0 
3184        // get the lengths of the limits 
3185        int lowerLength = lengthOfWeight(lowerLimit);
3186        int upperLength = lengthOfWeight(upperLimit);
3187        if (Utility.compareUnsigned(lowerLimit, upperLimit) >= 0) {
3188            return 0;
3189        }
3190        // check that neither is a prefix of the other
3191        if (lowerLength < upperLength) {
3192            if (lowerLimit == truncateWeight(upperLimit, lowerLength)) {
3193                return 0;
3194            }
3195        }
3196        // if the upper limit is a prefix of the lower limit then the earlier 
3197        // test lowerLimit >= upperLimit has caught it
3198        // reset local variables
3199        // With the limit lengths of 1..4, there are up to 7 ranges for 
3200        // allocation:
3201        // range     minimum length
3202        // lower[4]  4
3203        // lower[3]  3
3204        // lower[2]  2
3205        // middle    1
3206        // upper[2]  2
3207        // upper[3]  3
3208        // upper[4]  4
3209        // We are now going to calculate up to 7 ranges.
3210        // Some of them will typically overlap, so we will then have to merge 
3211        // and eliminate ranges.
3212        
3213        // We have to clean cruft from previous invocations
3214        // before doing anything. C++ already does that
3215        for(int length = 0; length < 5; length++) {
3216            m_utilLowerWeightRange_[length].clear();
3217            m_utilUpperWeightRange_[length].clear();
3218        }
3219        m_utilWeightRange_.clear();
3220        
3221        int weight = lowerLimit;
3222        for (int length = lowerLength; length >= 2; -- length) {
3223            m_utilLowerWeightRange_[length].clear();
3224            int trail = getWeightByte(weight, length);
3225            if (trail < maxByte) {
3226                m_utilLowerWeightRange_[length].m_start_ 
3227            = incWeightTrail(weight, length);
3228                m_utilLowerWeightRange_[length].m_end_ 
3229            = setWeightTrail(weight, length, maxByte);
3230                m_utilLowerWeightRange_[length].m_length_ = length;
3231                m_utilLowerWeightRange_[length].m_count_ = maxByte - trail;
3232            }
3233            weight = truncateWeight(weight, length - 1);
3234        }
3235        m_utilWeightRange_.m_start_ = incWeightTrail(weight, 1);
3236    
3237        weight = upperLimit;
3238        // [0] and [1] are not used - this simplifies indexing, 
3239        // m_utilUpperWeightRange_
3240        
3241        for (int length = upperLength; length >= 2; length --) {
3242            int trail = getWeightByte(weight, length);
3243            if (trail > RuleBasedCollator.BYTE_FIRST_TAILORED_) {
3244                m_utilUpperWeightRange_[length].m_start_ 
3245            = setWeightTrail(weight, length, 
3246                     RuleBasedCollator.BYTE_FIRST_TAILORED_);
3247                m_utilUpperWeightRange_[length].m_end_ 
3248            = decWeightTrail(weight, length);
3249                m_utilUpperWeightRange_[length].m_length_ = length;
3250                m_utilUpperWeightRange_[length].m_count_ = trail
3251            - RuleBasedCollator.BYTE_FIRST_TAILORED_;
3252            }
3253            weight = truncateWeight(weight, length - 1);
3254        }
3255        m_utilWeightRange_.m_end_ = decWeightTrail(weight, 1);
3256    
3257        // set the middle range
3258        m_utilWeightRange_.m_length_ = 1;
3259        if (Utility.compareUnsigned(m_utilWeightRange_.m_end_, m_utilWeightRange_.m_start_) >= 0) {
3260        //if (m_utilWeightRange_.m_end_ >= m_utilWeightRange_.m_start_) {
3261            m_utilWeightRange_.m_count_ 
3262        = ((m_utilWeightRange_.m_end_ - m_utilWeightRange_.m_start_) 
3263           >>> 24) + 1;
3264        } 
3265        else {
3266            // eliminate overlaps
3267            // remove the middle range
3268            m_utilWeightRange_.m_count_ = 0;
3269            // reduce or remove the lower ranges that go beyond upperLimit
3270            for (int length = 4; length >= 2; -- length) {
3271                if (m_utilLowerWeightRange_[length].m_count_ > 0 
3272                    && m_utilUpperWeightRange_[length].m_count_ > 0) {
3273                    int start = m_utilUpperWeightRange_[length].m_start_;
3274                    int end = m_utilLowerWeightRange_[length].m_end_;
3275                    if (end >= start || incWeight(end, length, maxByte) 
3276            == start) {
3277                        // lower and upper ranges collide or are directly 
3278                        // adjacent: merge these two and remove all shorter 
3279                        // ranges
3280                        start = m_utilLowerWeightRange_[length].m_start_;
3281                        end = m_utilLowerWeightRange_[length].m_end_ 
3282                            = m_utilUpperWeightRange_[length].m_end_;
3283                        // merging directly adjacent ranges needs to subtract 
3284                        // the 0/1 gaps in between;
3285                        // it may result in a range with count>countBytes
3286                        m_utilLowerWeightRange_[length].m_count_ 
3287                = getWeightByte(end, length)
3288                - getWeightByte(start, length) + 1 
3289                + countBytes * (getWeightByte(end, length - 1)
3290                        - getWeightByte(start, 
3291                                length - 1));
3292                        m_utilUpperWeightRange_[length].m_count_ = 0;
3293                        while (-- length >= 2) {
3294                            m_utilLowerWeightRange_[length].m_count_ 
3295                                = m_utilUpperWeightRange_[length].m_count_ = 0;
3296                        }
3297                        break;
3298                    }
3299                }
3300            }
3301        }
3302    
3303        // copy the ranges, shortest first, into the result array 
3304        int rangeCount = 0;
3305        if (m_utilWeightRange_.m_count_ > 0) {
3306            ranges[0] = new WeightRange(m_utilWeightRange_);
3307            rangeCount = 1;
3308        }
3309        for (int length = 2; length <= 4; ++ length) {
3310            // copy upper first so that later the middle range is more likely 
3311            // the first one to use
3312            if (m_utilUpperWeightRange_[length].m_count_ > 0) {
3313                ranges[rangeCount] 
3314            = new WeightRange(m_utilUpperWeightRange_[length]);
3315                ++ rangeCount;
3316            }
3317            if (m_utilLowerWeightRange_[length].m_count_ > 0) {
3318                ranges[rangeCount] 
3319            = new WeightRange(m_utilLowerWeightRange_[length]);
3320                ++ rangeCount;
3321            }
3322        }
3323        return rangeCount;
3324    }
3325    
3326    /**
3327     * Truncates the weight with length
3328     * @param weight
3329     * @param length
3330     * @return truncated weight
3331     */
3332    private static final int truncateWeight(int weight, int length) 
3333    {
3334        return weight & (0xffffffff << ((4 - length) << 3));
3335    }
3336    
3337    /**
3338     * Length of the weight
3339     * @param weight
3340     * @return length of the weight
3341     */
3342    private static final int lengthOfWeight(int weight) 
3343    {
3344        if ((weight & 0xffffff) == 0) {
3345            return 1;
3346        } 
3347        else if ((weight & 0xffff) == 0) {
3348            return 2;
3349        } 
3350        else if ((weight & 0xff) == 0) {
3351            return 3;
3352        } 
3353        return 4;
3354    }
3355    
3356    /**
3357     * Increment the weight trail
3358     * @param weight 
3359     * @param length
3360     * @return new weight
3361     */
3362    private static final int incWeightTrail(int weight, int length) 
3363    {
3364        return weight + (1 << ((4-length) << 3));
3365    }
3366
3367    /**
3368     * Decrement the weight trail
3369     * @param weight 
3370     * @param length
3371     * @return new weight
3372     */
3373    private static int decWeightTrail(int weight, int length) 
3374    {
3375        return weight - (1 << ((4 - length) << 3));
3376    }
3377    
3378    /**
3379     * Gets the codepoint 
3380     * @param tbl contraction table
3381     * @param codePoint code point to look for
3382     * @return the offset to the code point
3383     */
3384    private static int findCP(BasicContractionTable tbl, char codePoint) 
3385    {
3386        int position = 0;
3387        while (codePoint > tbl.m_codePoints_.charAt(position)) {
3388            position ++;
3389            if (position > tbl.m_codePoints_.length()) {
3390                return -1;
3391            }
3392        }
3393        if (codePoint == tbl.m_codePoints_.charAt(position)) {
3394            return position;
3395        } 
3396        else {
3397            return -1;
3398        }
3399    }
3400
3401    /**
3402     * Finds a contraction ce
3403     * @param table
3404     * @param element
3405     * @param ch
3406     * @return ce
3407     */
3408    private static int findCE(ContractionTable table, int element, char ch) 
3409    {
3410        if (table == null) {
3411            return CE_NOT_FOUND_;
3412        }
3413        BasicContractionTable tbl = getBasicContractionTable(table, element);
3414        if (tbl == null) {
3415            return CE_NOT_FOUND_;
3416        }
3417        int position = findCP(tbl, ch);
3418        if (position > tbl.m_CEs_.size() || position < 0) {
3419            return CE_NOT_FOUND_;
3420        } 
3421        return ((Integer  )tbl.m_CEs_.get(position)).intValue();
3422    }    
3423    
3424    /**
3425     * Checks if the string is tailored in the contraction
3426     * @param table contraction table
3427     * @param element 
3428     * @param array character array to check
3429     * @param offset array offset
3430     * @return true if it is tailored
3431     */
3432    private static boolean isTailored(ContractionTable table, int element, 
3433                                      char array[], int offset) 
3434    {
3435        while (array[offset] != 0) {
3436            element = findCE(table, element, array[offset]);
3437            if (element == CE_NOT_FOUND_) {
3438                return false;
3439            }
3440            if (!isContractionTableElement(element)) {
3441                return true;
3442            }
3443            offset ++;
3444        }
3445        if (getCE(table, element, 0) != CE_NOT_FOUND_) {
3446            return true;
3447        } 
3448        else {
3449            return false; 
3450        }
3451    }
3452    
3453    /**
3454     * Assemble RuleBasedCollator
3455     * @param t build table
3456     * @param collator to update
3457     */
3458    private void assembleTable(BuildTable t, RuleBasedCollator collator) 
3459    {
3460        IntTrieBuilder mapping = t.m_mapping_;
3461        Vector   expansions = t.m_expansions_;
3462        ContractionTable contractions = t.m_contractions_;
3463        MaxExpansionTable maxexpansion = t.m_maxExpansions_;
3464        
3465        // contraction offset has to be in since we are building on the 
3466        // UCA contractions 
3467        // int beforeContractions = (HEADER_SIZE_ 
3468        //                         + paddedsize(expansions.size() << 2)) >>> 1;
3469        collator.m_contractionOffset_ = 0;
3470        int contractionsSize = constructTable(contractions);
3471        
3472        // the following operation depends on the trie data. Therefore, we have 
3473        // to do it before the trie is compacted 
3474        // sets jamo expansions
3475        getMaxExpansionJamo(mapping, maxexpansion, t.m_maxJamoExpansions_,
3476                            collator.m_isJamoSpecial_);
3477        
3478        // TODO: LATIN1 array is now in the utrie - it should be removed from 
3479        // the calculation
3480        setAttributes(collator, t.m_options_);
3481        // copy expansions
3482        int size = expansions.size();
3483        collator.m_expansion_ = new int[size];
3484        for (int i = 0; i < size; i ++) {
3485            collator.m_expansion_[i] = ((Integer  )expansions.get(i)).intValue();
3486        }
3487        // contractions block 
3488        if (contractionsSize != 0) {
3489            // copy contraction index 
3490            collator.m_contractionIndex_ = new char[contractionsSize];
3491            contractions.m_codePoints_.getChars(0, contractionsSize, 
3492                                                collator.m_contractionIndex_, 
3493                                                0);
3494            // copy contraction collation elements
3495            collator.m_contractionCE_ = new int[contractionsSize];
3496            for (int i = 0; i < contractionsSize; i ++) {
3497                collator.m_contractionCE_[i] = ((Integer  )
3498                        contractions.m_CEs_.get(i)).intValue();
3499            }
3500        }
3501        // copy mapping table
3502        collator.m_trie_ = mapping.serialize(t, 
3503                         RuleBasedCollator.DataManipulate.getInstance());
3504        // copy max expansion table
3505        // not copying the first element which is a dummy
3506        // to be in synch with icu4c's builder, we continue to use the 
3507        // expansion offset
3508        // omitting expansion offset in builder
3509        collator.m_expansionOffset_ = 0; 
3510        size = maxexpansion.m_endExpansionCE_.size();
3511        collator.m_expansionEndCE_ = new int[size - 1];
3512        for (int i = 1; i < size; i ++) {
3513            collator.m_expansionEndCE_[i - 1] = ((Integer  )
3514                         maxexpansion.m_endExpansionCE_.get(i)).intValue();
3515        }
3516        collator.m_expansionEndCEMaxSize_ = new byte[size - 1];
3517        for (int i = 1; i < size; i ++) {
3518            collator.m_expansionEndCEMaxSize_[i - 1] 
3519        = ((Byte  )maxexpansion.m_expansionCESize_.get(i)).byteValue();
3520        }
3521        // Unsafe chars table.  Finish it off, then copy it.
3522        unsafeCPAddCCNZ(t);
3523        // Or in unsafebits from UCA, making a combined table.
3524        for (int i = 0; i < UNSAFECP_TABLE_SIZE_; i ++) {    
3525        t.m_unsafeCP_[i] |= RuleBasedCollator.UCA_.m_unsafe_[i];
3526        }
3527        collator.m_unsafe_ = t.m_unsafeCP_;
3528    
3529        // Finish building Contraction Ending chars hash table and then copy it 
3530        // out.
3531        // Or in unsafebits from UCA, making a combined table
3532        for (int i = 0; i < UNSAFECP_TABLE_SIZE_; i ++) {    
3533        t.m_contrEndCP_[i] |= RuleBasedCollator.UCA_.m_contractionEnd_[i];
3534        }
3535        collator.m_contractionEnd_ = t.m_contrEndCP_;
3536    }
3537    
3538    /**
3539     * Sets this collator to use the all options and tables in UCA. 
3540     * @param collator which attribute is to be set 
3541     * @param option to set with
3542     */
3543    private static final void setAttributes(RuleBasedCollator collator,
3544                        CollationRuleParser.OptionSet option)
3545    {
3546        collator.latinOneFailed_ = true;
3547        collator.m_caseFirst_ = option.m_caseFirst_;
3548        collator.setDecomposition(option.m_decomposition_);
3549        collator.setAlternateHandlingShifted(
3550                         option.m_isAlternateHandlingShifted_);
3551        collator.setCaseLevel(option.m_isCaseLevel_);
3552        collator.setFrenchCollation(option.m_isFrenchCollation_);
3553        collator.m_isHiragana4_ = option.m_isHiragana4_;
3554        collator.setStrength(option.m_strength_);
3555        collator.m_variableTopValue_ = option.m_variableTopValue_;    
3556        collator.latinOneFailed_ = false;
3557    }
3558    
3559    /**
3560     * Constructing the contraction table
3561     * @param table contraction table
3562     * @return 
3563     */
3564    private int constructTable(ContractionTable table) 
3565    {
3566        // See how much memory we need 
3567        int tsize = table.m_elements_.size();
3568        if (tsize == 0) {
3569            return 0;
3570        }
3571        table.m_offsets_.clear();
3572        int position = 0;
3573        for (int i = 0; i < tsize; i ++) {
3574            table.m_offsets_.add(new Integer  (position));
3575            position += ((BasicContractionTable)
3576             table.m_elements_.get(i)).m_CEs_.size();
3577        }
3578        table.m_CEs_.clear();
3579        table.m_codePoints_.delete(0, table.m_codePoints_.length());
3580        // Now stuff the things in
3581        StringBuffer   cpPointer = table.m_codePoints_;
3582        Vector   CEPointer = table.m_CEs_;
3583        for (int i = 0; i < tsize; i ++) {
3584            BasicContractionTable bct = (BasicContractionTable)
3585        table.m_elements_.get(i);
3586            int size = bct.m_CEs_.size();
3587            char ccMax = 0;
3588            char ccMin = 255;
3589            int offset = CEPointer.size();
3590            CEPointer.add(bct.m_CEs_.get(0));
3591            for (int j = 1; j < size; j ++) {
3592                char ch = bct.m_codePoints_.charAt(j);
3593                char cc = (char)(UCharacter.getCombiningClass(ch) & 0xFF);
3594                if (cc > ccMax) {
3595                    ccMax = cc;
3596                }
3597                if (cc < ccMin) {
3598                    ccMin = cc;
3599                }
3600                cpPointer.append(ch);
3601                CEPointer.add(bct.m_CEs_.get(j));
3602            }
3603            cpPointer.insert(offset, 
3604                             (char)(((ccMin == ccMax) ? 1 : 0 << 8) | ccMax));
3605            for (int j = 0; j < size; j ++) {
3606                if (isContractionTableElement(((Integer  )
3607                           CEPointer.get(offset + j)).intValue())) {
3608                    int ce = ((Integer  )CEPointer.get(offset + j)).intValue();
3609                    CEPointer.set(offset + j, 
3610                  new Integer  (constructSpecialCE(getCETag(ce), 
3611                                 ((Integer  )table.m_offsets_.get(
3612                                                getContractionOffset(ce))).intValue())));
3613                }
3614            }
3615        }
3616    
3617        for (int i = 0; i <= 0x10FFFF; i ++) {
3618            int CE = table.m_mapping_.getValue(i);
3619            if (isContractionTableElement(CE)) {
3620                CE = constructSpecialCE(getCETag(CE), 
3621                                        ((Integer  )table.m_offsets_.get(
3622                                       getContractionOffset(CE))).intValue());
3623                table.m_mapping_.setValue(i, CE);
3624            }
3625        }
3626        return position;
3627    }
3628    
3629    /**
3630     * Get contraction offset
3631     * @param ce collation element 
3632     * @return contraction offset
3633     */
3634    private static final int getContractionOffset(int ce)
3635    {
3636        return ce & 0xFFFFFF;
3637    }
3638    
3639    /**
3640     * Gets the maximum Jamo expansion
3641     * @param mapping trie table
3642     * @param maxexpansion maximum expansion table
3643     * @param maxjamoexpansion maximum jamo expansion table
3644     * @param jamospecial is jamo special?
3645     */
3646    private static void getMaxExpansionJamo(IntTrieBuilder mapping, 
3647                                            MaxExpansionTable maxexpansion,
3648                                            MaxJamoExpansionTable 
3649                        maxjamoexpansion,
3650                                            boolean jamospecial)
3651    {
3652        int VBASE  = 0x1161;
3653        int TBASE  = 0x11A8;
3654        int VCOUNT = 21;
3655        int TCOUNT = 28;
3656        int v = VBASE + VCOUNT - 1;
3657        int t = TBASE + TCOUNT - 1;
3658        
3659        while (v >= VBASE) {
3660            int ce = mapping.getValue(v);
3661            if ((ce & RuleBasedCollator.CE_SPECIAL_FLAG_) 
3662        != RuleBasedCollator.CE_SPECIAL_FLAG_) {
3663                setMaxExpansion(ce, (byte)2, maxexpansion);
3664            }
3665            v --;
3666        }
3667        
3668        while (t >= TBASE)
3669        {
3670        int ce = mapping.getValue(t);
3671        if ((ce & RuleBasedCollator.CE_SPECIAL_FLAG_) 
3672            != RuleBasedCollator.CE_SPECIAL_FLAG_) {
3673            setMaxExpansion(ce, (byte)3, maxexpansion);
3674        }
3675        t --;
3676        }
3677        // According to the docs, 99% of the time, the Jamo will not be special 
3678        if (jamospecial) {
3679            // gets the max expansion in all unicode characters
3680            int count = maxjamoexpansion.m_endExpansionCE_.size();
3681            byte maxTSize = (byte)(maxjamoexpansion.m_maxLSize_ + 
3682                                   maxjamoexpansion.m_maxVSize_ +
3683                                   maxjamoexpansion.m_maxTSize_);
3684            byte maxVSize = (byte)(maxjamoexpansion.m_maxLSize_ + 
3685                                   maxjamoexpansion.m_maxVSize_);
3686        
3687            while (count > 0) {
3688                count --;
3689                if (((Boolean  )maxjamoexpansion.m_isV_.get(count)).booleanValue()
3690            == true) {
3691                    setMaxExpansion(((Integer  )
3692                     maxjamoexpansion.m_endExpansionCE_.get(count)).intValue(), 
3693                    maxVSize, maxexpansion);
3694                }
3695                else {
3696                    setMaxExpansion(((Integer  )
3697                     maxjamoexpansion.m_endExpansionCE_.get(count)).intValue(), 
3698                    maxTSize, maxexpansion);
3699                }
3700            }
3701        }
3702    }
3703    
3704    /**  
3705     * To the UnsafeCP hash table, add all chars with combining class != 0     
3706     * @param t build table
3707     */
3708    private static final void unsafeCPAddCCNZ(BuildTable t) 
3709    {
3710    
3711        for (char c = 0; c < 0xffff; c ++) {
3712            char fcd = NormalizerImpl.getFCD16(c);
3713            if (fcd >= 0x100 || // if the leading combining class(c) > 0 ||
3714                (UTF16.isLeadSurrogate(c) && fcd != 0)) {
3715                // c is a leading surrogate with some FCD data
3716                unsafeCPSet(t.m_unsafeCP_, c);
3717            }
3718        }
3719    
3720        if (t.m_prefixLookup_ != null) {
3721            Enumeration   els = t.m_prefixLookup_.elements();
3722            while (els.hasMoreElements()) {
3723                Elements e = (Elements)els.nextElement();
3724                // codepoints here are in the NFD form. We need to add the
3725                // first code point of the NFC form to unsafe, because 
3726                // strcoll needs to backup over them.
3727                // weiv: This is wrong! See the comment above.
3728                //String decomp = Normalizer.decompose(e.m_cPoints_, true);
3729                //unsafeCPSet(t.m_unsafeCP_, decomp.charAt(0));
3730                // it should be:
3731                String   comp = Normalizer.compose(e.m_cPoints_, false);
3732                unsafeCPSet(t.m_unsafeCP_, comp.charAt(0));
3733            } 
3734        }
3735    }
3736    
3737    /**
3738     * Create closure
3739     * @param t build table
3740     * @param collator RuleBasedCollator
3741     * @param colEl collation element iterator
3742     * @param start 
3743     * @param limit
3744     * @param type character type
3745     * @return 
3746     */
3747    private boolean enumCategoryRangeClosureCategory(BuildTable t, 
3748                             RuleBasedCollator collator, 
3749                             CollationElementIterator colEl, 
3750                             int start, int limit, int type) 
3751    {
3752        if (type != UCharacterCategory.UNASSIGNED 
3753            && type != UCharacterCategory.PRIVATE_USE) { 
3754            // if the range is assigned - we might ommit more categories later
3755            
3756            for (int u32 = start; u32 < limit; u32 ++) {
3757                int noOfDec = NormalizerImpl.getDecomposition(u32, false,
3758                                                              m_utilCharBuffer_, 
3759                                                              0, 256);
3760                if (noOfDec > 0) {
3761                    // if we're positive, that means there is no decomposition
3762                    String   comp = UCharacter.toString(u32);
3763                    String   decomp = new String  (m_utilCharBuffer_, 0, noOfDec);
3764                    if (!collator.equals(comp, decomp)) {
3765                        m_utilElement_.m_cPoints_ = decomp;
3766                        m_utilElement_.m_prefix_ = 0;
3767                        Elements prefix 
3768                = (Elements)t.m_prefixLookup_.get(m_utilElement_);
3769                        if (prefix == null) {
3770                            m_utilElement_.m_cPoints_ = comp;
3771                            m_utilElement_.m_prefix_ = 0;
3772                            m_utilElement_.m_prefixChars_ = null;
3773                            colEl.setText(decomp);
3774                            int ce = colEl.next();
3775                            m_utilElement_.m_CELength_ = 0;
3776                            while (ce != CollationElementIterator.NULLORDER) {
3777                                m_utilElement_.m_CEs_[
3778                              m_utilElement_.m_CELength_ ++] 
3779                    = ce;
3780                                ce = colEl.next();
3781                            }
3782                        } 
3783                        else {
3784                            m_utilElement_.m_cPoints_ = comp;
3785                            m_utilElement_.m_prefix_ = 0;
3786                            m_utilElement_.m_prefixChars_ = null;
3787                            m_utilElement_.m_CELength_ = 1;
3788                            m_utilElement_.m_CEs_[0] = prefix.m_mapCE_;
3789                            // This character uses a prefix. We have to add it 
3790                            // to the unsafe table, as it decomposed form is 
3791                            // already in. In Japanese, this happens for \u309e 
3792                            // & \u30fe
3793                            // Since unsafeCPSet is static in ucol_elm, we are 
3794                            // going to wrap it up in the unsafeCPAddCCNZ 
3795                            // function
3796                        }
3797                        addAnElement(t, m_utilElement_);
3798                    }
3799                }
3800            }
3801        }
3802        return true;
3803    }
3804    
3805    /**
3806     * Determine if a character is a Jamo
3807     * @param ch character to test
3808     * @return true if ch is a Jamo, false otherwise
3809     */
3810    private static final boolean isJamo(char ch)
3811    { 
3812    return (ch >= 0x1100 && ch <= 0x1112) 
3813        || (ch >= 0x1175 && ch <= 0x1161) 
3814        || (ch >= 0x11A8 && ch <= 0x11C2);
3815    }
3816    
3817    /**
3818     * Produces canonical closure
3819     */
3820    private void canonicalClosure(BuildTable t) 
3821    {
3822        BuildTable temp = new BuildTable(t);
3823        assembleTable(temp, temp.m_collator_);
3824        // produce canonical closure 
3825        CollationElementIterator coleiter 
3826        = temp.m_collator_.getCollationElementIterator("");
3827        RangeValueIterator typeiter = UCharacter.getTypeIterator();
3828        RangeValueIterator.Element element = new RangeValueIterator.Element();
3829        while (typeiter.next(element)) {
3830            enumCategoryRangeClosureCategory(t, temp.m_collator_, coleiter, 
3831                         element.start, element.limit, 
3832                         element.value);
3833        }
3834    }
3835    
3836    private void processUCACompleteIgnorables(BuildTable t) 
3837    {
3838        TrieIterator trieiterator 
3839        = new TrieIterator(RuleBasedCollator.UCA_.m_trie_);
3840        RangeValueIterator.Element element = new RangeValueIterator.Element();
3841        while (trieiterator.next(element)) {
3842            int start = element.start;
3843            int limit = element.limit;
3844            if (element.value == 0) {
3845                while (start < limit) {
3846                    int CE = t.m_mapping_.getValue(start);
3847                    if (CE == CE_NOT_FOUND_) {
3848                        m_utilElement_.m_prefix_ = 0;
3849                        m_utilElement_.m_uchars_ = UCharacter.toString(start);
3850                        m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
3851                        m_utilElement_.m_cPointsOffset_ = 0;
3852                        m_utilElement_.m_CELength_ = 1;
3853                        m_utilElement_.m_CEs_[0] = 0;
3854                        addAnElement(t, m_utilElement_);
3855                    }
3856                    start ++;
3857                }
3858            }
3859        }
3860    }
3861}
3862
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags