KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > ibm > icu > impl > IntTrieBuilder


1 /*
2 ******************************************************************************
3 * Copyright (C) 1996-2006, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 ******************************************************************************
6 */

7
8 package com.ibm.icu.impl;
9
10 import com.ibm.icu.lang.UCharacter;
11 import com.ibm.icu.text.UTF16;
12 import java.util.Arrays JavaDoc;
13 import java.io.OutputStream JavaDoc;
14 import java.io.DataOutputStream JavaDoc;
15 import java.io.IOException JavaDoc;
16
17 /**
18  * Builder class to manipulate and generate a trie.
19  * This is useful for ICU data in primitive types.
20  * Provides a compact way to store information that is indexed by Unicode
21  * values, such as character properties, types, keyboard values, etc. This is
22  * very useful when you have a block of Unicode data that contains significant
23  * values while the rest of the Unicode data is unused in the application or
24  * when you have a lot of redundance, such as where all 21,000 Han ideographs
25  * have the same value. However, lookup is much faster than a hash table.
26  * A trie of any primitive data type serves two purposes:
27  * <UL type = round>
28  * <LI>Fast access of the indexed values.
29  * <LI>Smaller memory footprint.
30  * </UL>
31  * This is a direct port from the ICU4C version
32  * @author Syn Wee Quek
33  */

34 public class IntTrieBuilder extends TrieBuilder
35 {
36     // public constructor ----------------------------------------------
37

38     /**
39      * Copy constructor
40      */

41     public IntTrieBuilder(IntTrieBuilder table)
42     {
43         super(table);
44         m_data_ = new int[m_dataCapacity_];
45         System.arraycopy(table.m_data_, 0, m_data_, 0, m_dataLength_);
46         m_initialValue_ = table.m_initialValue_;
47         m_leadUnitValue_ = table.m_leadUnitValue_;
48     }
49     
50     /**
51      * Constructs a build table
52      * @param aliasdata data to be filled into table
53      * @param maxdatalength maximum data length allowed in table
54      * @param initialvalue inital data value
55      * @param latin1linear is latin 1 to be linear
56      */

57     public IntTrieBuilder(int aliasdata[], int maxdatalength,
58                           int initialvalue, int leadunitvalue,
59                           boolean latin1linear)
60     {
61         super();
62         if (maxdatalength < DATA_BLOCK_LENGTH || (latin1linear
63                                                   && maxdatalength < 1024)) {
64             throw new IllegalArgumentException JavaDoc(
65                                                "Argument maxdatalength is too small");
66         }
67             
68         if (aliasdata != null) {
69             m_data_ = aliasdata;
70         }
71         else {
72             m_data_ = new int[maxdatalength];
73         }
74         
75         // preallocate and reset the first data block (block index 0)
76
int j = DATA_BLOCK_LENGTH;
77         
78         if (latin1linear) {
79             // preallocate and reset the first block (number 0) and Latin-1
80
// (U+0000..U+00ff) after that made sure above that
81
// maxDataLength >= 1024
82
// set indexes to point to consecutive data blocks
83
int i = 0;
84             do {
85                 // do this at least for trie->index[0] even if that block is
86
// only partly used for Latin-1
87
m_index_[i ++] = j;
88                 j += DATA_BLOCK_LENGTH;
89             } while (i < (256 >> SHIFT_));
90         }
91         
92         m_dataLength_ = j;
93         // reset the initially allocated blocks to the initial value
94
Arrays.fill(m_data_, 0, m_dataLength_, initialvalue);
95         m_initialValue_ = initialvalue;
96         m_leadUnitValue_ = leadunitvalue;
97         m_dataCapacity_ = maxdatalength;
98         m_isLatin1Linear_ = latin1linear;
99         m_isCompacted_ = false;
100     }
101
102     // public methods -------------------------------------------------------
103

104     /*public final void print()
105       {
106       int i = 0;
107       int oldvalue = m_index_[i];
108       int count = 0;
109       System.out.println("index length " + m_indexLength_
110       + " --------------------------");
111       while (i < m_indexLength_) {
112       if (m_index_[i] != oldvalue) {
113       System.out.println("index has " + count + " counts of "
114       + Integer.toHexString(oldvalue));
115       count = 0;
116       oldvalue = m_index_[i];
117       }
118       count ++;
119       i ++;
120       }
121       System.out.println("index has " + count + " counts of "
122       + Integer.toHexString(oldvalue));
123       i = 0;
124       oldvalue = m_data_[i];
125       count = 0;
126       System.out.println("data length " + m_dataLength_
127       + " --------------------------");
128       while (i < m_dataLength_) {
129       if (m_data_[i] != oldvalue) {
130       if ((oldvalue & 0xf1000000) == 0xf1000000) {
131       int temp = oldvalue & 0xffffff;
132       temp += 0x320;
133       oldvalue = 0xf1000000 | temp;
134       }
135       if ((oldvalue & 0xf2000000) == 0xf2000000) {
136       int temp = oldvalue & 0xffffff;
137       temp += 0x14a;
138       oldvalue = 0xf2000000 | temp;
139       }
140       System.out.println("data has " + count + " counts of "
141       + Integer.toHexString(oldvalue));
142       count = 0;
143       oldvalue = m_data_[i];
144       }
145       count ++;
146       i ++;
147       }
148       if ((oldvalue & 0xf1000000) == 0xf1000000) {
149       int temp = oldvalue & 0xffffff;
150       temp += 0x320;
151       oldvalue = 0xf1000000 | temp;
152       }
153       if ((oldvalue & 0xf2000000) == 0xf2000000) {
154       int temp = oldvalue & 0xffffff;
155       temp += 0x14a;
156       oldvalue = 0xf2000000 | temp;
157       }
158       System.out.println("data has " + count + " counts of "
159       + Integer.toHexString(oldvalue));
160       }
161     */

162     /**
163      * Gets a 32 bit data from the table data
164      * @param ch codepoint which data is to be retrieved
165      * @return the 32 bit data
166      */

167     public int getValue(int ch)
168     {
169         // valid, uncompacted trie and valid c?
170
if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
171             return 0;
172         }
173     
174         int block = m_index_[ch >> SHIFT_];
175         return m_data_[Math.abs(block) + (ch & MASK_)];
176     }
177     
178     /**
179      * Get a 32 bit data from the table data
180      * @param ch code point for which data is to be retrieved.
181      * @param inBlockZero Output parameter, inBlockZero[0] returns true if the
182      * char maps into block zero, otherwise false.
183      * @return the 32 bit data value.
184      */

185     public int getValue(int ch, boolean [] inBlockZero)
186     {
187         // valid, uncompacted trie and valid c?
188
if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
189             if (inBlockZero != null) {
190                 inBlockZero[0] = true;
191             }
192             return 0;
193         }
194     
195         int block = m_index_[ch >> SHIFT_];
196         if (inBlockZero != null) {
197             inBlockZero[0] = (block == 0);
198         }
199         return m_data_[Math.abs(block) + (ch & MASK_)];
200     }
201     
202     
203     /**
204      * Sets a 32 bit data in the table data
205      * @param ch codepoint which data is to be set
206      * @param value to set
207      * @return true if the set is successful, otherwise
208      * if the table has been compacted return false
209      */

210     public boolean setValue(int ch, int value)
211     {
212         // valid, uncompacted trie and valid c?
213
if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
214             return false;
215         }
216     
217         int block = getDataBlock(ch);
218         if (block < 0) {
219             return false;
220         }
221     
222         m_data_[block + (ch & MASK_)] = value;
223         return true;
224     }
225     
226     /**
227      * Serializes the build table with 32 bit data
228      * @param datamanipulate builder raw fold method implementation
229      * @param triedatamanipulate result trie fold method
230      * @return a new trie
231      */

232     public IntTrie serialize(TrieBuilder.DataManipulate datamanipulate,
233                              Trie.DataManipulate triedatamanipulate)
234     {
235         if (datamanipulate == null) {
236             throw new IllegalArgumentException JavaDoc("Parameters can not be null");
237         }
238         // fold and compact if necessary, also checks that indexLength is
239
// within limits
240
if (!m_isCompacted_) {
241             // compact once without overlap to improve folding
242
compact(false);
243             // fold the supplementary part of the index array
244
fold(datamanipulate);
245             // compact again with overlap for minimum data array length
246
compact(true);
247             m_isCompacted_ = true;
248         }
249         // is dataLength within limits?
250
if (m_dataLength_ >= MAX_DATA_LENGTH_) {
251             throw new ArrayIndexOutOfBoundsException JavaDoc("Data length too small");
252         }
253     
254         char index[] = new char[m_indexLength_];
255         int data[] = new int[m_dataLength_];
256         // write the index (stage 1) array and the 32-bit data (stage 2) array
257
// write 16-bit index values shifted right by INDEX_SHIFT_
258
for (int i = 0; i < m_indexLength_; i ++) {
259             index[i] = (char)(m_index_[i] >>> INDEX_SHIFT_);
260         }
261         // write 32-bit data values
262
System.arraycopy(m_data_, 0, data, 0, m_dataLength_);
263         
264         int options = SHIFT_ | (INDEX_SHIFT_ << OPTIONS_INDEX_SHIFT_);
265         options |= OPTIONS_DATA_IS_32_BIT_;
266         if (m_isLatin1Linear_) {
267             options |= OPTIONS_LATIN1_IS_LINEAR_;
268         }
269         return new IntTrie(index, data, m_initialValue_, options,
270                            triedatamanipulate);
271     }
272     
273     
274     /**
275      * Serializes the build table to an output stream.
276      *
277      * Compacts the build-time trie after all values are set, and then
278      * writes the serialized form onto an output stream.
279      *
280      * After this, this build-time Trie can only be serialized again and/or closed;
281      * no further values can be added.
282      *
283      * This function is the rough equivalent of utrie_seriaize() in ICU4C.
284      *
285      * @param os the output stream to which the seriaized trie will be written.
286      * If nul, the function still returns the size of the serialized Trie.
287      * @param reduceTo16Bits If true, reduce the data size to 16 bits. The resulting
288      * serialized form can then be used to create a CharTrie.
289      * @param datamanipulate builder raw fold method implementation
290      * @return the number of bytes written to the output stream.
291      */

292      public int serialize(OutputStream JavaDoc os, boolean reduceTo16Bits,
293             TrieBuilder.DataManipulate datamanipulate) throws IOException JavaDoc {
294          if (datamanipulate == null) {
295              throw new IllegalArgumentException JavaDoc("Parameters can not be null");
296          }
297
298          // fold and compact if necessary, also checks that indexLength is
299
// within limits
300
if (!m_isCompacted_) {
301              // compact once without overlap to improve folding
302
compact(false);
303              // fold the supplementary part of the index array
304
fold(datamanipulate);
305              // compact again with overlap for minimum data array length
306
compact(true);
307              m_isCompacted_ = true;
308          }
309          
310          // is dataLength within limits?
311
int length;
312          if (reduceTo16Bits) {
313              length = m_dataLength_ + m_indexLength_;
314          } else {
315              length = m_dataLength_;
316          }
317          if (length >= MAX_DATA_LENGTH_) {
318              throw new ArrayIndexOutOfBoundsException JavaDoc("Data length too small");
319          }
320          
321          // struct UTrieHeader {
322
// int32_t signature;
323
// int32_t options (a bit field)
324
// int32_t indexLength
325
// int32_t dataLength
326
length = Trie.HEADER_LENGTH_ + 2*m_indexLength_;
327          if(reduceTo16Bits) {
328              length+=2*m_dataLength_;
329          } else {
330              length+=4*m_dataLength_;
331          }
332          
333          if (os == null) {
334              // No output stream. Just return the length of the serialized Trie, in bytes.
335
return length;
336          }
337
338          DataOutputStream JavaDoc dos = new DataOutputStream JavaDoc(os);
339          dos.writeInt(Trie.HEADER_SIGNATURE_);
340          
341          int options = Trie.INDEX_STAGE_1_SHIFT_ | (Trie.INDEX_STAGE_2_SHIFT_<<Trie.HEADER_OPTIONS_INDEX_SHIFT_);
342          if(!reduceTo16Bits) {
343              options |= Trie.HEADER_OPTIONS_DATA_IS_32_BIT_;
344          }
345          if(m_isLatin1Linear_) {
346              options |= Trie.HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_;
347          }
348          dos.writeInt(options);
349          
350          dos.writeInt(m_indexLength_);
351          dos.writeInt(m_dataLength_);
352          
353          /* write the index (stage 1) array and the 16/32-bit data (stage 2) array */
354          if(reduceTo16Bits) {
355              /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT, after adding indexLength */
356              for (int i=0; i<m_indexLength_; i++) {
357                  int v = (m_index_[i] + m_indexLength_) >>> Trie.INDEX_STAGE_2_SHIFT_;
358                  dos.writeChar(v);
359              }
360              
361              /* write 16-bit data values */
362              for(int i=0; i<m_dataLength_; i++) {
363                  int v = m_data_[i] & 0x0000ffff;
364                  dos.writeChar(v);
365              }
366          } else {
367              /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT */
368              for (int i=0; i<m_indexLength_; i++) {
369                  int v = (m_index_[i]) >>> Trie.INDEX_STAGE_2_SHIFT_;
370                  dos.writeChar(v);
371              }
372              
373              /* write 32-bit data values */
374              for(int i=0; i<m_dataLength_; i++) {
375                  dos.writeInt(m_data_[i]);
376              }
377         }
378
379          return length;
380
381     }
382     
383     
384     /**
385      * Set a value in a range of code points [start..limit].
386      * All code points c with start &lt;= c &lt; limit will get the value if
387      * overwrite is true or if the old value is 0.
388      * @param start the first code point to get the value
389      * @param limit one past the last code point to get the value
390      * @param value the value
391      * @param overwrite flag for whether old non-initial values are to be
392      * overwritten
393      * @return false if a failure occurred (illegal argument or data array
394      * overrun)
395      */

396     public boolean setRange(int start, int limit, int value,
397                             boolean overwrite)
398     {
399         // repeat value in [start..limit[
400
// mark index values for repeat-data blocks by setting bit 31 of the
401
// index values fill around existing values if any, if(overwrite)
402

403         // valid, uncompacted trie and valid indexes?
404
if (m_isCompacted_ || start < UCharacter.MIN_VALUE
405             || start > UCharacter.MAX_VALUE || limit < UCharacter.MIN_VALUE
406             || limit > (UCharacter.MAX_VALUE + 1) || start > limit) {
407             return false;
408         }
409             
410         if (start == limit) {
411             return true; // nothing to do
412
}
413         
414         if ((start & MASK_) != 0) {
415             // set partial block at [start..following block boundary[
416
int block = getDataBlock(start);
417             if (block < 0) {
418                 return false;
419             }
420         
421             int nextStart = (start + DATA_BLOCK_LENGTH) & ~MASK_;
422             if (nextStart <= limit) {
423                 fillBlock(block, start & MASK_, DATA_BLOCK_LENGTH,
424                           value, overwrite);
425                 start = nextStart;
426             }
427             else {
428                 fillBlock(block, start & MASK_, limit & MASK_,
429                           value, overwrite);
430                 return true;
431             }
432         }
433         
434         // number of positions in the last, partial block
435
int rest = limit & MASK_;
436         
437         // round down limit to a block boundary
438
limit &= ~MASK_;
439         
440         // iterate over all-value blocks
441
int repeatBlock = 0;
442         if (value == m_initialValue_) {
443             // repeatBlock = 0; assigned above
444
}
445         else {
446             repeatBlock = -1;
447         }
448         while (start < limit) {
449             // get index value
450
int block = m_index_[start >> SHIFT_];
451             if (block > 0) {
452                 // already allocated, fill in value
453
fillBlock(block, 0, DATA_BLOCK_LENGTH, value, overwrite);
454             }
455             else if (m_data_[-block] != value && (block == 0 || overwrite)) {
456                 // set the repeatBlock instead of the current block 0 or range
457
// block
458
if (repeatBlock >= 0) {
459                     m_index_[start >> SHIFT_] = -repeatBlock;
460                 }
461                 else {
462                     // create and set and fill the repeatBlock
463
repeatBlock = getDataBlock(start);
464                     if (repeatBlock < 0) {
465                         return false;
466                     }
467         
468                     // set the negative block number to indicate that it is a
469
// repeat block
470
m_index_[start >> SHIFT_] = -repeatBlock;
471                     fillBlock(repeatBlock, 0, DATA_BLOCK_LENGTH, value, true);
472                 }
473             }
474         
475             start += DATA_BLOCK_LENGTH;
476         }
477         
478         if (rest > 0) {
479             // set partial block at [last block boundary..limit[
480
int block = getDataBlock(start);
481             if (block < 0) {
482                 return false;
483             }
484             fillBlock(block, 0, rest, value, overwrite);
485         }
486         
487         return true;
488     }
489     
490     // protected data member ------------------------------------------------
491

492     protected int m_data_[];
493     protected int m_initialValue_;
494     
495     // private data member ------------------------------------------------
496

497     private int m_leadUnitValue_;
498      
499     // private methods ------------------------------------------------------
500

501     private int allocDataBlock()
502     {
503         int newBlock = m_dataLength_;
504         int newTop = newBlock + DATA_BLOCK_LENGTH;
505         if (newTop > m_dataCapacity_) {
506             // out of memory in the data array
507
return -1;
508         }
509         m_dataLength_ = newTop;
510         return newBlock;
511     }
512
513     /**
514      * No error checking for illegal arguments.
515      * @param ch codepoint to look for
516      * @return -1 if no new data block available (out of memory in data array)
517      */

518     private int getDataBlock(int ch)
519     {
520         ch >>= SHIFT_;
521         int indexValue = m_index_[ch];
522         if (indexValue > 0) {
523             return indexValue;
524         }
525     
526         // allocate a new data block
527
int newBlock = allocDataBlock();
528         if (newBlock < 0) {
529             // out of memory in the data array
530
return -1;
531         }
532         m_index_[ch] = newBlock;
533     
534         // copy-on-write for a block from a setRange()
535
System.arraycopy(m_data_, Math.abs(indexValue), m_data_, newBlock,
536                          DATA_BLOCK_LENGTH << 2);
537         return newBlock;
538     }
539     
540     /**
541      * Compact a folded build-time trie.
542      * The compaction
543      * - removes blocks that are identical with earlier ones
544      * - overlaps adjacent blocks as much as possible (if overlap == true)
545      * - moves blocks in steps of the data granularity
546      * - moves and overlaps blocks that overlap with multiple values in the overlap region
547      *
548      * It does not
549      * - try to move and overlap blocks that are not already adjacent
550      * @param overlap flag
551      */

552     private void compact(boolean overlap)
553     {
554         if (m_isCompacted_) {
555             return; // nothing left to do
556
}
557     
558         // compaction
559
// initialize the index map with "block is used/unused" flags
560
findUnusedBlocks();
561         
562         // if Latin-1 is preallocated and linear, then do not compact Latin-1
563
// data
564
int overlapStart = DATA_BLOCK_LENGTH;
565         if (m_isLatin1Linear_ && SHIFT_ <= 8) {
566             overlapStart += 256;
567         }
568        
569         int newStart = DATA_BLOCK_LENGTH;
570         int i;
571         for (int start = newStart; start < m_dataLength_;) {
572             // start: index of first entry of current block
573
// newStart: index where the current block is to be moved
574
// (right after current end of already-compacted data)
575
// skip blocks that are not used
576
if (m_map_[start >>> SHIFT_] < 0) {
577                 // advance start to the next block
578
start += DATA_BLOCK_LENGTH;
579                 // leave newStart with the previous block!
580
continue;
581             }
582             // search for an identical block
583
if (start >= overlapStart) {
584                 i = findSameDataBlock(m_data_, newStart, start,
585                                           overlap ? DATA_GRANULARITY_ : DATA_BLOCK_LENGTH);
586                 if (i >= 0) {
587                     // found an identical block, set the other block's index
588
// value for the current block
589
m_map_[start >>> SHIFT_] = i;
590                     // advance start to the next block
591
start += DATA_BLOCK_LENGTH;
592                     // leave newStart with the previous block!
593
continue;
594                 }
595             }
596             // see if the beginning of this block can be overlapped with the
597
// end of the previous block
598
if(overlap && start>=overlapStart) {
599                 /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
600                 for(i=DATA_BLOCK_LENGTH-DATA_GRANULARITY_;
601                     i>0 && !equal_int(m_data_, newStart-i, start, i);
602                     i-=DATA_GRANULARITY_) {}
603             } else {
604                 i=0;
605             }
606             if (i > 0) {
607                 // some overlap
608
m_map_[start >>> SHIFT_] = newStart - i;
609                 // move the non-overlapping indexes to their new positions
610
start += i;
611                 for (i = DATA_BLOCK_LENGTH - i; i > 0; -- i) {
612                     m_data_[newStart ++] = m_data_[start ++];
613                 }
614             }
615             else if (newStart < start) {
616                 // no overlap, just move the indexes to their new positions
617
m_map_[start >>> SHIFT_] = newStart;
618                 for (i = DATA_BLOCK_LENGTH; i > 0; -- i) {
619                     m_data_[newStart ++] = m_data_[start ++];
620                 }
621             }
622             else { // no overlap && newStart==start
623
m_map_[start >>> SHIFT_] = start;
624                 newStart += DATA_BLOCK_LENGTH;
625                 start = newStart;
626             }
627         }
628         // now adjust the index (stage 1) table
629
for (i = 0; i < m_indexLength_; ++ i) {
630             m_index_[i] = m_map_[Math.abs(m_index_[i]) >>> SHIFT_];
631         }
632         m_dataLength_ = newStart;
633     }
634
635     /**
636      * Find the same data block
637      * @param data array
638      * @param dataLength
639      * @param otherBlock
640      * @param step
641      */

642     private static final int findSameDataBlock(int data[], int dataLength,
643                                                int otherBlock, int step)
644     {
645         // ensure that we do not even partially get past dataLength
646
dataLength -= DATA_BLOCK_LENGTH;
647
648         for (int block = 0; block <= dataLength; block += step) {
649             if(equal_int(data, block, otherBlock, DATA_BLOCK_LENGTH)) {
650                 return block;
651             }
652         }
653         return -1;
654     }
655     
656     /**
657      * Fold the normalization data for supplementary code points into
658      * a compact area on top of the BMP-part of the trie index,
659      * with the lead surrogates indexing this compact area.
660      *
661      * Duplicate the index values for lead surrogates:
662      * From inside the BMP area, where some may be overridden with folded values,
663      * to just after the BMP area, where they can be retrieved for
664      * code point lookups.
665      * @param manipulate fold implementation
666      */

667     private final void fold(DataManipulate manipulate)
668     {
669         int leadIndexes[] = new int[SURROGATE_BLOCK_COUNT_];
670         int index[] = m_index_;
671         // copy the lead surrogate indexes into a temporary array
672
System.arraycopy(index, 0xd800 >> SHIFT_, leadIndexes, 0,
673                          SURROGATE_BLOCK_COUNT_);
674         
675         // set all values for lead surrogate code *units* to leadUnitValue
676
// so that by default runtime lookups will find no data for associated
677
// supplementary code points, unless there is data for such code points
678
// which will result in a non-zero folding value below that is set for
679
// the respective lead units
680
// the above saved the indexes for surrogate code *points*
681
// fill the indexes with simplified code from utrie_setRange32()
682
int block = 0;
683         if (m_leadUnitValue_ == m_initialValue_) {
684             // leadUnitValue == initialValue, use all-initial-value block
685
// block = 0; if block here left empty
686
}
687         else {
688             // create and fill the repeatBlock
689
block = allocDataBlock();
690             if (block < 0) {
691                 // data table overflow
692
throw new IllegalStateException JavaDoc("Internal error: Out of memory space");
693             }
694             fillBlock(block, 0, DATA_BLOCK_LENGTH, m_leadUnitValue_, true);
695             // negative block number to indicate that it is a repeat block
696
block = -block;
697         }
698         for (int c = (0xd800 >> SHIFT_); c < (0xdc00 >> SHIFT_); ++ c) {
699             m_index_[c] = block;
700         }
701
702         // Fold significant index values into the area just after the BMP
703
// indexes.
704
// In case the first lead surrogate has significant data,
705
// its index block must be used first (in which case the folding is a
706
// no-op).
707
// Later all folded index blocks are moved up one to insert the copied
708
// lead surrogate indexes.
709
int indexLength = BMP_INDEX_LENGTH_;
710         // search for any index (stage 1) entries for supplementary code points
711
for (int c = 0x10000; c < 0x110000;) {
712             if (index[c >> SHIFT_] != 0) {
713                 // there is data, treat the full block for a lead surrogate
714
c &= ~0x3ff;
715                 // is there an identical index block?
716
block = findSameIndexBlock(index, indexLength, c >> SHIFT_);
717                 
718                 // get a folded value for [c..c+0x400[ and,
719
// if different from the value for the lead surrogate code
720
// point, set it for the lead surrogate code unit
721

722                 int value = manipulate.getFoldedValue(c,
723                                                       block + SURROGATE_BLOCK_COUNT_);
724                 if (value != getValue(UTF16.getLeadSurrogate(c))) {
725                     if (!setValue(UTF16.getLeadSurrogate(c), value)) {
726                         // data table overflow
727
throw new ArrayIndexOutOfBoundsException JavaDoc(
728                                                                  "Data table overflow");
729                     }
730                     // if we did not find an identical index block...
731
if (block == indexLength) {
732                         // move the actual index (stage 1) entries from the
733
// supplementary position to the new one
734
System.arraycopy(index, c >> SHIFT_, index, indexLength,
735                                          SURROGATE_BLOCK_COUNT_);
736                         indexLength += SURROGATE_BLOCK_COUNT_;
737                     }
738                 }
739                 c += 0x400;
740             }
741             else {
742                 c += DATA_BLOCK_LENGTH;
743             }
744         }
745         
746         // index array overflow?
747
// This is to guarantee that a folding offset is of the form
748
// UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
749
// If the index is too large, then n>=1024 and more than 10 bits are
750
// necessary.
751
// In fact, it can only ever become n==1024 with completely unfoldable
752
// data and the additional block of duplicated values for lead
753
// surrogates.
754
if (indexLength >= MAX_INDEX_LENGTH_) {
755             throw new ArrayIndexOutOfBoundsException JavaDoc("Index table overflow");
756         }
757         // make space for the lead surrogate index block and insert it between
758
// the BMP indexes and the folded ones
759
System.arraycopy(index, BMP_INDEX_LENGTH_, index,
760                          BMP_INDEX_LENGTH_ + SURROGATE_BLOCK_COUNT_,
761                          indexLength - BMP_INDEX_LENGTH_);
762         System.arraycopy(leadIndexes, 0, index, BMP_INDEX_LENGTH_,
763                          SURROGATE_BLOCK_COUNT_);
764         indexLength += SURROGATE_BLOCK_COUNT_;
765         m_indexLength_ = indexLength;
766     }
767     
768     /**
769      * @internal
770      */

771     private void fillBlock(int block, int start, int limit, int value,
772                            boolean overwrite)
773     {
774         limit += block;
775         block += start;
776         if (overwrite) {
777             while (block < limit) {
778                 m_data_[block ++] = value;
779             }
780         }
781         else {
782             while (block < limit) {
783                 if (m_data_[block] == m_initialValue_) {
784                     m_data_[block] = value;
785                 }
786                 ++ block;
787             }
788         }
789     }
790 }
791
792
Popular Tags