1 7 8 package com.ibm.icu.impl; 9 10 import com.ibm.icu.lang.UCharacter; 11 import com.ibm.icu.text.UTF16; 12 import java.util.Arrays ; 13 import java.io.OutputStream ; 14 import java.io.DataOutputStream ; 15 import java.io.IOException ; 16 17 34 public class IntTrieBuilder extends TrieBuilder 35 { 36 38 41 public IntTrieBuilder(IntTrieBuilder table) 42 { 43 super(table); 44 m_data_ = new int[m_dataCapacity_]; 45 System.arraycopy(table.m_data_, 0, m_data_, 0, m_dataLength_); 46 m_initialValue_ = table.m_initialValue_; 47 m_leadUnitValue_ = table.m_leadUnitValue_; 48 } 49 50 57 public IntTrieBuilder(int aliasdata[], int maxdatalength, 58 int initialvalue, int leadunitvalue, 59 boolean latin1linear) 60 { 61 super(); 62 if (maxdatalength < DATA_BLOCK_LENGTH || (latin1linear 63 && maxdatalength < 1024)) { 64 throw new IllegalArgumentException ( 65 "Argument maxdatalength is too small"); 66 } 67 68 if (aliasdata != null) { 69 m_data_ = aliasdata; 70 } 71 else { 72 m_data_ = new int[maxdatalength]; 73 } 74 75 int j = DATA_BLOCK_LENGTH; 77 78 if (latin1linear) { 79 int i = 0; 84 do { 85 m_index_[i ++] = j; 88 j += DATA_BLOCK_LENGTH; 89 } while (i < (256 >> SHIFT_)); 90 } 91 92 m_dataLength_ = j; 93 Arrays.fill(m_data_, 0, m_dataLength_, initialvalue); 95 m_initialValue_ = initialvalue; 96 m_leadUnitValue_ = leadunitvalue; 97 m_dataCapacity_ = maxdatalength; 98 m_isLatin1Linear_ = latin1linear; 99 m_isCompacted_ = false; 100 } 101 102 104 162 167 public int getValue(int ch) 168 { 169 if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) { 171 return 0; 172 } 173 174 int block = m_index_[ch >> SHIFT_]; 175 return m_data_[Math.abs(block) + (ch & MASK_)]; 176 } 177 178 185 public int getValue(int ch, boolean [] inBlockZero) 186 { 187 if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) { 189 if (inBlockZero != null) { 190 inBlockZero[0] = true; 191 } 192 return 0; 193 } 194 195 int block = m_index_[ch >> SHIFT_]; 196 if (inBlockZero != null) { 197 inBlockZero[0] = (block == 0); 198 } 199 return m_data_[Math.abs(block) + (ch & MASK_)]; 200 } 201 202 203 210 public boolean setValue(int ch, int value) 211 { 212 if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) { 214 return false; 215 } 216 217 int block = getDataBlock(ch); 218 if (block < 0) { 219 return false; 220 } 221 222 m_data_[block + (ch & MASK_)] = value; 223 return true; 224 } 225 226 232 public IntTrie serialize(TrieBuilder.DataManipulate datamanipulate, 233 Trie.DataManipulate triedatamanipulate) 234 { 235 if (datamanipulate == null) { 236 throw new IllegalArgumentException ("Parameters can not be null"); 237 } 238 if (!m_isCompacted_) { 241 compact(false); 243 fold(datamanipulate); 245 compact(true); 247 m_isCompacted_ = true; 248 } 249 if (m_dataLength_ >= MAX_DATA_LENGTH_) { 251 throw new ArrayIndexOutOfBoundsException ("Data length too small"); 252 } 253 254 char index[] = new char[m_indexLength_]; 255 int data[] = new int[m_dataLength_]; 256 for (int i = 0; i < m_indexLength_; i ++) { 259 index[i] = (char)(m_index_[i] >>> INDEX_SHIFT_); 260 } 261 System.arraycopy(m_data_, 0, data, 0, m_dataLength_); 263 264 int options = SHIFT_ | (INDEX_SHIFT_ << OPTIONS_INDEX_SHIFT_); 265 options |= OPTIONS_DATA_IS_32_BIT_; 266 if (m_isLatin1Linear_) { 267 options |= OPTIONS_LATIN1_IS_LINEAR_; 268 } 269 return new IntTrie(index, data, m_initialValue_, options, 270 triedatamanipulate); 271 } 272 273 274 292 public int serialize(OutputStream os, boolean reduceTo16Bits, 293 TrieBuilder.DataManipulate datamanipulate) throws IOException { 294 if (datamanipulate == null) { 295 throw new IllegalArgumentException ("Parameters can not be null"); 296 } 297 298 if (!m_isCompacted_) { 301 compact(false); 303 fold(datamanipulate); 305 compact(true); 307 m_isCompacted_ = true; 308 } 309 310 int length; 312 if (reduceTo16Bits) { 313 length = m_dataLength_ + m_indexLength_; 314 } else { 315 length = m_dataLength_; 316 } 317 if (length >= MAX_DATA_LENGTH_) { 318 throw new ArrayIndexOutOfBoundsException ("Data length too small"); 319 } 320 321 length = Trie.HEADER_LENGTH_ + 2*m_indexLength_; 327 if(reduceTo16Bits) { 328 length+=2*m_dataLength_; 329 } else { 330 length+=4*m_dataLength_; 331 } 332 333 if (os == null) { 334 return length; 336 } 337 338 DataOutputStream dos = new DataOutputStream (os); 339 dos.writeInt(Trie.HEADER_SIGNATURE_); 340 341 int options = Trie.INDEX_STAGE_1_SHIFT_ | (Trie.INDEX_STAGE_2_SHIFT_<<Trie.HEADER_OPTIONS_INDEX_SHIFT_); 342 if(!reduceTo16Bits) { 343 options |= Trie.HEADER_OPTIONS_DATA_IS_32_BIT_; 344 } 345 if(m_isLatin1Linear_) { 346 options |= Trie.HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_; 347 } 348 dos.writeInt(options); 349 350 dos.writeInt(m_indexLength_); 351 dos.writeInt(m_dataLength_); 352 353 354 if(reduceTo16Bits) { 355 356 for (int i=0; i<m_indexLength_; i++) { 357 int v = (m_index_[i] + m_indexLength_) >>> Trie.INDEX_STAGE_2_SHIFT_; 358 dos.writeChar(v); 359 } 360 361 362 for(int i=0; i<m_dataLength_; i++) { 363 int v = m_data_[i] & 0x0000ffff; 364 dos.writeChar(v); 365 } 366 } else { 367 368 for (int i=0; i<m_indexLength_; i++) { 369 int v = (m_index_[i]) >>> Trie.INDEX_STAGE_2_SHIFT_; 370 dos.writeChar(v); 371 } 372 373 374 for(int i=0; i<m_dataLength_; i++) { 375 dos.writeInt(m_data_[i]); 376 } 377 } 378 379 return length; 380 381 } 382 383 384 396 public boolean setRange(int start, int limit, int value, 397 boolean overwrite) 398 { 399 403 if (m_isCompacted_ || start < UCharacter.MIN_VALUE 405 || start > UCharacter.MAX_VALUE || limit < UCharacter.MIN_VALUE 406 || limit > (UCharacter.MAX_VALUE + 1) || start > limit) { 407 return false; 408 } 409 410 if (start == limit) { 411 return true; } 413 414 if ((start & MASK_) != 0) { 415 int block = getDataBlock(start); 417 if (block < 0) { 418 return false; 419 } 420 421 int nextStart = (start + DATA_BLOCK_LENGTH) & ~MASK_; 422 if (nextStart <= limit) { 423 fillBlock(block, start & MASK_, DATA_BLOCK_LENGTH, 424 value, overwrite); 425 start = nextStart; 426 } 427 else { 428 fillBlock(block, start & MASK_, limit & MASK_, 429 value, overwrite); 430 return true; 431 } 432 } 433 434 int rest = limit & MASK_; 436 437 limit &= ~MASK_; 439 440 int repeatBlock = 0; 442 if (value == m_initialValue_) { 443 } 445 else { 446 repeatBlock = -1; 447 } 448 while (start < limit) { 449 int block = m_index_[start >> SHIFT_]; 451 if (block > 0) { 452 fillBlock(block, 0, DATA_BLOCK_LENGTH, value, overwrite); 454 } 455 else if (m_data_[-block] != value && (block == 0 || overwrite)) { 456 if (repeatBlock >= 0) { 459 m_index_[start >> SHIFT_] = -repeatBlock; 460 } 461 else { 462 repeatBlock = getDataBlock(start); 464 if (repeatBlock < 0) { 465 return false; 466 } 467 468 m_index_[start >> SHIFT_] = -repeatBlock; 471 fillBlock(repeatBlock, 0, DATA_BLOCK_LENGTH, value, true); 472 } 473 } 474 475 start += DATA_BLOCK_LENGTH; 476 } 477 478 if (rest > 0) { 479 int block = getDataBlock(start); 481 if (block < 0) { 482 return false; 483 } 484 fillBlock(block, 0, rest, value, overwrite); 485 } 486 487 return true; 488 } 489 490 492 protected int m_data_[]; 493 protected int m_initialValue_; 494 495 497 private int m_leadUnitValue_; 498 499 501 private int allocDataBlock() 502 { 503 int newBlock = m_dataLength_; 504 int newTop = newBlock + DATA_BLOCK_LENGTH; 505 if (newTop > m_dataCapacity_) { 506 return -1; 508 } 509 m_dataLength_ = newTop; 510 return newBlock; 511 } 512 513 518 private int getDataBlock(int ch) 519 { 520 ch >>= SHIFT_; 521 int indexValue = m_index_[ch]; 522 if (indexValue > 0) { 523 return indexValue; 524 } 525 526 int newBlock = allocDataBlock(); 528 if (newBlock < 0) { 529 return -1; 531 } 532 m_index_[ch] = newBlock; 533 534 System.arraycopy(m_data_, Math.abs(indexValue), m_data_, newBlock, 536 DATA_BLOCK_LENGTH << 2); 537 return newBlock; 538 } 539 540 552 private void compact(boolean overlap) 553 { 554 if (m_isCompacted_) { 555 return; } 557 558 findUnusedBlocks(); 561 562 int overlapStart = DATA_BLOCK_LENGTH; 565 if (m_isLatin1Linear_ && SHIFT_ <= 8) { 566 overlapStart += 256; 567 } 568 569 int newStart = DATA_BLOCK_LENGTH; 570 int i; 571 for (int start = newStart; start < m_dataLength_;) { 572 if (m_map_[start >>> SHIFT_] < 0) { 577 start += DATA_BLOCK_LENGTH; 579 continue; 581 } 582 if (start >= overlapStart) { 584 i = findSameDataBlock(m_data_, newStart, start, 585 overlap ? DATA_GRANULARITY_ : DATA_BLOCK_LENGTH); 586 if (i >= 0) { 587 m_map_[start >>> SHIFT_] = i; 590 start += DATA_BLOCK_LENGTH; 592 continue; 594 } 595 } 596 if(overlap && start>=overlapStart) { 599 600 for(i=DATA_BLOCK_LENGTH-DATA_GRANULARITY_; 601 i>0 && !equal_int(m_data_, newStart-i, start, i); 602 i-=DATA_GRANULARITY_) {} 603 } else { 604 i=0; 605 } 606 if (i > 0) { 607 m_map_[start >>> SHIFT_] = newStart - i; 609 start += i; 611 for (i = DATA_BLOCK_LENGTH - i; i > 0; -- i) { 612 m_data_[newStart ++] = m_data_[start ++]; 613 } 614 } 615 else if (newStart < start) { 616 m_map_[start >>> SHIFT_] = newStart; 618 for (i = DATA_BLOCK_LENGTH; i > 0; -- i) { 619 m_data_[newStart ++] = m_data_[start ++]; 620 } 621 } 622 else { m_map_[start >>> SHIFT_] = start; 624 newStart += DATA_BLOCK_LENGTH; 625 start = newStart; 626 } 627 } 628 for (i = 0; i < m_indexLength_; ++ i) { 630 m_index_[i] = m_map_[Math.abs(m_index_[i]) >>> SHIFT_]; 631 } 632 m_dataLength_ = newStart; 633 } 634 635 642 private static final int findSameDataBlock(int data[], int dataLength, 643 int otherBlock, int step) 644 { 645 dataLength -= DATA_BLOCK_LENGTH; 647 648 for (int block = 0; block <= dataLength; block += step) { 649 if(equal_int(data, block, otherBlock, DATA_BLOCK_LENGTH)) { 650 return block; 651 } 652 } 653 return -1; 654 } 655 656 667 private final void fold(DataManipulate manipulate) 668 { 669 int leadIndexes[] = new int[SURROGATE_BLOCK_COUNT_]; 670 int index[] = m_index_; 671 System.arraycopy(index, 0xd800 >> SHIFT_, leadIndexes, 0, 673 SURROGATE_BLOCK_COUNT_); 674 675 int block = 0; 683 if (m_leadUnitValue_ == m_initialValue_) { 684 } 687 else { 688 block = allocDataBlock(); 690 if (block < 0) { 691 throw new IllegalStateException ("Internal error: Out of memory space"); 693 } 694 fillBlock(block, 0, DATA_BLOCK_LENGTH, m_leadUnitValue_, true); 695 block = -block; 697 } 698 for (int c = (0xd800 >> SHIFT_); c < (0xdc00 >> SHIFT_); ++ c) { 699 m_index_[c] = block; 700 } 701 702 int indexLength = BMP_INDEX_LENGTH_; 710 for (int c = 0x10000; c < 0x110000;) { 712 if (index[c >> SHIFT_] != 0) { 713 c &= ~0x3ff; 715 block = findSameIndexBlock(index, indexLength, c >> SHIFT_); 717 718 722 int value = manipulate.getFoldedValue(c, 723 block + SURROGATE_BLOCK_COUNT_); 724 if (value != getValue(UTF16.getLeadSurrogate(c))) { 725 if (!setValue(UTF16.getLeadSurrogate(c), value)) { 726 throw new ArrayIndexOutOfBoundsException ( 728 "Data table overflow"); 729 } 730 if (block == indexLength) { 732 System.arraycopy(index, c >> SHIFT_, index, indexLength, 735 SURROGATE_BLOCK_COUNT_); 736 indexLength += SURROGATE_BLOCK_COUNT_; 737 } 738 } 739 c += 0x400; 740 } 741 else { 742 c += DATA_BLOCK_LENGTH; 743 } 744 } 745 746 if (indexLength >= MAX_INDEX_LENGTH_) { 755 throw new ArrayIndexOutOfBoundsException ("Index table overflow"); 756 } 757 System.arraycopy(index, BMP_INDEX_LENGTH_, index, 760 BMP_INDEX_LENGTH_ + SURROGATE_BLOCK_COUNT_, 761 indexLength - BMP_INDEX_LENGTH_); 762 System.arraycopy(leadIndexes, 0, index, BMP_INDEX_LENGTH_, 763 SURROGATE_BLOCK_COUNT_); 764 indexLength += SURROGATE_BLOCK_COUNT_; 765 m_indexLength_ = indexLength; 766 } 767 768 771 private void fillBlock(int block, int start, int limit, int value, 772 boolean overwrite) 773 { 774 limit += block; 775 block += start; 776 if (overwrite) { 777 while (block < limit) { 778 m_data_[block ++] = value; 779 } 780 } 781 else { 782 while (block < limit) { 783 if (m_data_[block] == m_initialValue_) { 784 m_data_[block] = value; 785 } 786 ++ block; 787 } 788 } 789 } 790 } 791 792 | Popular Tags |