1 7 8 package com.ibm.icu.impl; 9 10 import java.io.InputStream ; 11 import java.io.DataInputStream ; 12 import java.io.IOException ; 13 import java.util.Arrays ; 14 import com.ibm.icu.text.UTF16; 15 import com.ibm.icu.lang.UCharacter; 16 17 46 public abstract class Trie 47 { 48 50 58 public static interface DataManipulate 59 { 60 69 public int getFoldingOffset(int value); 70 } 71 72 private static class DefaultGetFoldingOffset implements DataManipulate { 74 public int getFoldingOffset(int value) { 75 return value; 76 } 77 } 78 79 81 85 public final boolean isLatin1Linear() 86 { 87 return m_isLatin1Linear_; 88 } 89 90 97 public boolean equals(Object other) 99 { 100 if (other == this) { 101 return true; 102 } 103 if (!(other instanceof Trie)) { 104 return false; 105 } 106 Trie othertrie = (Trie)other; 107 return m_isLatin1Linear_ == othertrie.m_isLatin1Linear_ 108 && m_options_ == othertrie.m_options_ 109 && m_dataLength_ == othertrie.m_dataLength_ 110 && Arrays.equals(m_index_, othertrie.m_index_); 111 } 112 114 120 public int getSerializedDataSize() 121 { 122 int result = (4 << 2); 124 result += (m_dataOffset_ << 1); 125 if (isCharTrie()) { 126 result += (m_dataLength_ << 1); 127 } 128 else if (isIntTrie()) { 129 result += (m_dataLength_ << 2); 130 } 131 return result; 132 } 133 134 136 146 protected Trie(InputStream inputStream, 147 DataManipulate dataManipulate) throws IOException 148 { 149 DataInputStream input = new DataInputStream (inputStream); 150 int signature = input.readInt(); 152 m_options_ = input.readInt(); 153 154 if (!checkHeader(signature)) { 155 throw new IllegalArgumentException ("ICU data file error: Trie header authentication failed, please check if you have the most updated ICU data file"); 156 } 157 158 if(dataManipulate != null) { 159 m_dataManipulate_ = dataManipulate; 160 } else { 161 m_dataManipulate_ = new DefaultGetFoldingOffset(); 162 } 163 m_isLatin1Linear_ = (m_options_ & 164 HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_) != 0; 165 m_dataOffset_ = input.readInt(); 166 m_dataLength_ = input.readInt(); 167 unserialize(inputStream); 168 } 169 170 178 protected Trie(char index[], int options, DataManipulate dataManipulate) 179 { 180 m_options_ = options; 181 if(dataManipulate != null) { 182 m_dataManipulate_ = dataManipulate; 183 } else { 184 m_dataManipulate_ = new DefaultGetFoldingOffset(); 185 } 186 m_isLatin1Linear_ = (m_options_ & 187 HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_) != 0; 188 m_index_ = index; 189 m_dataOffset_ = m_index_.length; 190 } 191 192 193 195 200 protected static final int LEAD_INDEX_OFFSET_ = 0x2800 >> 5; 201 205 protected static final int INDEX_STAGE_1_SHIFT_ = 5; 206 215 protected static final int INDEX_STAGE_2_SHIFT_ = 2; 216 219 protected static final int DATA_BLOCK_LENGTH=1<<INDEX_STAGE_1_SHIFT_; 220 225 protected static final int INDEX_STAGE_3_MASK_ = DATA_BLOCK_LENGTH - 1; 226 227 protected static final int SURROGATE_BLOCK_BITS=10-INDEX_STAGE_1_SHIFT_; 228 233 protected static final int SURROGATE_BLOCK_COUNT=(1<<SURROGATE_BLOCK_BITS); 234 235 protected static final int BMP_INDEX_LENGTH=0x10000>>INDEX_STAGE_1_SHIFT_; 236 241 protected static final int SURROGATE_MASK_ = 0x3FF; 242 246 protected char m_index_[]; 247 252 protected DataManipulate m_dataManipulate_; 253 260 protected int m_dataOffset_; 261 264 protected int m_dataLength_; 265 266 268 275 protected abstract int getSurrogateOffset(char lead, char trail); 276 277 283 protected abstract int getValue(int index); 284 285 290 protected abstract int getInitialValue(); 291 292 307 protected final int getRawOffset(int offset, char ch) 308 { 309 return (m_index_[offset + (ch >> INDEX_STAGE_1_SHIFT_)] 310 << INDEX_STAGE_2_SHIFT_) 311 + (ch & INDEX_STAGE_3_MASK_); 312 } 313 314 321 protected final int getBMPOffset(char ch) 322 { 323 return (ch >= UTF16.LEAD_SURROGATE_MIN_VALUE 324 && ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) 325 ? getRawOffset(LEAD_INDEX_OFFSET_, ch) 326 : getRawOffset(0, ch); 327 } 329 330 339 protected final int getLeadOffset(char ch) 340 { 341 return getRawOffset(0, ch); 342 } 343 344 353 protected final int getCodePointOffset(int ch) 354 { 355 if (ch < 0) { 357 return -1; 358 } else if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE) { 359 return getRawOffset(0, (char)ch); 361 } else if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE) { 362 return getBMPOffset((char)ch); 364 } else if (ch <= UCharacter.MAX_VALUE) { 365 return getSurrogateOffset(UTF16.getLeadSurrogate(ch), 368 (char)(ch & SURROGATE_MASK_)); 369 } else { 370 return -1; 372 } 373 } 374 375 382 protected void unserialize(InputStream inputStream) throws IOException 383 { 384 m_index_ = new char[m_dataOffset_]; 386 DataInputStream input = new DataInputStream (inputStream); 387 for (int i = 0; i < m_dataOffset_; i ++) { 388 m_index_[i] = input.readChar(); 389 } 390 } 391 392 397 protected final boolean isIntTrie() 398 { 399 return (m_options_ & HEADER_OPTIONS_DATA_IS_32_BIT_) != 0; 400 } 401 402 407 protected final boolean isCharTrie() 408 { 409 return (m_options_ & HEADER_OPTIONS_DATA_IS_32_BIT_) == 0; 410 } 411 412 414 420 423 protected static final int HEADER_LENGTH_ = 4 * 4; 424 427 protected static final int HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_ = 0x200; 428 431 protected static final int HEADER_SIGNATURE_ = 0x54726965; 432 435 private static final int HEADER_OPTIONS_SHIFT_MASK_ = 0xF; 436 protected static final int HEADER_OPTIONS_INDEX_SHIFT_ = 4; 437 protected static final int HEADER_OPTIONS_DATA_IS_32_BIT_ = 0x100; 438 439 442 private boolean m_isLatin1Linear_; 443 444 452 private int m_options_; 453 454 456 463 private final boolean checkHeader(int signature) 464 { 465 if (signature != HEADER_SIGNATURE_) { 469 return false; 470 } 471 472 if ((m_options_ & HEADER_OPTIONS_SHIFT_MASK_) != 473 INDEX_STAGE_1_SHIFT_ || 474 ((m_options_ >> HEADER_OPTIONS_INDEX_SHIFT_) & 475 HEADER_OPTIONS_SHIFT_MASK_) 476 != INDEX_STAGE_2_SHIFT_) { 477 return false; 478 } 479 return true; 480 } 481 } 482 | Popular Tags |