package com.ibm.icu.text;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.DataInputStream;
import java.io.IOException;

import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.IntTrie;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.util.VersionInfo;
import com.ibm.icu.text.CollationParsedRuleBuilder.InverseUCA;
import com.ibm.icu.text.RuleBasedCollator.UCAConstants;

/**
 * Reads binary collation images and fills in the tables of a
 * {@link RuleBasedCollator}, and reads the inverse UCA table used by
 * {@link CollationParsedRuleBuilder}.
 *
 * <p>All multi-byte values are read through a {@link DataInputStream}.
 * Instances are single-use: the header must be read before the options, and
 * both before the remaining sections, because section sizes are derived from
 * values found in the header.
 */
final class CollatorReader
{
    // package-private entry points ---------------------------------------

    /**
     * Loads the resource <code>ICU_BUNDLE + "/ucadata.icu"</code> into the
     * argument collator.
     *
     * @param rbc  collator whose tables and default options are filled in
     * @param ucac receives the UCA constants; may be null, in which case
     *             the constants section is not read
     * @return the chars that follow the UCA constants in the image, or null
     *         when <code>ucac</code> is null
     * @throws IOException if the resource cannot be read, fails the ICU
     *         header/version checks, or its sizes are inconsistent
     */
    static char[] read(RuleBasedCollator rbc, UCAConstants ucac) throws IOException {
        InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/ucadata.icu");
        BufferedInputStream b = new BufferedInputStream(i, 90000);
        try {
            CollatorReader reader = new CollatorReader(b);
            return reader.readImp(rbc, ucac);
        } finally {
            // Close on failure as well as on success so the resource stream
            // is not leaked when the image is rejected.
            b.close();
        }
    }

    /**
     * Initializes a collator from an in-memory binary image that carries no
     * ICU data header.
     *
     * <p>If <code>data</code> is longer than 268 bytes
     * (<code>(42 + 25) &lt;&lt; 2</code>) the complete image is read.
     * Otherwise only the header and options are read and
     * <code>rbc.setWithUCATables()</code> is called.
     *
     * @param rbc  collator to initialize
     * @param data binary collation image
     * @throws IOException if the image is truncated or inconsistent
     */
    static void initRBC(RuleBasedCollator rbc, byte[] data) throws IOException {
        // readHeader consumes 42 ints (168 bytes) of fixed header fields.
        // NOTE(review): the 25 presumably is the option block in ints - confirm.
        final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;

        InputStream i = new ByteArrayInputStream(data);
        BufferedInputStream b = new BufferedInputStream(i);
        CollatorReader reader = new CollatorReader(b, false);
        if (data.length > MIN_BINARY_DATA_SIZE_) {
            reader.readImp(rbc, null);
        } else {
            reader.readHeader(rbc);
            reader.readOptions(rbc);
            rbc.setWithUCATables();
        }
    }

    /**
     * Loads the resource <code>ICU_BUNDLE + "/invuca.icu"</code>.
     *
     * @return the inverse UCA table read from the resource
     * @throws IOException if the resource cannot be read or fails the ICU
     *         header/version checks
     */
    static InverseUCA getInverseUCA() throws IOException {
        InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/invuca.icu");
        BufferedInputStream b = new BufferedInputStream(i, 110000);
        try {
            return CollatorReader.readInverseUCA(b);
        } finally {
            // Closing the buffered stream also closes the wrapped resource
            // stream; closing an already closed stream is harmless.
            b.close();
        }
    }

    // constructors --------------------------------------------------------

    /**
     * Creates a reader for a stream that starts with an ICU data header.
     *
     * @param inputStream stream positioned at the ICU data header
     * @throws IOException if the header is rejected or the Unicode version
     *         does not match
     */
    private CollatorReader(InputStream inputStream) throws IOException
    {
        this(inputStream, true);
    }

    /**
     * Creates a reader, optionally consuming and validating the ICU data
     * header first.
     *
     * @param inputStream   stream to read from
     * @param readICUHeader true to read the ICU header with
     *                      {@link ICUBinary#readHeader} and verify that its
     *                      major/minor Unicode version equals
     *                      {@link UCharacter#getUnicodeVersion()}
     * @throws IOException if the header is rejected or the versions differ
     */
    private CollatorReader(InputStream inputStream, boolean readICUHeader)
        throws IOException
    {
        if (readICUHeader) {
            byte[] unicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_,
                                                         UCA_AUTHENTICATE_);
            checkUnicodeVersion(unicodeVersion);
        }
        m_dataInputStream_ = new DataInputStream(inputStream);
    }

    // private readers -----------------------------------------------------

    /**
     * Reads the collation header and derives the byte size of every section
     * that follows from the offset-like values stored in it.
     *
     * @param rbc collator receiving the expansion/contraction offsets, the
     *            Jamo flag and the three version fields
     * @throws IOException on read failure, or if the declared header size is
     *         smaller than the fixed fields read here
     */
    private void readHeader(RuleBasedCollator rbc) throws IOException
    {
        m_size_ = m_dataInputStream_.readInt();
        m_headerSize_ = m_dataInputStream_.readInt();
        int readcount = 8;
        int ucaConstOffset = m_dataInputStream_.readInt();
        readcount += 4;
        // skipBytes (not InputStream.skip) is used throughout: skip() may
        // advance fewer bytes than requested, which would misalign the reads.
        m_dataInputStream_.skipBytes(4); // field not used by this reader
        readcount += 4;
        m_dataInputStream_.skipBytes(4); // field not used by this reader
        readcount += 4;
        int mapping = m_dataInputStream_.readInt();
        readcount += 4;
        rbc.m_expansionOffset_ = m_dataInputStream_.readInt();
        readcount += 4;
        rbc.m_contractionOffset_ = m_dataInputStream_.readInt();
        readcount += 4;
        int contractionCE = m_dataInputStream_.readInt();
        readcount += 4;
        // The next int is consumed only to stay aligned; its value is unused.
        m_dataInputStream_.readInt();
        readcount += 4;
        int expansionEndCE = m_dataInputStream_.readInt();
        readcount += 4;
        int expansionEndCEMaxSize = m_dataInputStream_.readInt();
        readcount += 4;
        m_dataInputStream_.skipBytes(4); // field not used by this reader
        readcount += 4;
        int unsafe = m_dataInputStream_.readInt();
        readcount += 4;
        int contractionEnd = m_dataInputStream_.readInt();
        readcount += 4;
        m_dataInputStream_.skipBytes(4); // field not used by this reader
        readcount += 4;
        rbc.m_isJamoSpecial_ = m_dataInputStream_.readBoolean();
        readcount++;
        m_dataInputStream_.skipBytes(3); // pads the 1-byte flag to 4 bytes
        readcount += 3;
        rbc.m_version_ = readVersion(m_dataInputStream_);
        readcount += 4;
        rbc.m_UCA_version_ = readVersion(m_dataInputStream_);
        readcount += 4;
        rbc.m_UCD_version_ = readVersion(m_dataInputStream_);
        readcount += 4;
        m_dataInputStream_.skipBytes(32); // not used by this reader
        readcount += 32;
        m_dataInputStream_.skipBytes(56); // not used by this reader
        readcount += 56;
        if (m_headerSize_ < readcount) {
            throw new IOException("Internal Error: Header size error");
        }
        m_dataInputStream_.skipBytes(m_headerSize_ - readcount);

        if (rbc.m_contractionOffset_ == 0) {
            // No contraction section: collapse it to zero size at "mapping".
            rbc.m_contractionOffset_ = mapping;
            contractionCE = mapping;
        }
        m_optionSize_ = rbc.m_expansionOffset_ - m_headerSize_;
        m_expansionSize_ = rbc.m_contractionOffset_ - rbc.m_expansionOffset_;
        m_contractionIndexSize_ = contractionCE - rbc.m_contractionOffset_;
        m_contractionCESize_ = mapping - contractionCE;
        m_trieSize_ = expansionEndCE - mapping;
        m_expansionEndCESize_ = expansionEndCEMaxSize - expansionEndCE;
        m_expansionEndCEMaxSizeSize_ = unsafe - expansionEndCEMaxSize;
        m_unsafeSize_ = contractionEnd - unsafe;
        m_UCAValuesSize_ = m_size_ - ucaConstOffset;
        m_contractionEndSize_ = m_size_ - contractionEnd;

        // Stored halved / quartered from here on. NOTE(review): presumably
        // converts byte offsets to char and int indices, matching how
        // readImp reads those sections - confirm against RuleBasedCollator.
        rbc.m_contractionOffset_ >>= 1;
        rbc.m_expansionOffset_ >>= 2;
    }

    /**
     * Reads the default collation options into the argument collator.
     * Must be called after {@link #readHeader}, which computes the option
     * block size.
     *
     * @param rbc collator receiving the default option values
     * @throws IOException on read failure, or if the option block is smaller
     *         than the fields read here
     */
    private void readOptions(RuleBasedCollator rbc) throws IOException
    {
        int readcount = 0;
        rbc.m_defaultVariableTopValue_ = m_dataInputStream_.readInt();
        readcount += 4;
        rbc.m_defaultIsFrenchCollation_ = (m_dataInputStream_.readInt()
                                   == RuleBasedCollator.AttributeValue.ON_);
        readcount += 4;
        rbc.m_defaultIsAlternateHandlingShifted_
                               = (m_dataInputStream_.readInt()
                                  == RuleBasedCollator.AttributeValue.SHIFTED_);
        readcount += 4;
        rbc.m_defaultCaseFirst_ = m_dataInputStream_.readInt();
        readcount += 4;
        rbc.m_defaultIsCaseLevel_ = (m_dataInputStream_.readInt()
                                   == RuleBasedCollator.AttributeValue.ON_);
        readcount += 4;
        int normalization = m_dataInputStream_.readInt();
        readcount += 4;
        // The image stores an on/off attribute; the collator stores a
        // decomposition mode.
        if (normalization == RuleBasedCollator.AttributeValue.ON_) {
            rbc.m_defaultDecomposition_ = Collator.CANONICAL_DECOMPOSITION;
        } else {
            rbc.m_defaultDecomposition_ = Collator.NO_DECOMPOSITION;
        }
        rbc.m_defaultStrength_ = m_dataInputStream_.readInt();
        readcount += 4;
        rbc.m_defaultIsHiragana4_ = (m_dataInputStream_.readInt()
                                   == RuleBasedCollator.AttributeValue.ON_);
        readcount += 4;
        rbc.m_defaultIsNumericCollation_ = (m_dataInputStream_.readInt()
                                   == RuleBasedCollator.AttributeValue.ON_);
        readcount += 4;
        m_dataInputStream_.skipBytes(60); // not used by this reader
        readcount += 60;
        // Validate before skipping the remainder so a too-small option block
        // is reported instead of being "skipped" by a negative amount.
        if (m_optionSize_ < readcount) {
            throw new IOException("Internal Error: Option size error");
        }
        m_dataInputStream_.skipBytes(m_optionSize_ - readcount);
    }

    /**
     * Reads a complete collation image: header, options, expansions,
     * contractions, trie, expansion-end tables, unsafe table, contraction-end
     * table and, if requested, the UCA constants.
     *
     * @param rbc      collator whose tables are filled in
     * @param UCAConst receives the UCA constants; null to skip that section
     * @return the chars following the UCA constants, or null when
     *         <code>UCAConst</code> is null
     * @throws IOException on read failure, if a derived section size is
     *         negative, if the trie lacks linear Latin-1 data, or if the
     *         number of bytes consumed differs from the declared image size
     */
    private char[] readImp(RuleBasedCollator rbc,
                           RuleBasedCollator.UCAConstants UCAConst)
        throws IOException
    {
        readHeader(rbc);
        int readcount = m_headerSize_;
        readOptions(rbc);
        readcount += m_optionSize_;

        m_expansionSize_ >>= 2; // bytes -> ints
        rbc.m_expansion_ = readInts(m_dataInputStream_, m_expansionSize_);
        readcount += (m_expansionSize_ << 2);

        if (m_contractionIndexSize_ > 0) {
            m_contractionIndexSize_ >>= 1; // bytes -> chars
            rbc.m_contractionIndex_ = readChars(m_dataInputStream_,
                                                m_contractionIndexSize_);
            readcount += (m_contractionIndexSize_ << 1);

            m_contractionCESize_ >>= 2; // bytes -> ints
            rbc.m_contractionCE_ = readInts(m_dataInputStream_,
                                            m_contractionCESize_);
            readcount += (m_contractionCESize_ << 2);
        }

        rbc.m_trie_ = new IntTrie(m_dataInputStream_,
                                  RuleBasedCollator.DataManipulate.getInstance());
        if (!rbc.m_trie_.isLatin1Linear()) {
            throw new IOException("Data corrupted, "
                                  + "Collator Tries expected to have linear "
                                  + "latin one data arrays");
        }
        readcount += rbc.m_trie_.getSerializedDataSize();

        m_expansionEndCESize_ >>= 2; // bytes -> ints
        rbc.m_expansionEndCE_ = readInts(m_dataInputStream_,
                                         m_expansionEndCESize_);
        readcount += (m_expansionEndCESize_ << 2);

        rbc.m_expansionEndCEMaxSize_ = readBytes(m_dataInputStream_,
                                                 m_expansionEndCEMaxSizeSize_);
        readcount += m_expansionEndCEMaxSizeSize_;

        rbc.m_unsafe_ = readBytes(m_dataInputStream_, m_unsafeSize_);
        readcount += m_unsafeSize_;

        if (UCAConst != null) {
            // readHeader sized this section up to the end of the image; when
            // the UCA constants are read separately, exclude their bytes.
            m_contractionEndSize_ -= m_UCAValuesSize_;
        }
        rbc.m_contractionEnd_ = readBytes(m_dataInputStream_,
                                          m_contractionEndSize_);
        readcount += m_contractionEndSize_;

        char[] result = null;
        if (UCAConst != null) {
            result = readUCAConstants(UCAConst);
            readcount += m_UCAValuesSize_;
        }
        if (readcount != m_size_) {
            throw new IOException("Internal Error: Data file size error");
        }
        return result;
    }

    /**
     * Reads the UCA constants section: 15 two-int boundary values and 7
     * single-int primary limits, followed by chars that fill the rest of the
     * section.
     *
     * @param UCAConst object receiving the constants
     * @return the chars that follow the 37 ints in the section
     * @throws IOException on read failure or if the section is smaller than
     *         the 37 ints
     */
    private char[] readUCAConstants(RuleBasedCollator.UCAConstants UCAConst)
        throws IOException
    {
        // The read order below is the on-disk order; do not reorder.
        UCAConst.FIRST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
        UCAConst.FIRST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
        UCAConst.LAST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
        UCAConst.LAST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
        UCAConst.FIRST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
        UCAConst.FIRST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
        UCAConst.FIRST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
        UCAConst.FIRST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
        UCAConst.LAST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
        UCAConst.LAST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
        UCAConst.LAST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
        UCAConst.LAST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
        UCAConst.FIRST_VARIABLE_[0] = m_dataInputStream_.readInt();
        UCAConst.FIRST_VARIABLE_[1] = m_dataInputStream_.readInt();
        UCAConst.LAST_VARIABLE_[0] = m_dataInputStream_.readInt();
        UCAConst.LAST_VARIABLE_[1] = m_dataInputStream_.readInt();
        UCAConst.FIRST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();
        UCAConst.FIRST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();
        UCAConst.LAST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();
        UCAConst.LAST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();
        UCAConst.RESET_TOP_VALUE_[0] = m_dataInputStream_.readInt();
        UCAConst.RESET_TOP_VALUE_[1] = m_dataInputStream_.readInt();
        UCAConst.FIRST_IMPLICIT_[0] = m_dataInputStream_.readInt();
        UCAConst.FIRST_IMPLICIT_[1] = m_dataInputStream_.readInt();
        UCAConst.LAST_IMPLICIT_[0] = m_dataInputStream_.readInt();
        UCAConst.LAST_IMPLICIT_[1] = m_dataInputStream_.readInt();
        UCAConst.FIRST_TRAILING_[0] = m_dataInputStream_.readInt();
        UCAConst.FIRST_TRAILING_[1] = m_dataInputStream_.readInt();
        UCAConst.LAST_TRAILING_[0] = m_dataInputStream_.readInt();
        UCAConst.LAST_TRAILING_[1] = m_dataInputStream_.readInt();
        UCAConst.PRIMARY_TOP_MIN_ = m_dataInputStream_.readInt();
        UCAConst.PRIMARY_IMPLICIT_MIN_ = m_dataInputStream_.readInt();
        UCAConst.PRIMARY_IMPLICIT_MAX_ = m_dataInputStream_.readInt();
        UCAConst.PRIMARY_TRAILING_MIN_ = m_dataInputStream_.readInt();
        UCAConst.PRIMARY_TRAILING_MAX_ = m_dataInputStream_.readInt();
        UCAConst.PRIMARY_SPECIAL_MIN_ = m_dataInputStream_.readInt();
        UCAConst.PRIMARY_SPECIAL_MAX_ = m_dataInputStream_.readInt();

        // 15 pairs + 7 singles = 37 ints = 148 bytes consumed above.
        final int constantsByteCount = ((15 * 2) + 7) << 2;
        int resultsize = (m_UCAValuesSize_ - constantsByteCount) >> 1;
        return readChars(m_dataInputStream_, resultsize);
    }

    /**
     * Reads the inverse UCA image from the argument stream and closes it.
     *
     * @param inputStream stream positioned at the ICU data header
     * @return the table, continuations and UCA version read from the stream
     * @throws IOException if the header is rejected, the Unicode version
     *         does not match, a declared size is negative, or reading fails
     */
    private static CollationParsedRuleBuilder.InverseUCA readInverseUCA(
                                                      InputStream inputStream)
        throws IOException
    {
        byte[] unicodeVersion = ICUBinary.readHeader(inputStream,
                                                     INVERSE_UCA_DATA_FORMAT_ID_,
                                                     INVERSE_UCA_AUTHENTICATE_);
        checkUnicodeVersion(unicodeVersion);

        CollationParsedRuleBuilder.InverseUCA result =
                                  new CollationParsedRuleBuilder.InverseUCA();
        DataInputStream input = new DataInputStream(inputStream);
        input.readInt(); // value not used by this reader
        int tablesize = input.readInt();
        int contsize = input.readInt();
        input.readInt(); // value not used by this reader
        input.readInt(); // value not used by this reader
        result.m_UCA_version_ = readVersion(input);
        input.skipBytes(8); // not used by this reader

        // Three ints are stored per table entry.
        result.m_table_ = readInts(input, tablesize * 3);
        result.m_continuations_ = readChars(input, contsize);
        input.close();
        return result;
    }

    /**
     * Reads four bytes and converts them to a {@link VersionInfo}
     * (major, minor, milli, micro in stream order).
     *
     * @param input stream to read from
     * @return the version built from the four bytes
     * @throws IOException if fewer than four bytes are available
     */
    protected static VersionInfo readVersion(DataInputStream input)
        throws IOException {
        byte[] version = new byte[4];
        input.readFully(version);
        return VersionInfo.getInstance(version[0], version[1],
                                       version[2], version[3]);
    }

    // private helpers -----------------------------------------------------

    /**
     * Verifies that the major and minor Unicode version stored in a data
     * file equal those of {@link UCharacter#getUnicodeVersion()}.
     *
     * @param unicodeVersion version bytes returned by ICUBinary.readHeader
     * @throws IOException if major or minor version differ
     */
    private static void checkUnicodeVersion(byte[] unicodeVersion)
        throws IOException
    {
        VersionInfo ucdVersion = UCharacter.getUnicodeVersion();
        if (unicodeVersion[0] != ucdVersion.getMajor()
            || unicodeVersion[1] != ucdVersion.getMinor()) {
            throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
        }
    }

    /**
     * Rejects negative element counts, which can only come from inconsistent
     * offsets in the image, with a checked exception instead of letting the
     * array allocation fail with NegativeArraySizeException.
     *
     * @param size element count derived from the image
     * @return <code>size</code> unchanged
     * @throws IOException if <code>size</code> is negative
     */
    private static int checkSize(int size) throws IOException
    {
        if (size < 0) {
            throw new IOException(CORRUPTED_DATA_ERROR_);
        }
        return size;
    }

    /** Reads <code>count</code> big-endian ints into a new array. */
    private static int[] readInts(DataInputStream input, int count)
        throws IOException
    {
        int[] result = new int[checkSize(count)];
        for (int i = 0; i < result.length; i++) {
            result[i] = input.readInt();
        }
        return result;
    }

    /** Reads <code>count</code> big-endian chars into a new array. */
    private static char[] readChars(DataInputStream input, int count)
        throws IOException
    {
        char[] result = new char[checkSize(count)];
        for (int i = 0; i < result.length; i++) {
            result[i] = input.readChar();
        }
        return result;
    }

    /** Reads exactly <code>count</code> bytes into a new array. */
    private static byte[] readBytes(DataInputStream input, int count)
        throws IOException
    {
        byte[] result = new byte[checkSize(count)];
        input.readFully(result);
        return result;
    }

    // private data members ------------------------------------------------

    /**
     * Accepts collation data whose major format version equals
     * DATA_FORMAT_VERSION_[0] and whose minor version is at least
     * DATA_FORMAT_VERSION_[1].
     */
    private static final ICUBinary.Authenticate UCA_AUTHENTICATE_
                = new ICUBinary.Authenticate() {
                        public boolean isDataVersionAcceptable(byte[] version)
                        {
                            return version[0] == DATA_FORMAT_VERSION_[0]
                                   && version[1] >= DATA_FORMAT_VERSION_[1];
                        }
                };

    /**
     * Accepts inverse UCA data whose major format version equals
     * INVERSE_UCA_DATA_FORMAT_VERSION_[0] and whose minor version is at
     * least INVERSE_UCA_DATA_FORMAT_VERSION_[1].
     */
    private static final ICUBinary.Authenticate INVERSE_UCA_AUTHENTICATE_
                = new ICUBinary.Authenticate() {
                        public boolean isDataVersionAcceptable(byte[] version)
                        {
                            return version[0]
                                    == INVERSE_UCA_DATA_FORMAT_VERSION_[0]
                                && version[1]
                                    >= INVERSE_UCA_DATA_FORMAT_VERSION_[1];
                        }
                };

    /** Stream all collation data is read from. */
    private final DataInputStream m_dataInputStream_;

    /** Format version (2.2.0.0) and format id ("UCol") of collation data. */
    private static final byte[] DATA_FORMAT_VERSION_ =
                                   {(byte)0x2, (byte)0x2, (byte)0x0, (byte)0x0};
    private static final byte[] DATA_FORMAT_ID_ = {(byte)0x55, (byte)0x43,
                                                    (byte)0x6f, (byte)0x6c};

    /** Format version (2.1.0.0) and format id ("InvC") of inverse UCA data. */
    private static final byte[] INVERSE_UCA_DATA_FORMAT_VERSION_ =
                                   {(byte)0x2, (byte)0x1, (byte)0x0, (byte)0x0};
    private static final byte[] INVERSE_UCA_DATA_FORMAT_ID_ = {(byte)0x49,
                                                                (byte)0x6e,
                                                                (byte)0x76,
                                                                (byte)0x43};

    /** Message used when a section size derived from the image is invalid. */
    private static final String CORRUPTED_DATA_ERROR_ =
                                "Data corrupted in Collation data file";

    /** Message used when the data file's Unicode version does not match. */
    private static final String WRONG_UNICODE_VERSION_ERROR_ =
            "Unicode version in binary image is not compatible with the current Unicode version";

    /** Expansion section size: bytes after readHeader, ints after readImp. */
    private int m_expansionSize_;

    /** Contraction index size: bytes after readHeader, chars after readImp. */
    private int m_contractionIndexSize_;

    /** Contraction CE size: bytes after readHeader, ints after readImp. */
    private int m_contractionCESize_;

    /** Trie section size in bytes; computed but not read within this class. */
    private int m_trieSize_;

    /** Expansion-end CE size: bytes after readHeader, ints after readImp. */
    private int m_expansionEndCESize_;

    /** Size in bytes of the expansion-end maximum-size table. */
    private int m_expansionEndCEMaxSizeSize_;

    /** Size in bytes of the option block (expansion offset - header size). */
    private int m_optionSize_;

    /** Total image size in bytes, as declared by the first header int. */
    private int m_size_;

    /** Header size in bytes, as declared by the second header int. */
    private int m_headerSize_;

    /** Size in bytes of the unsafe table. */
    private int m_unsafeSize_;

    /** Size in bytes of the contraction-end table. */
    private int m_contractionEndSize_;

    /** Size in bytes of the UCA constants section (image size - its offset). */
    private int m_UCAValuesSize_;
}