package com.ibm.icu.text;

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;

import com.ibm.icu.impl.CharTrie;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.Trie;

/**
 * In-memory form of a compiled break-iterator rule data file.
 *
 * <p>An instance is produced by {@link #get(String)} or {@link #get(InputStream)},
 * which read a fixed-size header followed by four state tables (forward, reverse,
 * safe-forward, safe-reverse), a character-category trie, a status table and the
 * rule source text.  The {@code dump*} methods print the loaded data to
 * {@code System.out} for debugging.
 */
final class RBBIDataWrapper {

    /** Header read from the start of the rule data. */
    RBBIDataHeader fHeader;

    /** Forward state table, stored as a flat array of 16-bit cells. */
    short fFTable[];
    /** Reverse state table. */
    short fRTable[];
    /** Forward safe-points table; stays {@code null} when the header gives it a length of zero. */
    short fSFTable[];
    /** Reverse safe-points table; stays {@code null} when the header gives it a length of zero. */
    short fSRTable[];
    /** Trie mapping code points to character category numbers. */
    CharTrie fTrie;
    /** Source text of the rules, as stored in the data. */
    String fRuleSource;
    /** Status value table. */
    int fStatusTable[];

    //
    // Header layout.  The header occupies DH_SIZE 32-bit ints.  The DH_* values
    // follow the order in which get(InputStream) reads the header fields.
    // NOTE(review): presumably these are int indexes into a raw header array;
    // nothing in this class reads them - confirm against other users.
    //
    final static int DH_SIZE           = 24;
    final static int DH_MAGIC          = 0;
    final static int DH_FORMATVERSION  = 1;
    final static int DH_LENGTH         = 2;
    final static int DH_CATCOUNT       = 3;
    final static int DH_FTABLE         = 4;
    final static int DH_FTABLELEN      = 5;
    final static int DH_RTABLE         = 6;
    final static int DH_RTABLELEN      = 7;
    final static int DH_SFTABLE        = 8;
    final static int DH_SFTABLELEN     = 9;
    final static int DH_SRTABLE        = 10;
    final static int DH_SRTABLELEN     = 11;
    final static int DH_TRIE           = 12;
    final static int DH_TRIELEN        = 13;
    final static int DH_RULESOURCE     = 14;
    final static int DH_RULESOURCELEN  = 15;
    final static int DH_STATUSTABLE    = 16;
    final static int DH_STATUSTABLELEN = 17;

    //
    // Cell offsets within one state-table row, relative to getRowIndex(state).
    //
    final static int ACCEPTING  = 0;
    final static int LOOKAHEAD  = 1;
    final static int TAGIDX     = 2;
    final static int RESERVED   = 3;
    final static int NEXTSTATES = 4;

    //
    // Cell offsets of the fields that precede the rows in a state table.
    // NUMSTATES is a 32-bit value split over two cells (see getNumStates);
    // rows start at ROW_DATA.
    // NOTE(review): ROWLEN, FLAGS and RESERVED_2 are not read in this class;
    // presumably they are 32-bit fields laid out the same way - confirm.
    //
    final static int NUMSTATES  = 0;
    final static int ROWLEN     = 2;
    final static int FLAGS      = 4;
    final static int RESERVED_2 = 6;
    final static int ROW_DATA   = 8;

    // NOTE(review): presumably bit values for the table FLAGS field; not used in this class.
    final static int RBBI_LOOKAHEAD_HARD_BREAK = 1;
    final static int RBBI_BOF_REQUIRED         = 2;

    /**
     * Returns the number of states recorded at the start of a state table.
     *
     * @param table a state table; cell {@code NUMSTATES} holds the high 16 bits
     *              and the following cell the low 16 bits of the count
     * @return the 32-bit state count
     */
    final static int getNumStates(short table[]) {
        int hi = table[NUMSTATES];
        int lo = table[NUMSTATES + 1];
        // Mask the low half so that sign extension of the short cannot leak into the high half.
        return (hi << 16) + (lo & 0x0000ffff);
    }

    /**
     * Header of the rule data.  The offset and length fields are used by
     * {@link RBBIDataWrapper#get(InputStream)} as byte values relative to the
     * start of this header.
     */
    final static class RBBIDataHeader {
        int    fMagic;           // Must be 0xb1a0 for the data to be accepted.
        int    fVersion;         // Version word as a single int.
        byte[] fFormatVersion;   // The four bytes of fVersion, most significant first.
        int    fLength;          // Upper bound used when validating the forward table offset.
        int    fCatCount;        // Number of character categories (next-state columns per row).

        int    fFTable;          // Offset of the forward state table.
        int    fFTableLen;       // Length in bytes of the forward state table.
        int    fRTable;          // Offset of the reverse state table.
        int    fRTableLen;
        int    fSFTable;         // Offset of the safe-forward table.
        int    fSFTableLen;
        int    fSRTable;         // Offset of the safe-reverse table.
        int    fSRTableLen;
        int    fTrie;            // Offset of the character-category trie.
        int    fTrieLen;
        int    fRuleSource;      // Offset of the rule source text.
        int    fRuleSourceLen;   // Length in bytes of the rule source text.
        int    fStatusTable;     // Offset of the status table.
        int    fStatusTableLen;  // Length in bytes of the status table.

        public RBBIDataHeader() {
            fMagic = 0;
            fFormatVersion = new byte[4];
        }
    }

    /**
     * Returns the index, within a state table array, of the first cell of the
     * row for the given state.
     *
     * @param state state number
     * @return index of the row's {@code ACCEPTING} cell
     */
    int getRowIndex(int state) {
        // Each row is 4 leading cells (ACCEPTING, LOOKAHEAD, TAGIDX, RESERVED)
        // followed by one next-state cell per character category.
        return ROW_DATA + state * (fHeader.fCatCount + 4);
    }

    /**
     * Folding-offset function handed to the {@link CharTrie} constructor.
     */
    static class TrieFoldingFunc implements Trie.DataManipulate {
        /**
         * @param data value read from the trie
         * @return the low 15 bits of {@code data} when bit 0x8000 is set, otherwise 0
         */
        public int getFoldingOffset(int data) {
            if ((data & 0x8000) != 0) {
                return data & 0x7fff;
            } else {
                return 0;
            }
        }
    }
    static TrieFoldingFunc fTrieFoldingFunc = new TrieFoldingFunc();

    RBBIDataWrapper() {
    }

    /**
     * Loads rule data from the ICU data resource {@code "data/" + name}.
     *
     * @param name resource name below {@code data/}
     * @return the loaded wrapper
     * @throws IOException if the data cannot be read or fails validation
     */
    static RBBIDataWrapper get(String name) throws IOException {
        String fullName = "data/" + name;
        InputStream is = ICUData.getRequiredStream(fullName);
        try {
            return get(is);
        } finally {
            // This method opened the stream, so it is responsible for releasing it.
            try {
                is.close();
            } catch (IOException ignored) {
                // Nothing useful can be done about a failed close of a stream
                // that was only read from; do not mask the primary outcome.
            }
        }
    }

    /**
     * Loads rule data from a stream.  The stream is not closed.
     *
     * <p>The first 0x80 bytes are skipped, then the header is read and validated,
     * and each section is read from the offset given in the header.
     *
     * @param is stream positioned at the start of the data
     * @return the loaded wrapper
     * @throws IOException if the stream ends early, the magic number or version
     *                     is not accepted, or an offset or length in the header
     *                     is inconsistent
     */
    static RBBIDataWrapper get(InputStream is) throws IOException {
        DataInputStream dis = new DataInputStream(new BufferedInputStream(is));
        RBBIDataWrapper wrapper = new RBBIDataWrapper();

        // Bytes preceding the rule data header are not interpreted here.
        skipFully(dis, 0x80);

        RBBIDataHeader header = readHeader(dis);
        wrapper.fHeader = header;

        // Current position in bytes, counted from the start of the header.
        int pos = DH_SIZE * 4;

        // Forward state table.
        if (header.fFTable < pos || header.fFTable > header.fLength) {
            throw new IOException("Break iterator Rule data corrupt");
        }
        skipFully(dis, header.fFTable - pos);
        pos = header.fFTable;
        wrapper.fFTable = readShorts(dis, header.fFTableLen);
        pos += wrapper.fFTable.length * 2;

        // Reverse state table.
        skipFully(dis, header.fRTable - pos);
        pos = header.fRTable;
        wrapper.fRTable = readShorts(dis, header.fRTableLen);
        pos += wrapper.fRTable.length * 2;

        // Safe-forward table; left null when the header records no data for it.
        if (header.fSFTableLen > 0) {
            skipFully(dis, header.fSFTable - pos);
            pos = header.fSFTable;
            wrapper.fSFTable = readShorts(dis, header.fSFTableLen);
            pos += wrapper.fSFTable.length * 2;
        }

        // Safe-reverse table; left null when the header records no data for it.
        if (header.fSRTableLen > 0) {
            skipFully(dis, header.fSRTable - pos);
            pos = header.fSRTable;
            wrapper.fSRTable = readShorts(dis, header.fSRTableLen);
            pos += wrapper.fSRTable.length * 2;
        }

        // Character-category trie.  The number of bytes the CharTrie constructor
        // consumes is not tracked here, so mark the stream, let the trie read,
        // and rewind to the start of the trie; pos therefore stays at fTrie.
        skipFully(dis, header.fTrie - pos);
        pos = header.fTrie;
        dis.mark(header.fTrieLen + 100);
        wrapper.fTrie = new CharTrie(dis, fTrieFoldingFunc);
        dis.reset();

        // Status table.
        if (pos > header.fStatusTable) {
            throw new IOException("Break iterator Rule data corrupt");
        }
        skipFully(dis, header.fStatusTable - pos);
        pos = header.fStatusTable;
        if (header.fStatusTableLen < 0) {
            throw new IOException("Break iterator Rule data corrupt");
        }
        wrapper.fStatusTable = new int[header.fStatusTableLen / 4];
        for (int i = 0; i < wrapper.fStatusTable.length; i++) {
            wrapper.fStatusTable[i] = dis.readInt();
            pos += 4;
        }

        // Rule source text, stored as 16-bit chars.
        if (pos > header.fRuleSource) {
            throw new IOException("Break iterator Rule data corrupt");
        }
        skipFully(dis, header.fRuleSource - pos);
        pos = header.fRuleSource;
        if (header.fRuleSourceLen < 0) {
            throw new IOException("Break iterator Rule data corrupt");
        }
        StringBuffer sb = new StringBuffer(header.fRuleSourceLen / 2);
        for (int i = 0; i < header.fRuleSourceLen; i += 2) {
            sb.append(dis.readChar());
            pos += 2;
        }
        wrapper.fRuleSource = sb.toString();

        if (RuleBasedBreakIterator.fDebugEnv != null && RuleBasedBreakIterator.fDebugEnv.indexOf("data") >= 0) {
            wrapper.dump();
        }
        return wrapper;
    }

    /**
     * Reads and validates the DH_SIZE-int header.
     *
     * @throws IOException if the magic number is not 0xb1a0, or the version is
     *                     neither the int value 1 nor has a leading byte of 3
     */
    private static RBBIDataHeader readHeader(DataInputStream dis) throws IOException {
        RBBIDataHeader header = new RBBIDataHeader();

        header.fMagic   = dis.readInt();
        header.fVersion = dis.readInt();
        header.fFormatVersion[0] = (byte) (header.fVersion >> 24);
        header.fFormatVersion[1] = (byte) (header.fVersion >> 16);
        header.fFormatVersion[2] = (byte) (header.fVersion >> 8);
        header.fFormatVersion[3] = (byte) (header.fVersion);
        header.fLength         = dis.readInt();
        header.fCatCount       = dis.readInt();
        header.fFTable         = dis.readInt();
        header.fFTableLen      = dis.readInt();
        header.fRTable         = dis.readInt();
        header.fRTableLen      = dis.readInt();
        header.fSFTable        = dis.readInt();
        header.fSFTableLen     = dis.readInt();
        header.fSRTable        = dis.readInt();
        header.fSRTableLen     = dis.readInt();
        header.fTrie           = dis.readInt();
        header.fTrieLen        = dis.readInt();
        header.fRuleSource     = dis.readInt();
        header.fRuleSourceLen  = dis.readInt();
        header.fStatusTable    = dis.readInt();
        header.fStatusTableLen = dis.readInt();
        // The remaining 6 ints of the header are not used.
        skipFully(dis, 6 * 4);

        if (header.fMagic != 0xb1a0 ||
                !(header.fVersion == 1 || header.fFormatVersion[0] == 3)) {
            throw new IOException("Break Iterator Rule Data Magic Number Incorrect, or unsupported data version.");
        }
        return header;
    }

    /**
     * Reads {@code byteLength / 2} big-endian shorts from the stream.
     *
     * @throws IOException if {@code byteLength} is negative or the stream ends early
     */
    private static short[] readShorts(DataInputStream dis, int byteLength) throws IOException {
        if (byteLength < 0) {
            throw new IOException("Break iterator Rule data corrupt");
        }
        short[] cells = new short[byteLength / 2];
        for (int i = 0; i < cells.length; i++) {
            cells[i] = dis.readShort();
        }
        return cells;
    }

    /**
     * Skips exactly {@code count} bytes.  {@code InputStream.skip} is allowed to
     * skip fewer bytes than requested, so a single call cannot be relied on.
     *
     * @throws IOException if {@code count} is negative (an offset in the header
     *                     points backwards) or the stream ends first
     */
    private static void skipFully(DataInputStream dis, int count) throws IOException {
        if (count < 0) {
            throw new IOException("Break iterator Rule data corrupt");
        }
        int remaining = count;
        while (remaining > 0) {
            long skipped = dis.skip(remaining);
            if (skipped > 0) {
                remaining -= (int) skipped;
            } else {
                // skip() made no progress; read one byte to tell end-of-stream
                // apart from a stream that merely declined to skip.
                if (dis.read() < 0) {
                    throw new IOException("Break iterator Rule data truncated");
                }
                remaining--;
            }
        }
    }

    /**
     * Prints the state tables, the character categories and the rule source
     * to {@code System.out}.
     */
    void dump() {
        System.out.println("RBBI Data Wrapper dump ...");
        System.out.println();
        System.out.println("Forward State Table");
        dumpTable(fFTable);
        System.out.println("Reverse State Table");
        dumpTable(fRTable);
        System.out.println("Forward Safe Points Table");
        dumpTable(fSFTable);
        System.out.println("Reverse Safe Points Table");
        dumpTable(fSRTable);

        dumpCharCategories();
        System.out.println("Source Rules: " + fRuleSource);
    }

    /**
     * Formats an int in decimal, left-padded with spaces to at least {@code width} characters.
     *
     * @param n     value to format
     * @param width minimum width of the result; longer values are not truncated
     * @return the padded string
     */
    static public String intToString(int n, int width) {
        return padLeft(Integer.toString(n), width);
    }

    /**
     * Formats an int in hexadecimal (as {@link Integer#toHexString(int)} does),
     * left-padded with spaces to at least {@code width} characters.
     *
     * @param n     value to format
     * @param width minimum width of the result; longer values are not truncated
     * @return the padded string
     */
    static public String intToHexString(int n, int width) {
        return padLeft(Integer.toHexString(n), width);
    }

    /** Left-pads {@code text} with spaces up to {@code width} characters. */
    private static String padLeft(String text, int width) {
        int padding = width - text.length();
        if (padding <= 0) {
            return text;
        }
        StringBuffer dest = new StringBuffer(width);
        for (int i = 0; i < padding; i++) {
            dest.append(' ');
        }
        dest.append(text);
        return dest.toString();
    }

    /** Prints one state table, or a placeholder line when the table is {@code null}. */
    private void dumpTable(short table[]) {
        if (table == null) {
            System.out.println(" -- null -- ");
            return;
        }

        // Column header: fixed columns followed by one column per category.
        StringBuffer header = new StringBuffer(" Row Acc Look Tag");
        for (int n = 0; n < fHeader.fCatCount; n++) {
            header.append(intToString(n, 5));
        }
        System.out.println(header);

        // Underline of the same length as the header.
        StringBuffer rule = new StringBuffer(header.length());
        for (int n = 0; n < header.length(); n++) {
            rule.append('-');
        }
        System.out.println(rule);

        int numStates = getNumStates(table);
        for (int state = 0; state < numStates; state++) {
            dumpRow(table, state);
        }
        System.out.println();
    }

    /**
     * Prints a single row of a state table.
     *
     * @param table the state table
     * @param state the state (row) number
     */
    private void dumpRow(short table[], int state) {
        StringBuffer dest = new StringBuffer(fHeader.fCatCount * 5 + 20);
        dest.append(intToString(state, 4));
        int row = getRowIndex(state);

        // Zero ACCEPTING / LOOKAHEAD cells are shown as blank rather than "0".
        // NOTE(review): the blank is a single space while the number is 5 wide;
        // the original padding may have been lost - confirm.
        if (table[row + ACCEPTING] != 0) {
            dest.append(intToString(table[row + ACCEPTING], 5));
        } else {
            dest.append(" ");
        }
        if (table[row + LOOKAHEAD] != 0) {
            dest.append(intToString(table[row + LOOKAHEAD], 5));
        } else {
            dest.append(" ");
        }
        dest.append(intToString(table[row + TAGIDX], 5));

        for (int col = 0; col < fHeader.fCatCount; col++) {
            dest.append(intToString(table[row + NEXTSTATES + col], 5));
        }

        System.out.println(dest);
    }

    /**
     * Prints, for every category number from 0 to {@code fCatCount} inclusive,
     * the ranges of code points the trie maps to that category.
     */
    private void dumpCharCategories() {
        int n = fHeader.fCatCount;
        String catStrings[] = new String[n + 1];
        int lastNewline[] = new int[n + 1];   // length of catStrings[i] at its last inserted line break
        int rangeStart = 0;
        int rangeEnd = 0;
        int lastCat = -1;                     // category of the range being accumulated; -1 = none yet

        for (int category = 0; category <= n; category++) {
            catStrings[category] = "";
        }
        System.out.println("\nCharacter Categories");
        System.out.println("--------------------");

        for (int char32 = 0; char32 <= 0x10ffff; char32++) {
            int category = fTrie.getCodePointValue(char32);
            category &= ~0x4000;   // bit 0x4000 is cleared before the value is used as a category number
            if (category < 0 || category > n) {
                System.out.println("Error, bad category " + Integer.toHexString(category) +
                        " for char " + Integer.toHexString(char32));
                break;
            }
            if (category == lastCat) {
                rangeEnd = char32;
                continue;
            }

            // Category changed: flush the finished range, then start a new one.
            if (lastCat >= 0) {
                if (catStrings[lastCat].length() > lastNewline[lastCat] + 70) {
                    lastNewline[lastCat] = catStrings[lastCat].length() + 10;
                    catStrings[lastCat] += "\n ";
                }
                catStrings[lastCat] += " " + Integer.toHexString(rangeStart);
                if (rangeEnd != rangeStart) {
                    catStrings[lastCat] += "-" + Integer.toHexString(rangeEnd);
                }
            }
            lastCat = category;
            rangeStart = rangeEnd = char32;
        }

        // Flush the last range.  lastCat is still -1 when the very first code
        // point had a bad category, in which case there is nothing to flush.
        if (lastCat >= 0) {
            catStrings[lastCat] += " " + Integer.toHexString(rangeStart);
            if (rangeEnd != rangeStart) {
                catStrings[lastCat] += "-" + Integer.toHexString(rangeEnd);
            }
        }

        for (int category = 0; category <= n; category++) {
            System.out.println(intToString(category, 5) + " " + catStrings[category]);
        }
        System.out.println();
    }

    /**
     * Debugging entry point: loads the break rule data named by the first
     * argument ("char" when no argument is given) and dumps it.
     *
     * @param args optional: base name of the {@code .brk} data to load
     */
    public static void main(String[] args) {
        String s;
        if (args.length == 0) {
            s = "char";
        } else {
            s = args[0];
        }
        System.out.println("RBBIDataWrapper.main(" + s + ") ");

        String versionedName = ICUResourceBundle.ICU_BUNDLE + "/" + s + ".brk";

        try {
            RBBIDataWrapper wrapper = RBBIDataWrapper.get(versionedName);
            wrapper.dump();
        } catch (Exception e) {
            System.out.println("Exception: " + e.toString());
        }
    }

}