package com.ibm.icu.text;

import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import com.ibm.icu.impl.Assert;
import com.ibm.icu.impl.CharTrie;
import com.ibm.icu.impl.IntTrieBuilder;
import com.ibm.icu.impl.Trie;

/**
 * Partitions the code point space 0..0x10ffff into ranges according to which
 * of the rule builder's Unicode sets contain them, assigns a category number
 * to each distinct combination of sets, and stores the code point to category
 * mapping in a trie.
 *
 * <p>Category numbers handed out by {@link #build()} start at 3. Values 1 and
 * 2 are attached to sets containing the strings {@code "eof"} and
 * {@code "bof"} respectively; value 0 is never assigned by this class.
 */
class RBBISetBuilder {

    /** Bit OR-ed into a range's {@code fNum} when one of its sets is named "dictionary". */
    private static final int DICTIONARY_FLAG = 0x4000;

    /**
     * One node of a singly linked list of non-overlapping code point ranges.
     * All code points inside one descriptor belong to exactly the same sets.
     */
    static class RangeDescriptor {
        /** First code point of the range (inclusive). */
        int fStartChar;
        /** Last code point of the range (inclusive). */
        int fEndChar;
        /** Category number of the range; 0 until {@code build()} assigns one. */
        int fNum;
        /** The set nodes ({@code RBBINode}) whose sets contain this range. */
        List fIncludesSets;
        /** Next range in the list, or {@code null} at the end. */
        RangeDescriptor fNext;

        /** Creates a descriptor with an empty set list. */
        RangeDescriptor() {
            fIncludesSets = new ArrayList();
        }

        /**
         * Copies the bounds, category number and set list of {@code other}.
         * The {@code fNext} link is not copied.
         */
        RangeDescriptor(RangeDescriptor other) {
            fStartChar = other.fStartChar;
            fEndChar = other.fEndChar;
            fNum = other.fNum;
            fIncludesSets = new ArrayList(other.fIncludesSets);
        }

        /**
         * Splits this range in two. This descriptor keeps
         * {@code fStartChar..where-1}; a new descriptor covering
         * {@code where..fEndChar} is linked in directly after it.
         *
         * @param where first code point of the new, second range; must be
         *              greater than {@code fStartChar} and no greater than
         *              {@code fEndChar}
         */
        void split(int where) {
            Assert.assrt(where > fStartChar && where <= fEndChar);
            RangeDescriptor tail = new RangeDescriptor(this);

            // The copy still carries the original end; only its start moves.
            tail.fStartChar = where;
            this.fEndChar = where - 1;
            tail.fNext = this.fNext;
            this.fNext = tail;
        }

        /**
         * Sets the dictionary bit in {@code fNum} if any set containing this
         * range was reached through a variable reference whose text is
         * {@code "dictionary"}.
         */
        void setDictionaryFlag() {
            for (int i = 0; i < fIncludesSets.size(); i++) {
                RBBINode usetNode = (RBBINode) fIncludesSets.get(i);
                // Constant-first equals: no NPE if the variable text is null.
                if ("dictionary".equals(setNameOf(usetNode, ""))) {
                    fNum |= DICTIONARY_FLAG;
                    break;
                }
            }
        }
    }

    /** The rule builder supplying the set nodes and the debug options. */
    RBBIRuleBuilder fRB;
    /** Head of the linked list of non-overlapping ranges. */
    RangeDescriptor fRangeList;

    /** Trie mapping code points to category numbers; created by {@link #build()}. */
    IntTrieBuilder fTrie;
    /** Not written or read anywhere in this class. */
    int fTrieSize;

    /** Number of categories handed out so far by {@link #build()}. */
    int fGroupCount;

    /** True once {@link #build()} has seen a set containing the string "bof". */
    boolean fSawBOF;

    /**
     * @param rb the rule builder whose set nodes will be processed
     */
    RBBISetBuilder(RBBIRuleBuilder rb) {
        fRB = rb;
    }

    /**
     * Runs the whole set-building process: computes the range list, assigns
     * category numbers, tags the eof/bof sets and fills the trie. Debug
     * listings are printed at the same points as before, controlled by the
     * substrings "usets", "range", "rgroup" and "esets" in
     * {@code fRB.fDebugEnv}.
     */
    void build() {
        if (debugEnabled("usets")) {
            printSets();
        }

        buildRangeList();
        if (debugEnabled("range")) {
            printRanges();
        }

        assignRangeGroups();
        addEofBofCategories();

        if (debugEnabled("rgroup")) {
            printRangeGroups();
        }
        if (debugEnabled("esets")) {
            printSets();
        }

        buildTrie();
    }

    /** Returns true if the debug option string exists and contains {@code option}. */
    private boolean debugEnabled(String option) {
        return fRB.fDebugEnv != null && fRB.fDebugEnv.indexOf(option) >= 0;
    }

    /**
     * Builds {@code fRangeList}: starts with one range covering 0..0x10ffff
     * and splits it at every boundary of every input set, recording in each
     * resulting range the set nodes that contain it.
     */
    private void buildRangeList() {
        fRangeList = new RangeDescriptor();
        fRangeList.fStartChar = 0;
        fRangeList.fEndChar = 0x10ffff;

        Iterator ni = fRB.fUSetNodes.iterator();
        while (ni.hasNext()) {
            RBBINode usetNode = (RBBINode) ni.next();

            UnicodeSet inputSet = usetNode.fInputSet;
            int inputSetRangeCount = inputSet.getRangeCount();
            int inputSetRangeIndex = 0;
            RangeDescriptor rlRange = fRangeList;

            while (inputSetRangeIndex < inputSetRangeCount) {
                int inputSetRangeBegin = inputSet.getRangeStart(inputSetRangeIndex);
                int inputSetRangeEnd = inputSet.getRangeEnd(inputSetRangeIndex);

                // Skip list ranges that lie entirely before the set range.
                while (rlRange.fEndChar < inputSetRangeBegin) {
                    rlRange = rlRange.fNext;
                }

                // The list range starts before the set range: cut it at the
                // set range's start and retry; the skip loop above then
                // advances to the newly created second half.
                if (rlRange.fStartChar < inputSetRangeBegin) {
                    rlRange.split(inputSetRangeBegin);
                    continue;
                }

                // The list range extends past the set range: cut off the
                // part beyond the set range's end.
                if (rlRange.fEndChar > inputSetRangeEnd) {
                    rlRange.split(inputSetRangeEnd + 1);
                }

                // The list range now lies entirely inside the set range.
                if (!rlRange.fIncludesSets.contains(usetNode)) {
                    rlRange.fIncludesSets.add(usetNode);
                }

                // Move to the next set range only once this one is fully covered.
                if (inputSetRangeEnd == rlRange.fEndChar) {
                    inputSetRangeIndex++;
                }
                rlRange = rlRange.fNext;
            }
        }
    }

    /**
     * Assigns a category number to every range. Ranges with an equal set list
     * share the number of the earliest such range; each new combination gets
     * the next number (the first one is 3), has its dictionary flag computed,
     * and has the unflagged number attached to each of its sets.
     */
    private void assignRangeGroups() {
        for (RangeDescriptor rlRange = fRangeList; rlRange != null; rlRange = rlRange.fNext) {
            for (RangeDescriptor earlier = fRangeList; earlier != rlRange; earlier = earlier.fNext) {
                if (rlRange.fIncludesSets.equals(earlier.fIncludesSets)) {
                    rlRange.fNum = earlier.fNum;
                    break;
                }
            }
            if (rlRange.fNum == 0) {
                fGroupCount++;
                rlRange.fNum = fGroupCount + 2;
                rlRange.setDictionaryFlag();
                // Note: the value added to the sets never carries the flag bit.
                addValToSets(rlRange.fIncludesSets, fGroupCount + 2);
            }
        }
    }

    /**
     * Attaches value 1 to every set containing the string "eof" and value 2
     * to every set containing the string "bof"; the latter also sets
     * {@code fSawBOF}.
     */
    private void addEofBofCategories() {
        String eofString = "eof";
        String bofString = "bof";

        Iterator ni = fRB.fUSetNodes.iterator();
        while (ni.hasNext()) {
            RBBINode usetNode = (RBBINode) ni.next();
            UnicodeSet inputSet = usetNode.fInputSet;
            if (inputSet.contains(eofString)) {
                addValToSet(usetNode, 1);
            }
            if (inputSet.contains(bofString)) {
                addValToSet(usetNode, 2);
                fSawBOF = true;
            }
        }
    }

    /** Creates {@code fTrie} and stores every range's category number in it. */
    private void buildTrie() {
        fTrie = new IntTrieBuilder(null, 100000, 0, 0, true);
        for (RangeDescriptor rlRange = fRangeList; rlRange != null; rlRange = rlRange.fNext) {
            // fEndChar is inclusive; setRange is passed fEndChar + 1
            // (presumably an exclusive limit -- confirm against IntTrieBuilder).
            fTrie.setRange(rlRange.fStartChar, rlRange.fEndChar + 1, rlRange.fNum, true);
        }
    }

    /**
     * Callback handed to {@code IntTrieBuilder.serialize}. Must remain a
     * non-static inner class because it reads the enclosing builder's
     * {@code fTrie}.
     */
    class RBBIDataManipulate implements IntTrieBuilder.DataManipulate {
        /**
         * Scans the 0x400 code points beginning at {@code start}.
         *
         * @return {@code offset | 0x8000} as soon as a non-zero trie value is
         *         found, otherwise 0
         */
        public int getFoldedValue(int start, int offset) {
            boolean[] inBlockZero = new boolean[1];
            int limit = start + 0x400;

            while (start < limit) {
                int value = fTrie.getValue(start, inBlockZero);
                if (inBlockZero[0]) {
                    // Nothing to find in this block; jump a whole block ahead.
                    start += IntTrieBuilder.DATA_BLOCK_LENGTH;
                } else if (value != 0) {
                    return offset | 0x08000;
                } else {
                    ++start;
                }
            }
            return 0;
        }
    }

    /** Callback instance passed to every {@code fTrie.serialize} call. */
    RBBIDataManipulate dm = new RBBIDataManipulate();

    /**
     * Returns the value of {@code fTrie.serialize} called with a null output
     * stream.
     *
     * @return the value returned by {@code serialize}, or 0 if it threw an
     *         {@code IOException} and the assertion helper did not itself throw
     */
    int getTrieSize() {
        int size = 0;
        try {
            size = fTrie.serialize(null, true, dm);
        } catch (IOException e) {
            // Treated as an internal error rather than propagated.
            Assert.assrt(false);
        }
        return size;
    }

    /**
     * Serializes the trie to the given stream.
     *
     * @param os destination stream
     * @throws IOException if {@code fTrie.serialize} throws it
     */
    void serializeTrie(OutputStream os) throws IOException {
        fTrie.serialize(os, true, dm);
    }

    /**
     * Calls {@link #addValToSet(RBBINode, int)} for every node in a list.
     *
     * @param sets list of {@code RBBINode} set nodes
     * @param val  value to attach to each of them
     */
    void addValToSets(List sets, int val) {
        for (int ix = 0; ix < sets.size(); ix++) {
            addValToSet((RBBINode) sets.get(ix), val);
        }
    }

    /**
     * Attaches a new {@code leafChar} node carrying {@code val} beneath a set
     * node. If the set node has no left child the leaf becomes that child;
     * otherwise a new {@code opOr} node is inserted whose left child is the
     * previous subtree and whose right child is the new leaf.
     *
     * @param usetNode the set node to extend
     * @param val      value stored in the new leaf's {@code fVal}
     */
    void addValToSet(RBBINode usetNode, int val) {
        RBBINode leafNode = new RBBINode(RBBINode.leafChar);
        leafNode.fVal = val;

        if (usetNode.fLeftChild == null) {
            usetNode.fLeftChild = leafNode;
            leafNode.fParent = usetNode;
            return;
        }

        RBBINode orNode = new RBBINode(RBBINode.opOr);
        orNode.fLeftChild = usetNode.fLeftChild;
        orNode.fRightChild = leafNode;
        orNode.fLeftChild.fParent = orNode;
        orNode.fRightChild.fParent = orNode;
        usetNode.fLeftChild = orNode;
        orNode.fParent = usetNode;
    }

    /**
     * @return {@code fGroupCount + 3}: the categories assigned by
     *         {@link #build()} plus the three values 0, 1 and 2 below them
     */
    int getNumCharCategories() {
        return fGroupCount + 3;
    }

    /**
     * @return true if {@link #build()} found a set containing the string "bof"
     */
    boolean sawBOF() {
        return fSawBOF;
    }

    /**
     * Finds the first range in list order whose {@code fNum} equals
     * {@code category} exactly (so a range carrying the dictionary bit only
     * matches a value that also has that bit).
     *
     * @param category the {@code fNum} value to look for
     * @return the range's first code point, or -1 if there is no such range
     */
    int getFirstChar(int category) {
        for (RangeDescriptor rlRange = fRangeList; rlRange != null; rlRange = rlRange.fNext) {
            if (rlRange.fNum == category) {
                return rlRange.fStartChar;
            }
        }
        return -1;
    }

    /**
     * Returns the text of the variable reference two levels above a set node
     * (its parent's parent, when that node has type {@code RBBINode.varRef}),
     * or {@code defaultName} when there is no such node.
     */
    private static String setNameOf(RBBINode usetNode, String defaultName) {
        RBBINode setRef = usetNode.fParent;
        if (setRef != null) {
            RBBINode varRef = setRef.fParent;
            if (varRef != null && varRef.fType == RBBINode.varRef) {
                return varRef.fText;
            }
        }
        return defaultName;
    }

    /** Debug output: lists every range with its number, bounds and set names. */
    void printRanges() {
        System.out.print("\n\n Nonoverlapping Ranges ...\n");
        for (RangeDescriptor rlRange = fRangeList; rlRange != null; rlRange = rlRange.fNext) {
            System.out.print(" " + rlRange.fNum + " " + rlRange.fStartChar + "-" + rlRange.fEndChar);

            for (int i = 0; i < rlRange.fIncludesSets.size(); i++) {
                RBBINode usetNode = (RBBINode) rlRange.fIncludesSets.get(i);
                System.out.print(setNameOf(usetNode, "anon"));
                System.out.print(" ");
            }
            System.out.println("");
        }
    }

    /**
     * Debug output: for each category, in order of first appearance, prints
     * its number, a {@code <DICT>} marker if the dictionary bit is set, the
     * names of its sets, and all ranges belonging to it (five per line).
     */
    void printRangeGroups() {
        int lastPrintedGroupNum = 0;

        System.out.print("\nRanges grouped by Unicode Set Membership...\n");
        for (RangeDescriptor rlRange = fRangeList; rlRange != null; rlRange = rlRange.fNext) {
            // Mask kept exactly as in the original; it clears the dictionary bit.
            int groupNum = rlRange.fNum & 0xbfff;
            if (groupNum <= lastPrintedGroupNum) {
                continue; // this group was already printed
            }
            lastPrintedGroupNum = groupNum;

            if (groupNum < 10) {
                System.out.print(" ");
            }
            System.out.print(groupNum + " ");

            if ((rlRange.fNum & DICTIONARY_FLAG) != 0) {
                System.out.print(" <DICT> ");
            }

            for (int i = 0; i < rlRange.fIncludesSets.size(); i++) {
                RBBINode usetNode = (RBBINode) rlRange.fIncludesSets.get(i);
                System.out.print(setNameOf(usetNode, "anon"));
                System.out.print(" ");
            }

            int printed = 0;
            for (RangeDescriptor tRange = rlRange; tRange != null; tRange = tRange.fNext) {
                if (tRange.fNum == rlRange.fNum) {
                    if (printed++ % 5 == 0) {
                        System.out.print("\n ");
                    }
                    RBBINode.printHex(tRange.fStartChar, -1);
                    System.out.print("-");
                    RBBINode.printHex(tRange.fEndChar, 0);
                }
            }
            System.out.print("\n");
        }
        System.out.print("\n");
    }

    /**
     * Debug output: lists every set node of the rule builder with its index,
     * name, text and, when present, the subtree under its left child.
     */
    void printSets() {
        System.out.print("\n\nUnicode Sets List\n------------------\n");
        for (int i = 0; i < fRB.fUSetNodes.size(); i++) {
            RBBINode usetNode = (RBBINode) fRB.fUSetNodes.get(i);

            RBBINode.printInt(2, i);
            System.out.print(" " + setNameOf(usetNode, "anonymous"));
            System.out.print(" ");
            System.out.print(usetNode.fText);
            System.out.print("\n");
            if (usetNode.fLeftChild != null) {
                usetNode.fLeftChild.printTree(true);
            }
        }
        System.out.print("\n");
    }
}