1 7 8 package com.ibm.icu.text; 9 10 import java.util.List ; 11 import java.util.ArrayList ; 12 import java.util.Set ; 13 import java.util.HashSet ; 14 import java.util.SortedSet ; 15 import java.util.TreeSet ; 16 import java.util.Iterator ; 17 import java.util.HashMap ; 18 import java.util.Map ; 19 import java.util.Collection ; 20 21 import com.ibm.icu.impl.Assert; 22 import com.ibm.icu.lang.UCharacter; 23 import com.ibm.icu.lang.UProperty; 24 25 class RBBITableBuilder { 34 35 36 37 static class RBBIStateDescriptor { 41 boolean fMarked; 42 int fAccepting; 43 int fLookAhead; 44 SortedSet fTagVals; 45 int fTagsIdx; 46 Set fPositions; 50 int[] fDtran; 55 RBBIStateDescriptor(int maxInputSymbol) { 56 fTagVals = new TreeSet (); 57 fPositions = new HashSet (); 58 fDtran = new int[maxInputSymbol+1]; } 63 } 64 65 66 private RBBIRuleBuilder fRB; 67 private int fRootIx; 71 private List fDStates; 75 RBBITableBuilder(RBBIRuleBuilder rb, int rootNodeIx) { 83 fRootIx = rootNodeIx; 84 fRB = rb; 85 fDStates = new ArrayList (); 86 } 87 88 89 90 91 void build() { 98 if (fRB.fTreeRoots[fRootIx]==null) { 101 return; 102 } 103 104 fRB.fTreeRoots[fRootIx] = fRB.fTreeRoots[fRootIx].flattenVariables(); 109 if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("ftree")>=0) { 110 System.out.println("Parse tree after flattening variable references."); 111 fRB.fTreeRoots[fRootIx].printTree(true); 112 } 113 114 if (fRB.fSetBuilder.sawBOF()) { 121 RBBINode bofTop = new RBBINode(RBBINode.opCat); 122 RBBINode bofLeaf = new RBBINode(RBBINode.leafChar); 123 bofTop.fLeftChild = bofLeaf; 124 bofTop.fRightChild = fRB.fTreeRoots[fRootIx]; 125 bofLeaf.fParent = bofTop; 126 bofLeaf.fVal = 2; fRB.fTreeRoots[fRootIx] = bofTop; 128 } 129 130 RBBINode cn = new RBBINode(RBBINode.opCat); 136 cn.fLeftChild = fRB.fTreeRoots[fRootIx]; 137 fRB.fTreeRoots[fRootIx].fParent = cn; 138 cn.fRightChild = new RBBINode(RBBINode.endMark); 139 cn.fRightChild.fParent = cn; 140 fRB.fTreeRoots[fRootIx] = cn; 141 142 fRB.fTreeRoots[fRootIx].flattenSets(); 147 if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("stree")>=0) { 148 System.out.println("Parse tree after flattening Unicode Set references."); 149 fRB.fTreeRoots[fRootIx].printTree(true); 150 } 151 152 153 calcNullable(fRB.fTreeRoots[fRootIx]); 161 calcFirstPos(fRB.fTreeRoots[fRootIx]); 162 calcLastPos(fRB.fTreeRoots[fRootIx]); 163 calcFollowPos(fRB.fTreeRoots[fRootIx]); 164 if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("pos")>=0) { 165 System.out.print("\n"); 166 printPosSets(fRB.fTreeRoots[fRootIx]); 167 } 168 169 if (fRB.fChainRules) { 173 calcChainedFollowPos(fRB.fTreeRoots[fRootIx]); 174 } 175 176 if (fRB.fSetBuilder.sawBOF()) { 180 bofFixup(); 181 } 182 183 buildStateTable(); 187 flagAcceptingStates(); 188 flagLookAheadStates(); 189 flagTaggedStates(); 190 191 mergeRuleStatusVals(); 197 198 if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("states")>=0) {printStates();}; 199 } 200 201 202 203 void calcNullable(RBBINode n) { 209 if (n == null) { 210 return; 211 } 212 if (n.fType == RBBINode.setRef || 213 n.fType == RBBINode.endMark ) { 214 n.fNullable = false; 216 return; 217 } 218 219 if (n.fType == RBBINode.lookAhead || n.fType == RBBINode.tag) { 220 n.fNullable = true; 223 return; 224 } 225 226 227 calcNullable(n.fLeftChild); 230 calcNullable(n.fRightChild); 231 232 if (n.fType == RBBINode.opOr) { 234 n.fNullable = n.fLeftChild.fNullable || n.fRightChild.fNullable; 235 } 236 else if (n.fType == RBBINode.opCat) { 237 n.fNullable = n.fLeftChild.fNullable && n.fRightChild.fNullable; 238 } 239 else if (n.fType == RBBINode.opStar || n.fType == RBBINode.opQuestion) { 240 n.fNullable = true; 241 } 242 else { 243 n.fNullable = false; 244 } 245 } 246 247 248 249 250 void calcFirstPos(RBBINode n) { 256 if (n == null) { 257 return; 258 } 259 if (n.fType == RBBINode.leafChar || 260 n.fType == RBBINode.endMark || 261 n.fType == RBBINode.lookAhead || 262 n.fType == RBBINode.tag) { 263 n.fFirstPosSet.add(n); 265 return; 266 } 267 268 calcFirstPos(n.fLeftChild); 271 calcFirstPos(n.fRightChild); 272 273 if (n.fType == RBBINode.opOr) { 275 n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet); 276 n.fFirstPosSet.addAll(n.fRightChild.fFirstPosSet); 277 } 278 else if (n.fType == RBBINode.opCat) { 279 n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet); 280 if (n.fLeftChild.fNullable) { 281 n.fFirstPosSet.addAll(n.fRightChild.fFirstPosSet); 282 } 283 } 284 else if (n.fType == RBBINode.opStar || 285 n.fType == RBBINode.opQuestion || 286 n.fType == RBBINode.opPlus) { 287 n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet); 288 } 289 } 290 291 292 293 void calcLastPos(RBBINode n) { 299 if (n == null) { 300 return; 301 } 302 if (n.fType == RBBINode.leafChar || 303 n.fType == RBBINode.endMark || 304 n.fType == RBBINode.lookAhead || 305 n.fType == RBBINode.tag) { 306 n.fLastPosSet.add(n); 308 return; 309 } 310 311 calcLastPos(n.fLeftChild); 314 calcLastPos(n.fRightChild); 315 316 if (n.fType == RBBINode.opOr) { 318 n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet); 319 n.fLastPosSet.addAll(n.fRightChild.fLastPosSet); 320 } 321 else if (n.fType == RBBINode.opCat) { 322 n.fLastPosSet.addAll(n.fRightChild.fLastPosSet); 323 if (n.fRightChild.fNullable) { 324 n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet); 325 } 326 } 327 else if (n.fType == RBBINode.opStar || 328 n.fType == RBBINode.opQuestion || 329 n.fType == RBBINode.opPlus) { 330 n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet); 331 } 332 } 333 334 335 336 void calcFollowPos(RBBINode n) { 342 if (n == null || 343 n.fType == RBBINode.leafChar || 344 n.fType == RBBINode.endMark) { 345 return; 346 } 347 348 calcFollowPos(n.fLeftChild); 349 calcFollowPos(n.fRightChild); 350 351 if (n.fType == RBBINode.opCat) { 353 RBBINode i; 355 Set LastPosOfLeftChild = n.fLeftChild.fLastPosSet; 356 357 Iterator ix = LastPosOfLeftChild.iterator(); 358 while (ix.hasNext()) { 359 i = (RBBINode )ix.next(); 360 i.fFollowPos.addAll(n.fRightChild.fFirstPosSet); 361 } 362 } 363 364 if (n.fType == RBBINode.opStar || 366 n.fType == RBBINode.opPlus) { 367 RBBINode i; Iterator ix = n.fLastPosSet.iterator(); 369 while (ix.hasNext()) { 370 i = (RBBINode) ix.next(); 371 i.fFollowPos.addAll(n.fFirstPosSet); 372 } 373 374 } 375 376 377 378 } 379 380 381 void calcChainedFollowPos(RBBINode tree) { 388 389 List endMarkerNodes = new ArrayList (); 390 List leafNodes = new ArrayList (); 391 392 tree.findNodes(endMarkerNodes, RBBINode.endMark); 394 395 tree.findNodes(leafNodes, RBBINode.leafChar); 397 398 RBBINode userRuleRoot = tree; 402 if (fRB.fSetBuilder.sawBOF()) { 403 userRuleRoot = tree.fLeftChild.fRightChild; 404 } 405 Assert.assrt(userRuleRoot != null); 406 Set matchStartNodes = userRuleRoot.fFirstPosSet; 407 408 409 Iterator endNodeIx = leafNodes.iterator(); 412 413 while (endNodeIx.hasNext()) { 414 RBBINode tNode = (RBBINode)endNodeIx.next(); 415 RBBINode endNode = null; 416 417 Iterator i = endMarkerNodes.iterator(); 420 while (i.hasNext()) { 421 RBBINode endMarkerNode = (RBBINode)i.next(); 422 if (tNode.fFollowPos.contains(endMarkerNode)) { 423 endNode = tNode; 424 break; 425 } 426 } 427 if (endNode == null) { 428 continue; 430 } 431 432 434 if (fRB.fLBCMNoChain) { 439 int c = this.fRB.fSetBuilder.getFirstChar(endNode.fVal); 440 if (c != -1) { 441 int cLBProp = UCharacter.getIntPropertyValue(c, UProperty.LINE_BREAK); 443 if (cLBProp == UCharacter.LineBreak.COMBINING_MARK) { 444 continue; 445 } 446 } 447 } 448 449 450 RBBINode startNode; 453 Iterator startNodeIx = matchStartNodes.iterator(); 454 while (startNodeIx.hasNext()) { 455 startNode = (RBBINode )startNodeIx.next(); 456 if (startNode.fType != RBBINode.leafChar) { 457 continue; 458 } 459 460 if (endNode.fVal == startNode.fVal) { 461 464 endNode.fFollowPos.addAll(startNode.fFollowPos); 469 } 470 } 471 } 472 } 473 474 475 void bofFixup() { 486 RBBINode bofNode = fRB.fTreeRoots[fRootIx].fLeftChild.fLeftChild; 498 Assert.assrt(bofNode.fType == RBBINode.leafChar); 499 Assert.assrt(bofNode.fVal == 2); 500 501 Set matchStartNodes = fRB.fTreeRoots[fRootIx].fLeftChild.fRightChild.fFirstPosSet; 507 Iterator startNodeIt = matchStartNodes.iterator(); 508 while (startNodeIt.hasNext()) { 509 RBBINode startNode = (RBBINode)startNodeIt.next(); 510 if (startNode.fType != RBBINode.leafChar) { 511 continue; 512 } 513 514 if (startNode.fVal == bofNode.fVal) { 515 bofNode.fFollowPos.addAll(startNode.fFollowPos); 521 } 522 } 523 } 524 525 void buildStateTable() { 535 int lastInputSymbol = fRB.fSetBuilder.getNumCharCategories() - 1; 538 RBBIStateDescriptor failState = new RBBIStateDescriptor(lastInputSymbol); 539 fDStates.add(failState); 540 541 RBBIStateDescriptor initialState = new RBBIStateDescriptor(lastInputSymbol); 544 initialState.fPositions.addAll(fRB.fTreeRoots[fRootIx].fFirstPosSet); 545 fDStates.add(initialState); 546 547 for (;;) { 549 RBBIStateDescriptor T = null; 550 int tx; 551 for (tx=1; tx<fDStates.size(); tx++) { 552 RBBIStateDescriptor temp = (RBBIStateDescriptor )fDStates.get(tx); 553 if (temp.fMarked == false) { 554 T = temp; 555 break; 556 } 557 } 558 if (T == null) { 559 break; 560 } 561 562 T.fMarked = true; 564 565 int a; 567 for (a = 1; a<=lastInputSymbol; a++) { 568 Set U = null; 572 RBBINode p; 573 Iterator pit = T.fPositions.iterator(); 574 while (pit.hasNext()) { 575 p = (RBBINode )pit.next(); 576 if ((p.fType == RBBINode.leafChar) && (p.fVal == a)) { 577 if (U == null) { 578 U = new HashSet (); 579 } 580 U.addAll(p.fFollowPos); 581 } 582 } 583 584 int ux = 0; 586 boolean UinDstates = false; 587 if (U != null) { 588 Assert.assrt(U.size() > 0); 589 int ix; 590 for (ix=0; ix<fDStates.size(); ix++) { 591 RBBIStateDescriptor temp2; 592 temp2 = (RBBIStateDescriptor )fDStates.get(ix); 593 if (U.equals(temp2.fPositions)) { 594 U = temp2.fPositions; 595 ux = ix; 596 UinDstates = true; 597 break; 598 } 599 } 600 601 if (!UinDstates) 603 { 604 RBBIStateDescriptor newState = new RBBIStateDescriptor(lastInputSymbol); 605 newState.fPositions = U; 606 fDStates.add(newState); 607 ux = fDStates.size()-1; 608 } 609 610 T.fDtran[a] = ux; 612 } 613 } 614 } 615 } 616 617 618 619 void flagAcceptingStates() { 629 List endMarkerNodes = new ArrayList (); 630 RBBINode endMarker; 631 int i; 632 int n; 633 634 fRB.fTreeRoots[fRootIx].findNodes(endMarkerNodes, RBBINode.endMark); 635 636 for (i=0; i<endMarkerNodes.size(); i++) { 637 endMarker = (RBBINode)endMarkerNodes.get(i); 638 for (n=0; n<fDStates.size(); n++) { 639 RBBIStateDescriptor sd = (RBBIStateDescriptor )fDStates.get(n); 640 if (sd.fPositions.contains(endMarker)) { 642 646 if (sd.fAccepting==0) { 647 sd.fAccepting = endMarker.fVal; 649 if (sd.fAccepting == 0) { 650 sd.fAccepting = -1; 651 } 652 } 653 if (sd.fAccepting==-1 && endMarker.fVal != 0) { 654 sd.fAccepting = endMarker.fVal; 658 } 659 662 if (endMarker.fLookAheadEnd) { 665 sd.fLookAhead = sd.fAccepting; 669 } 670 } 671 } 672 } 673 } 674 675 676 void flagLookAheadStates() { 682 List lookAheadNodes = new ArrayList (); 683 RBBINode lookAheadNode; 684 int i; 685 int n; 686 687 fRB.fTreeRoots[fRootIx].findNodes(lookAheadNodes, RBBINode.lookAhead); 688 for (i=0; i<lookAheadNodes.size(); i++) { 689 lookAheadNode = (RBBINode )lookAheadNodes.get(i); 690 691 for (n=0; n<fDStates.size(); n++) { 692 RBBIStateDescriptor sd = (RBBIStateDescriptor )fDStates.get(n); 693 if (sd.fPositions.contains(lookAheadNode)) { 694 sd.fLookAhead = lookAheadNode.fVal; 695 } 696 } 697 } 698 } 699 700 701 702 703 void flagTaggedStates() { 709 List tagNodes = new ArrayList (); 710 RBBINode tagNode; 711 int i; 712 int n; 713 714 fRB.fTreeRoots[fRootIx].findNodes(tagNodes, RBBINode.tag); 715 for (i=0; i<tagNodes.size(); i++) { tagNode = (RBBINode )tagNodes.get(i); 717 718 for (n=0; n<fDStates.size(); n++) { RBBIStateDescriptor sd = (RBBIStateDescriptor )fDStates.get(n); 720 if (sd.fPositions.contains(tagNode)) { sd.fTagVals.add(new Integer (tagNode.fVal)); 722 } 723 } 724 } 725 } 726 727 728 729 752 void mergeRuleStatusVals() { 753 int i; 764 int n; 765 766 if (fRB.fRuleStatusVals.size() == 0) { 770 fRB.fRuleStatusVals.add(new Integer (1)); fRB.fRuleStatusVals.add(new Integer (0)); 773 SortedSet s0 = new TreeSet (); 774 Integer izero = new Integer (0); 775 fRB.fStatusSets.put(s0, izero); 776 SortedSet s1 = new TreeSet (); 777 s1.add(izero); 778 fRB.fStatusSets.put(s0, izero); 779 } 780 781 for (n=0; n<fDStates.size(); n++) { 784 RBBIStateDescriptor sd = (RBBIStateDescriptor )fDStates.get(n); 785 Set statusVals = sd.fTagVals; 786 Integer arrayIndexI = (Integer )fRB.fStatusSets.get(statusVals); 787 if (arrayIndexI == null) { 788 arrayIndexI = new Integer (fRB.fRuleStatusVals.size()); 793 fRB.fStatusSets.put(statusVals, arrayIndexI); 794 795 fRB.fRuleStatusVals.add(new Integer (statusVals.size())); 798 Iterator it = statusVals.iterator(); 799 while (it.hasNext()) { 800 fRB.fRuleStatusVals.add(it.next()); 801 } 802 803 } 804 805 sd.fTagsIdx = arrayIndexI.intValue(); 807 } 808 } 809 810 811 812 813 814 815 816 823 void printPosSets(RBBINode n) { 824 if (n==null) { 825 return; 826 } 827 RBBINode.printNode(n); 828 System.out.print(" Nullable: " + n.fNullable); 829 830 System.out.print(" firstpos: "); 831 printSet(n.fFirstPosSet); 832 833 System.out.print(" lastpos: "); 834 printSet(n.fLastPosSet); 835 836 System.out.print(" followpos: "); 837 printSet(n.fFollowPos); 838 839 printPosSets(n.fLeftChild); 840 printPosSets(n.fRightChild); 841 } 842 843 844 845 846 int getTableSize() { 856 int size = 0; 857 int numRows; 858 int numCols; 859 int rowSize; 860 861 if (fRB.fTreeRoots[fRootIx] == null) { 862 return 0; 863 } 864 865 size = 16; 867 numRows = fDStates.size(); 868 numCols = fRB.fSetBuilder.getNumCharCategories(); 869 870 rowSize = 8 + 2*numCols; 875 size += numRows * rowSize; 876 while (size % 8 > 0) { size++; 878 } 879 880 return size; 881 } 882 883 884 885 899 short [] exportTable() { 900 int state; 901 int col; 902 903 if (fRB.fTreeRoots[fRootIx] == null) { 904 return new short[0]; 905 } 906 907 Assert.assrt(fRB.fSetBuilder.getNumCharCategories() < 0x7fff && 908 fDStates.size() < 0x7fff); 909 910 int numStates = fDStates.size(); 911 912 int rowLen = 4 + fRB.fSetBuilder.getNumCharCategories(); 915 int tableSize = getTableSize() / 2; 916 917 918 short [] table = new short[tableSize]; 919 920 table[RBBIDataWrapper.NUMSTATES] = (short)(numStates >>> 16); 926 table[RBBIDataWrapper.NUMSTATES+1] = (short)(numStates & 0x0000ffff); 927 928 table[RBBIDataWrapper.ROWLEN] = (short)(rowLen >>> 16); 930 table[RBBIDataWrapper.ROWLEN+1] = (short)(rowLen & 0x0000ffff); 931 932 int flags = 0; 934 if (fRB.fLookAheadHardBreak) { 935 flags |= RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK; 936 } 937 if (fRB.fSetBuilder.sawBOF()) { 938 flags |= RBBIDataWrapper.RBBI_BOF_REQUIRED; 939 } 940 table[RBBIDataWrapper.FLAGS] = (short)(flags >>> 16); 941 table[RBBIDataWrapper.FLAGS+1] = (short)(flags & 0x0000ffff); 942 943 int numCharCategories = fRB.fSetBuilder.getNumCharCategories(); 944 for (state=0; state<numStates; state++) { 945 RBBIStateDescriptor sd = (RBBIStateDescriptor )fDStates.get(state); 946 int row = 8 + state*rowLen; 947 Assert.assrt (-32768 < sd.fAccepting && sd.fAccepting <= 32767); 948 Assert.assrt (-32768 < sd.fLookAhead && sd.fLookAhead <= 32767); 949 table[row + RBBIDataWrapper.ACCEPTING] = (short)sd.fAccepting; 950 table[row + RBBIDataWrapper.LOOKAHEAD] = (short)sd.fLookAhead; 951 table[row + RBBIDataWrapper.TAGIDX] = (short)sd.fTagsIdx; 952 for (col=0; col<numCharCategories; col++) { 953 table[row + RBBIDataWrapper.NEXTSTATES + col] = (short)sd.fDtran[col]; 954 } 955 } 956 return table; 957 } 958 959 960 961 967 void printSet(Collection s) { 968 Iterator it = s.iterator(); 969 while (it.hasNext()) { 970 RBBINode n = (RBBINode)it.next(); 971 RBBINode.printInt(n.fSerialNum, 8); 972 } 973 System.out.println(); 974 } 975 976 977 978 984 void printStates() { 985 int c; int n; 988 System.out.print("state | i n p u t s y m b o l s \n"); 989 System.out.print(" | Acc LA Tag"); 990 for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) { 991 RBBINode.printInt((int)c, 3); 992 } 993 System.out.print("\n"); 994 System.out.print(" |---------------"); 995 for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) { 996 System.out.print("---"); 997 } 998 System.out.print("\n"); 999 1000 for (n=0; n<fDStates.size(); n++) { 1001 RBBIStateDescriptor sd = (RBBIStateDescriptor)fDStates.get(n); 1002 RBBINode.printInt(n, 5); 1003 System.out.print(" | "); 1004 1005 RBBINode.printInt(sd.fAccepting, 3); 1006 RBBINode.printInt(sd.fLookAhead, 4); 1007 RBBINode.printInt(sd.fTagsIdx, 6); 1008 System.out.print(" "); 1009 for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) { 1010 RBBINode.printInt(sd.fDtran[c], 3); 1011 } 1012 System.out.print("\n"); 1013 } 1014 System.out.print("\n\n"); 1015 } 1016 1017 1018 1019 1020 1026 void printRuleStatusTable() { 1027 int thisRecord = 0; 1028 int nextRecord = 0; 1029 int i; 1030 List tbl = fRB.fRuleStatusVals; 1031 1032 System.out.print("index | tags \n"); 1033 System.out.print("-------------------\n"); 1034 1035 while (nextRecord < tbl.size()) { 1036 thisRecord = nextRecord; 1037 nextRecord = thisRecord + ((Integer )tbl.get(thisRecord)).intValue() + 1; 1038 RBBINode.printInt(thisRecord, 7); 1039 for (i=thisRecord+1; i<nextRecord; i++) { 1040 int val = ((Integer )tbl.get(i)).intValue(); 1041 RBBINode.printInt(val, 7); 1042 } 1043 System.out.print("\n"); 1044 } 1045 System.out.print("\n\n"); 1046 } 1047 1048 1049 1050} 1051 | Popular Tags |