1 20 package JFlex; 21 22 import java.util.*; 23 import java.io.*; 24 25 26 34 final public class NFA { 35 36 StateSet [][] table; 39 40 StateSet [] epsilon; 43 44 boolean [] isFinal; 46 47 boolean [] isPushback; 50 51 Action [] action; 54 55 int numStates; 57 58 int numInput; 60 61 int numLexStates; 64 65 int estSize = 256; 67 68 Macros macros; 69 CharClasses classes; 70 71 LexScan scanner; 72 RegExps regExps; 73 74 private static StateSetEnumerator states = new StateSetEnumerator(); 76 private static StateSet tempStateSet = new StateSet(); 77 78 public NFA(int numInput, int estSize) { 79 this.numInput = numInput; 80 this.estSize = estSize; 81 numStates = 0; 82 epsilon = new StateSet [estSize]; 83 action = new Action [estSize]; 84 isFinal = new boolean [estSize]; 85 isPushback = new boolean [estSize]; 86 table = new StateSet [estSize][numInput]; 87 } 88 89 public NFA(int numInput, LexScan scanner, RegExps regExps, 90 Macros macros, CharClasses classes) { 91 this(numInput, regExps.NFASize(macros)+2*scanner.states.number()); 92 93 this.scanner = scanner; 94 this.regExps = regExps; 95 this.macros = macros; 96 this.classes = classes; 97 98 numLexStates = scanner.states.number(); 99 100 ensureCapacity(2*numLexStates); 101 102 numStates = 2*numLexStates; 103 } 104 105 public void addStandaloneRule() { 106 int start = numStates; 109 int end = numStates+1; 110 111 for (int c = 0; c < classes.getNumClasses(); c++) 112 addTransition(start, c, end); 113 114 for (int i = 0; i < numLexStates*2; i++) 115 addEpsilonTransition(i, start); 116 117 action[end] = new Action("System.out.print(yytext());", Integer.MAX_VALUE); 118 isFinal[end] = true; 119 } 120 121 122 public void addRegExp(int regExpNum) { 123 124 if (Options.DEBUG) 125 Out.debug("Adding nfa for regexp "+regExpNum+" :"+Out.NL+regExps.getRegExp(regExpNum)); 126 127 IntPair nfa = insertNFA( regExps.getRegExp(regExpNum) ); 128 129 Enumeration lexStates = regExps.getStates(regExpNum).elements(); 130 131 if ( !lexStates.hasMoreElements() ) 132 lexStates = scanner.states.getInclusiveStates(); 133 134 while ( lexStates.hasMoreElements() ) { 135 int stateNum = ((Integer )lexStates.nextElement()).intValue(); 136 137 if ( !regExps.isBOL(regExpNum) ) 138 addEpsilonTransition(2*stateNum, nfa.start); 139 140 addEpsilonTransition(2*stateNum+1, nfa.start); 141 } 142 143 144 if ( regExps.getLookAhead(regExpNum) != null ) { 145 IntPair look = insertNFA( regExps.getLookAhead(regExpNum) ); 146 147 addEpsilonTransition(nfa.end, look.start); 148 149 Action a = regExps.getAction(regExpNum); 150 a.setLookAction(true); 151 152 isPushback[nfa.end] = true; 153 action[look.end] = a; 154 isFinal[look.end] = true; 155 } 156 else { 157 action[nfa.end] = regExps.getAction(regExpNum); 158 isFinal[nfa.end] = true; 159 } 160 } 161 162 163 private void ensureCapacity(int newNumStates) { 164 int oldLength = epsilon.length; 165 166 if ( newNumStates < oldLength ) return; 167 168 int newStatesLength = Math.max(oldLength*2, newNumStates); 169 170 boolean [] newFinal = new boolean [newStatesLength]; 171 boolean [] newIsPush = new boolean [newStatesLength]; 172 Action [] newAction = new Action [newStatesLength]; 173 StateSet [] [] newTable = new StateSet [newStatesLength] [numInput]; 174 StateSet [] newEpsilon = new StateSet [newStatesLength]; 175 176 System.arraycopy(isFinal,0,newFinal,0,numStates); 177 System.arraycopy(isPushback,0,newIsPush,0,numStates); 178 System.arraycopy(action,0,newAction,0,numStates); 179 System.arraycopy(epsilon,0,newEpsilon,0,numStates); 180 System.arraycopy(table,0,newTable,0,numStates); 181 182 isFinal = newFinal; 183 isPushback = newIsPush; 184 action = newAction; 185 epsilon = newEpsilon; 186 table = newTable; 187 } 188 189 public void addTransition(int start, int input, int dest) { 190 Out.debug("Adding transition ("+start+", "+input+", "+dest+")"); 191 192 int maxS = Math.max(start,dest)+1; 193 194 ensureCapacity( maxS ); 195 196 if (maxS > numStates) numStates = maxS; 197 198 if ( table[start][input] != null ) 199 table[start][input].addState(dest); 200 else 201 table[start][input] = new StateSet(estSize,dest); 202 } 203 204 public void addEpsilonTransition(int start, int dest) { 205 int max = Math.max(start,dest)+1; 206 ensureCapacity( max ); 207 if (max > numStates) numStates = max; 208 209 if (epsilon[start] != null) 210 epsilon[start].addState(dest); 211 else 212 epsilon[start] = new StateSet(estSize,dest); 213 } 214 215 216 222 private boolean containsFinal(StateSet set) { 223 states.reset(set); 224 225 while ( states.hasMoreElements() ) 226 if ( isFinal[states.nextElement()] ) return true; 227 228 return false; 229 } 230 231 232 238 private boolean containsPushback(StateSet set) { 239 states.reset(set); 240 241 while ( states.hasMoreElements() ) 242 if ( isPushback[states.nextElement()] ) return true; 243 244 return false; 245 } 246 247 248 254 private Action getAction(StateSet set) { 255 256 states.reset(set); 257 258 Action maxAction = null; 259 260 Out.debug("Determining action of : "+set); 261 262 while ( states.hasMoreElements() ) { 263 264 Action currentAction = action[ states.nextElement() ]; 265 266 if ( currentAction != null ) { 267 if (maxAction == null) 268 maxAction = currentAction; 269 else 270 maxAction = maxAction.getHigherPriority(currentAction); 271 } 272 273 } 274 275 return maxAction; 276 } 277 278 279 290 private StateSet closure(int startState) { 291 292 294 StateSet notvisited = tempStateSet; 295 StateSet closure = new StateSet(numStates,startState); 296 297 notvisited.clear(); 298 notvisited.addState(startState); 299 300 while ( notvisited.containsElements() ) { 301 int state = notvisited.getAndRemoveElement(); 304 notvisited.add(closure.complement(epsilon[state])); 307 closure.add(epsilon[state]); 308 } 309 310 312 return closure; 313 } 314 315 318 private StateSet closure(StateSet startStates) { 319 StateSet result = new StateSet(numStates); 320 321 if (startStates != null) { 322 states.reset(startStates); 323 while (states.hasMoreElements()) 324 result.add( closure(states.nextElement()) ); 325 } 326 327 return result; 328 } 329 330 331 private void epsilonFill() { 332 for (int i = 0; i < numStates; i++) { 333 epsilon[i] = closure(i); 334 } 335 } 336 337 348 private StateSet DFAEdge(StateSet start, char input) { 349 351 tempStateSet.clear(); 352 353 states.reset(start); 354 while ( states.hasMoreElements() ) 355 tempStateSet.add( table[states.nextElement()][input] ); 356 357 StateSet result = new StateSet(tempStateSet); 358 359 states.reset(tempStateSet); 360 while ( states.hasMoreElements() ) 361 result.add( epsilon[states.nextElement()] ); 362 363 365 return result; 366 } 367 368 369 373 public DFA getDFA() { 374 375 Hashtable dfaStates = new Hashtable(numStates); 376 Vector dfaVector = new Vector(numStates); 377 378 DFA dfa = new DFA(2*numLexStates, numInput); 379 380 int numDFAStates = 0; 381 int currentDFAState = 0; 382 383 Out.println("Converting NFA to DFA : "); 384 385 epsilonFill(); 386 387 StateSet currentState, newState; 388 389 for ( int i = 0; i < 2*numLexStates; i++ ) { 390 newState = epsilon[i]; 391 392 dfaStates.put(newState, new Integer (numDFAStates)); 393 dfaVector.addElement(newState); 394 395 dfa.setLexState( i, numDFAStates ); 396 397 dfa.setFinal( numDFAStates, containsFinal(newState) ); 398 dfa.setPushback( numDFAStates, containsPushback(newState) ); 399 dfa.setAction( numDFAStates, getAction(newState) ); 400 401 numDFAStates++; 402 } 403 404 numDFAStates--; 405 406 if (Options.DEBUG) 407 Out.debug("DFA start states are :"+Out.NL+dfaStates+Out.NL+Out.NL+"ordered :"+Out.NL+dfaVector); 408 409 currentDFAState = 0; 410 411 StateSet tempStateSet = NFA.tempStateSet; 412 StateSetEnumerator states = NFA.states; 413 414 newState = new StateSet(numStates); 416 417 while ( currentDFAState <= numDFAStates ) { 418 419 currentState = (StateSet) dfaVector.elementAt(currentDFAState); 420 421 for (char input = 0; input < numInput; input++) { 422 423 425 427 429 tempStateSet.clear(); 430 states.reset(currentState); 431 while ( states.hasMoreElements() ) 432 tempStateSet.add( table[states.nextElement()][input] ); 433 434 newState.copy(tempStateSet); 435 436 states.reset(tempStateSet); 437 while ( states.hasMoreElements() ) 438 newState.add( epsilon[states.nextElement()] ); 439 440 442 443 if ( newState.containsElements() ) { 444 445 447 Integer nextDFAState = (Integer ) dfaStates.get(newState); 449 450 if ( nextDFAState != null ) { 451 dfa.addTransition(currentDFAState, input, nextDFAState.intValue()); 453 } 454 else { 455 if (Options.progress) Out.print("."); 456 numDFAStates++; 459 460 StateSet storeState = new StateSet(newState); 462 463 dfaStates.put(storeState, new Integer (numDFAStates)); 464 dfaVector.addElement(storeState); 465 466 dfa.addTransition(currentDFAState, input, numDFAStates); 467 dfa.setFinal( numDFAStates, containsFinal(storeState) ); 468 dfa.setPushback( numDFAStates, containsPushback(storeState) ); 469 dfa.setAction( numDFAStates, getAction(storeState) ); 470 } 471 } 472 } 473 474 currentDFAState++; 475 } 476 477 if (Options.verbose) Out.println(""); 478 479 return dfa; 480 } 481 482 483 public void dumpTable() { 484 Out.dump(toString()); 485 } 486 487 public String toString() { 488 StringBuffer result = new StringBuffer (); 489 490 for (int i=0; i < numStates; i++) { 491 result.append("State"); 492 if ( isFinal[i] ) result.append("[FINAL]"); 493 if ( isPushback[i] ) result.append(" [PUSHBACK]"); 494 result.append(" "+i+Out.NL); 495 496 for (char input = 0; input < numInput; input++) { 497 if ( table[i][input] != null && table[i][input].containsElements() ) 498 result.append(" with "+((int) input)+" in "+table[i][input]+Out.NL); 499 500 501 } 502 503 if ( epsilon[i] != null && epsilon[i].containsElements() ) 504 result.append(" with epsilon in "+epsilon[i]+Out.NL); 505 } 506 507 return result.toString(); 508 } 509 510 public void writeDot(File file) { 511 try { 512 PrintWriter writer = new PrintWriter(new FileWriter(file)); 513 writer.println(dotFormat()); 514 writer.close(); 515 } 516 catch (IOException e) { 517 Out.error(ErrorMessages.FILE_WRITE, file); 518 throw new GeneratorException(); 519 } 520 } 521 522 public String dotFormat() { 523 StringBuffer result = new StringBuffer (); 524 525 result.append("digraph NFA {"+Out.NL); 526 result.append("rankdir = LR"+Out.NL); 527 528 for (int i=0; i < numStates; i++) { 529 if ( isFinal[i] || isPushback[i] ) result.append(i); 530 if ( isFinal[i] ) result.append(" [shape = doublecircle]"); 531 if ( isPushback[i] ) result.append(" [shape = box]"); 532 if ( isFinal[i] || isPushback[i] ) result.append(Out.NL); 533 } 534 535 for (int i=0; i < numStates; i++) { 536 for (int input = 0; input < numInput; input++) { 537 if ( table[i][input] != null ) { 538 StateSetEnumerator states = table[i][input].states(); 539 540 while (states.hasMoreElements()) { 541 int s = states.nextElement(); 542 result.append(i+" -> "+s); 543 result.append(" [label=\""+classes.toString(input)+"\"]"+Out.NL); 544 } 545 } 546 } 547 if ( epsilon[i] != null ) { 548 StateSetEnumerator states = epsilon[i].states(); 549 while (states.hasMoreElements()) { 550 int s = states.nextElement(); 551 result.append(i+" -> "+s+" [style=dotted]"+Out.NL); 552 } 553 } 554 } 555 556 result.append("}"+Out.NL); 557 558 return result.toString(); 559 } 560 561 562 565 private void insertLetterNFA(boolean caseless, char letter, int start, int end) { 566 if (caseless) { 567 int lower = classes.getClassCode(Character.toLowerCase(letter)); 568 int upper = classes.getClassCode(Character.toUpperCase(letter)); 569 addTransition(start, lower, end); 570 if (upper != lower) addTransition(start, upper, end); 571 } 572 else { 573 addTransition(start, classes.getClassCode(letter), end); 574 } 575 } 576 577 private IntPair insertStringNFA(boolean caseless, String letters) { 578 int start = numStates; 579 int i; 580 581 for (i = 0; i < letters.length(); i++) { 582 if (caseless) { 583 char c = letters.charAt(i); 584 int lower = classes.getClassCode(Character.toLowerCase(c)); 585 int upper = classes.getClassCode(Character.toUpperCase(c)); 586 addTransition(i+start, lower, i+start+1); 587 if (upper != lower) addTransition(i+start, upper, i+start+1); 588 } 589 else { 590 addTransition(i+start, classes.getClassCode(letters.charAt(i)), i+start+1); 591 } 592 } 593 594 return new IntPair(start, i+start); 595 } 596 597 598 private void insertClassNFA(Vector intervalls, int start, int end) { 599 if (intervalls == null) return; 601 602 int [] cl = classes.getClassCodes(intervalls); 603 for (int i = 0; i < cl.length; i++) 604 addTransition(start, cl[i], end); 605 } 606 607 private void insertNotClassNFA(Vector intervalls, int start, int end) { 608 int [] cl = classes.getNotClassCodes(intervalls); 609 610 for (int i = 0; i < cl.length; i++) 611 addTransition(start, cl[i], end); 612 } 613 614 615 627 private IntPair complement(IntPair nfa) { 628 629 if (Options.DEBUG) { 630 Out.debug("complement for "+nfa); 631 Out.debug("NFA is :"+Out.NL+this); 632 } 633 634 int dfaStart = nfa.end+1; 635 636 epsilonFill(); 638 639 Hashtable dfaStates = new Hashtable(numStates); 640 Vector dfaVector = new Vector(numStates); 641 642 int numDFAStates = 0; 643 int currentDFAState = 0; 644 645 StateSet currentState, newState; 646 647 newState = epsilon[nfa.start]; 648 dfaStates.put(newState, new Integer (numDFAStates)); 649 dfaVector.addElement(newState); 650 651 if (Options.DEBUG) 652 Out.debug("pos DFA start state is :"+Out.NL+dfaStates+Out.NL+Out.NL+"ordered :"+Out.NL+dfaVector); 653 654 currentDFAState = 0; 655 656 while ( currentDFAState <= numDFAStates ) { 657 658 currentState = (StateSet) dfaVector.elementAt(currentDFAState); 659 660 for (char input = 0; input < numInput; input++) { 661 newState = DFAEdge(currentState, input); 662 663 if ( newState.containsElements() ) { 664 665 667 Integer nextDFAState = (Integer ) dfaStates.get(newState); 669 670 if ( nextDFAState != null ) { 671 addTransition(dfaStart+currentDFAState, input, dfaStart+nextDFAState.intValue()); 673 } 674 else { 675 if (Options.dump) Out.print("+"); 676 numDFAStates++; 679 680 dfaStates.put(newState, new Integer (numDFAStates)); 681 dfaVector.addElement(newState); 682 683 addTransition(dfaStart+currentDFAState, input, dfaStart+numDFAStates); 684 } 685 } 686 } 687 688 currentDFAState++; 689 } 690 691 693 if (Options.DEBUG) 695 Out.debug("dfa finished, nfa is now :"+Out.NL+this); 696 697 int start = dfaStart+numDFAStates+1; 698 int error = dfaStart+numDFAStates+2; 699 int end = dfaStart+numDFAStates+3; 700 701 addEpsilonTransition(start, dfaStart); 702 703 for (int i = 0; i < numInput; i++) 704 addTransition(error, i, error); 705 706 addEpsilonTransition(error, end); 707 708 for (int s = 0; s <= numDFAStates; s++) { 709 currentState = (StateSet) dfaVector.elementAt(s); 710 711 currentDFAState = dfaStart+s; 712 713 if (!currentState.isElement(nfa.end)) 715 addEpsilonTransition(currentDFAState, end); 716 717 for (int i = 0; i < numInput; i++) 720 if (table[currentDFAState][i] == null) 721 addTransition(currentDFAState, i, error); 722 } 723 724 if (live == null || live.length < numStates) { 726 live = new boolean [2*numStates]; 727 visited = new boolean [2*numStates]; 728 } 729 730 _end = end; 731 _dfaStates = dfaVector; 732 _dfaStart = dfaStart; 733 removeDead(dfaStart); 734 735 if (Options.DEBUG) 736 Out.debug("complement finished, nfa ("+start+","+end+") is now :"+this); 737 738 return new IntPair(start, end); 739 } 740 741 private boolean [] live; private boolean [] visited; private int _end; private Vector _dfaStates; 747 private int _dfaStart; 749 private void removeDead(int start) { 750 752 if ( visited[start] || live[start] ) return; 753 visited[start] = true; 754 755 757 if (closure(start).isElement(_end)) 758 live[start] = true; 759 760 762 for (int i = 0; i < numInput; i++) { 763 StateSet nextState = closure(table[start][i]); 764 StateSetEnumerator states = nextState.states(); 765 while (states.hasMoreElements()) { 766 int next = states.nextElement(); 767 768 if (next != start) { 769 removeDead(next); 770 771 if (live[next]) 772 live[start] = true; 773 else 774 table[start][i] = null; 775 } 776 } 777 } 778 779 StateSet nextState = closure(epsilon[start]); 780 StateSetEnumerator states = nextState.states(); 781 while (states.hasMoreElements()) { 782 int next = states.nextElement(); 783 784 if (next != start) { 785 removeDead(next); 786 787 if (live[next]) 788 live[start] = true; 789 } 790 } 791 792 } 794 795 796 813 private void insertNFA(RegExp regExp, int start, int end) { 814 switch (regExp.type) { 815 816 case sym.BAR: 817 RegExp2 r = (RegExp2) regExp; 818 insertNFA(r.r1, start, end); 819 insertNFA(r.r2, start, end); 820 return; 821 822 case sym.CCLASS: 823 insertClassNFA( (Vector) ((RegExp1) regExp).content, start, end); 824 return; 825 826 case sym.CCLASSNOT: 827 insertNotClassNFA( (Vector) ((RegExp1) regExp).content, start, end); 828 return; 829 830 case sym.CHAR: 831 insertLetterNFA( 832 false, ((Character ) ((RegExp1) regExp).content).charValue(), 833 start, end); 834 return; 835 836 case sym.CHAR_I: 837 insertLetterNFA( 838 true, ((Character ) ((RegExp1) regExp).content).charValue(), 839 start, end); 840 return; 841 842 case sym.MACROUSE: 843 insertNFA(macros.getDefinition((String ) ((RegExp1) regExp).content), 844 start, end); 845 return; 846 } 847 848 throw new Error ("Unknown expression type "+regExp.type+" in NFA construction"); 849 } 850 851 852 866 public IntPair insertNFA(RegExp regExp) { 867 868 IntPair nfa1, nfa2; 869 int start, end; 870 RegExp2 r; 871 872 if (Options.DEBUG) 873 Out.debug("Inserting RegExp : "+regExp); 874 875 if (regExp.isCharClass(macros)) { 876 start = numStates; 877 end = numStates+1; 878 879 ensureCapacity(end+1); 880 if (end+1 > numStates) numStates = end+1; 881 882 insertNFA(regExp, start, end); 883 884 return new IntPair(start, end); 885 } 886 887 switch (regExp.type) { 888 889 case sym.BAR: 890 891 r = (RegExp2) regExp; 892 893 nfa1 = insertNFA(r.r1); 894 nfa2 = insertNFA(r.r2); 895 896 start = nfa2.end+1; 897 end = nfa2.end+2; 898 899 addEpsilonTransition(start, nfa1.start); 900 addEpsilonTransition(start, nfa2.start); 901 addEpsilonTransition(nfa1.end, end); 902 addEpsilonTransition(nfa2.end, end); 903 904 return new IntPair(start, end); 905 906 case sym.CONCAT: 907 908 r = (RegExp2) regExp; 909 910 nfa1 = insertNFA(r.r1); 911 nfa2 = insertNFA(r.r2); 912 913 addEpsilonTransition(nfa1.end, nfa2.start); 914 915 return new IntPair(nfa1.start, nfa2.end); 916 917 case sym.STAR: 918 nfa1 = insertNFA( (RegExp) ((RegExp1) regExp).content ); 919 920 start = nfa1.end+1; 921 end = nfa1.end+2; 922 923 addEpsilonTransition(nfa1.end, end); 924 addEpsilonTransition(start, nfa1.start); 925 926 addEpsilonTransition(start, end); 927 addEpsilonTransition(nfa1.end, nfa1.start); 928 929 return new IntPair(start, end); 930 931 case sym.PLUS: 932 nfa1 = insertNFA( (RegExp) ((RegExp1) regExp).content ); 933 934 start = nfa1.end+1; 935 end = nfa1.end+2; 936 937 addEpsilonTransition(nfa1.end, end); 938 addEpsilonTransition(start, nfa1.start); 939 940 addEpsilonTransition(nfa1.end, nfa1.start); 941 942 return new IntPair(start, end); 943 944 case sym.QUESTION: 945 nfa1 = insertNFA( (RegExp) ((RegExp1) regExp).content ); 946 947 addEpsilonTransition(nfa1.start, nfa1.end); 948 949 return new IntPair(nfa1.start, nfa1.end); 950 951 case sym.BANG: 952 return complement(insertNFA((RegExp) ((RegExp1) regExp).content)); 953 954 case sym.TILDE: 955 nfa1 = insertNFA((RegExp) ((RegExp1) regExp).content); 956 957 start = nfa1.end+1; 958 int s1 = start+1; 959 int s2 = s1+1; 960 end = s2+1; 961 962 for (int i = 0; i < numInput; i++) { 963 addTransition(s1,i,s1); 964 addTransition(s2,i,s2); 965 } 966 967 addEpsilonTransition(start, s1); 968 addEpsilonTransition(s1, nfa1.start); 969 addEpsilonTransition(nfa1.end, s2); 970 addEpsilonTransition(s2, end); 971 972 nfa1 = complement(new IntPair(start,end)); 973 nfa2 = insertNFA((RegExp) ((RegExp1) regExp).content); 974 975 addEpsilonTransition(nfa1.end, nfa2.start); 976 977 return new IntPair(nfa1.start, nfa2.end); 978 979 case sym.STRING: 980 return insertStringNFA(false, (String ) ((RegExp1) regExp).content ); 981 982 case sym.STRING_I: 983 return insertStringNFA(true, (String ) ((RegExp1) regExp).content ); 984 985 case sym.MACROUSE: 986 return insertNFA(macros.getDefinition((String ) ((RegExp1) regExp).content)); 987 } 988 989 throw new Error ("Unknown expression type "+regExp.type+" in NFA construction"); 990 } 991 } 992 | Popular Tags |