KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > JFlex > NFA


1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2  * JFlex 1.4.1 *
3  * Copyright (C) 1998-2004 Gerwin Klein <lsf@jflex.de> *
4  * All rights reserved. *
5  * *
6  * This program is free software; you can redistribute it and/or modify *
7  * it under the terms of the GNU General Public License. See the file *
8  * COPYRIGHT for more information. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License along *
16  * with this program; if not, write to the Free Software Foundation, Inc., *
17  * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
18  * *
19  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

20 package JFlex;
21
22 import java.util.*;
23 import java.io.*;
24
25
26 /**
27  * NFA representation in JFlex.
28  *
29  * Contains algorithms RegExp -> NFA and NFA -> DFA.
30  *
31  * @author Gerwin Klein
32  * @version JFlex 1.4.1, $Revision: 2.8 $, $Date: 2004/11/06 23:03:32 $
33  */

34 final public class NFA {
35
36   // table[current_state][next_char] is the set of states that can be reached
37
// from current_state with an input next_char
38
StateSet [][] table;
39
40   // epsilon[current_state] is the set of states that can be reached
41
// from current_state via epsilon-edges
42
StateSet [] epsilon;
43
44   // isFinal[state] == true <=> state is a final state of the NFA
45
boolean [] isFinal;
46
47   // isPushback[state] == true <=> state is the final state of a regexp that
48
// should only be matched when followed by a certain lookahead.
49
boolean [] isPushback;
50
51   // action[current_state]: the action associated with the state
52
// current_state (null, if there is no action for the state)
53
Action [] action;
54
55   // the number of states in this NFA
56
int numStates;
57
58   // the current maximum number of input characters
59
int numInput;
60
61   // the number of lexical States. Lexical states have the indices
62
// 0..numLexStates-1 in the transition table
63
int numLexStates;
64
65   // estimated size of the NFA (before actual construction)
66
int estSize = 256;
67   
68   Macros macros;
69   CharClasses classes;
70
71   LexScan scanner;
72   RegExps regExps;
73
74   // will be reused by several methods (avoids excessive object creation)
75
private static StateSetEnumerator states = new StateSetEnumerator();
76   private static StateSet tempStateSet = new StateSet();
77   
78   public NFA(int numInput, int estSize) {
79     this.numInput = numInput;
80     this.estSize = estSize;
81     numStates = 0;
82     epsilon = new StateSet [estSize];
83     action = new Action [estSize];
84     isFinal = new boolean [estSize];
85     isPushback = new boolean [estSize];
86     table = new StateSet [estSize][numInput];
87   }
88
89   public NFA(int numInput, LexScan scanner, RegExps regExps,
90              Macros macros, CharClasses classes) {
91     this(numInput, regExps.NFASize(macros)+2*scanner.states.number());
92
93     this.scanner = scanner;
94     this.regExps = regExps;
95     this.macros = macros;
96     this.classes = classes;
97     
98     numLexStates = scanner.states.number();
99     
100     ensureCapacity(2*numLexStates);
101     
102     numStates = 2*numLexStates;
103   }
104   
105   public void addStandaloneRule() {
106     // standalone rule has least priority, fires
107
// transition on all characters and has "print it rule"
108
int start = numStates;
109     int end = numStates+1;
110
111     for (int c = 0; c < classes.getNumClasses(); c++)
112       addTransition(start, c, end);
113    
114     for (int i = 0; i < numLexStates*2; i++)
115       addEpsilonTransition(i, start);
116
117     action[end] = new Action("System.out.print(yytext());", Integer.MAX_VALUE);
118     isFinal[end] = true;
119   }
120
121   
122   public void addRegExp(int regExpNum) {
123
124     if (Options.DEBUG)
125       Out.debug("Adding nfa for regexp "+regExpNum+" :"+Out.NL+regExps.getRegExp(regExpNum));
126     
127     IntPair nfa = insertNFA( regExps.getRegExp(regExpNum) );
128     
129     Enumeration lexStates = regExps.getStates(regExpNum).elements();
130     
131     if ( !lexStates.hasMoreElements() )
132       lexStates = scanner.states.getInclusiveStates();
133
134     while ( lexStates.hasMoreElements() ) {
135       int stateNum = ((Integer JavaDoc)lexStates.nextElement()).intValue();
136         
137       if ( !regExps.isBOL(regExpNum) )
138         addEpsilonTransition(2*stateNum, nfa.start);
139       
140       addEpsilonTransition(2*stateNum+1, nfa.start);
141     }
142         
143         
144     if ( regExps.getLookAhead(regExpNum) != null ) {
145       IntPair look = insertNFA( regExps.getLookAhead(regExpNum) );
146       
147       addEpsilonTransition(nfa.end, look.start);
148
149       Action a = regExps.getAction(regExpNum);
150       a.setLookAction(true);
151
152       isPushback[nfa.end] = true;
153       action[look.end] = a;
154       isFinal[look.end] = true;
155     }
156     else {
157       action[nfa.end] = regExps.getAction(regExpNum);
158       isFinal[nfa.end] = true;
159     }
160   }
161
162
163   private void ensureCapacity(int newNumStates) {
164     int oldLength = epsilon.length;
165     
166     if ( newNumStates < oldLength ) return;
167       
168     int newStatesLength = Math.max(oldLength*2, newNumStates);
169
170     boolean [] newFinal = new boolean [newStatesLength];
171     boolean [] newIsPush = new boolean [newStatesLength];
172     Action [] newAction = new Action [newStatesLength];
173     StateSet [] [] newTable = new StateSet [newStatesLength] [numInput];
174     StateSet [] newEpsilon = new StateSet [newStatesLength];
175
176     System.arraycopy(isFinal,0,newFinal,0,numStates);
177     System.arraycopy(isPushback,0,newIsPush,0,numStates);
178     System.arraycopy(action,0,newAction,0,numStates);
179     System.arraycopy(epsilon,0,newEpsilon,0,numStates);
180     System.arraycopy(table,0,newTable,0,numStates);
181
182     isFinal = newFinal;
183     isPushback = newIsPush;
184     action = newAction;
185     epsilon = newEpsilon;
186     table = newTable;
187   }
188   
189   public void addTransition(int start, int input, int dest) {
190     Out.debug("Adding transition ("+start+", "+input+", "+dest+")");
191     
192     int maxS = Math.max(start,dest)+1;
193  
194     ensureCapacity( maxS );
195
196     if (maxS > numStates) numStates = maxS;
197
198     if ( table[start][input] != null )
199       table[start][input].addState(dest);
200     else
201       table[start][input] = new StateSet(estSize,dest);
202   }
203
204   public void addEpsilonTransition(int start, int dest) {
205     int max = Math.max(start,dest)+1;
206     ensureCapacity( max );
207     if (max > numStates) numStates = max;
208
209     if (epsilon[start] != null)
210       epsilon[start].addState(dest);
211     else
212       epsilon[start] = new StateSet(estSize,dest);
213   }
214
215
216   /**
217    * Returns <code>true</code>, iff the specified set of states
218    * contains a final state.
219    *
220    * @param set the set of states that is tested for final states.
221    */

222   private boolean containsFinal(StateSet set) {
223     states.reset(set);
224
225     while ( states.hasMoreElements() )
226       if ( isFinal[states.nextElement()] ) return true;
227
228     return false;
229   }
230
231
232   /**
233    * Returns <code>true</code>, iff the specified set of states
234    * contains a pushback-state.
235    *
236    * @param set the set of states that is tested for pushback-states.
237    */

238   private boolean containsPushback(StateSet set) {
239     states.reset(set);
240
241     while ( states.hasMoreElements() )
242       if ( isPushback[states.nextElement()] ) return true;
243
244     return false;
245   }
246
247
248   /**
249    * Returns the action with highest priority in the specified
250    * set of states.
251    *
252    * @param set the set of states for which to determine the action
253    */

254   private Action getAction(StateSet set) {
255
256     states.reset(set);
257     
258     Action maxAction = null;
259
260     Out.debug("Determining action of : "+set);
261
262     while ( states.hasMoreElements() ) {
263
264       Action currentAction = action[ states.nextElement() ];
265       
266       if ( currentAction != null ) {
267           if (maxAction == null)
268             maxAction = currentAction;
269           else
270             maxAction = maxAction.getHigherPriority(currentAction);
271       }
272
273     }
274
275     return maxAction;
276   }
277
278
279   /**
280    * Calculates the epsilon closure for a specified set of states.
281    *
282    * The epsilon closure for set a is the set of states that can be reached
283    * by epsilon edges from a.
284    *
285    * @param set the set of states to calculate the epsilon closure for
286    *
287    * @return the epsilon closure of the specified set of states
288    * in this NFA
289    */

290   private StateSet closure(int startState) {
291
292     // Out.debug("Calculating closure of "+set);
293

294     StateSet notvisited = tempStateSet;
295     StateSet closure = new StateSet(numStates,startState);
296
297     notvisited.clear();
298     notvisited.addState(startState);
299
300     while ( notvisited.containsElements() ) {
301       // Out.debug("closure is now "+closure);
302
// Out.debug("notvisited is "+notvisited);
303
int state = notvisited.getAndRemoveElement();
304       // Out.debug("removed element "+state+" of "+notvisited);
305
// Out.debug("epsilon[states] = "+epsilon[state]);
306
notvisited.add(closure.complement(epsilon[state]));
307       closure.add(epsilon[state]);
308     }
309
310     // Out.debug("Closure is : "+closure);
311

312     return closure;
313   }
314
315   /**
316    * Returns the epsilon closure of a set of states
317    */

318   private StateSet closure(StateSet startStates) {
319     StateSet result = new StateSet(numStates);
320
321     if (startStates != null) {
322       states.reset(startStates);
323       while (states.hasMoreElements())
324         result.add( closure(states.nextElement()) );
325     }
326       
327     return result;
328   }
329   
330
331   private void epsilonFill() {
332     for (int i = 0; i < numStates; i++) {
333       epsilon[i] = closure(i);
334     }
335   }
336
337   /**
338    * Calculates the set of states that can be reached from another
339    * set of states <code>start</code> with an specified input
340    * character <code>input</code>
341    *
342    * @param start the set of states to start from
343    * @param input the input character for which to search the next states
344    *
345    * @return the set of states that are reached from <code>start</code>
346    * via <code>input</code>
347    */

348   private StateSet DFAEdge(StateSet start, char input) {
349     // Out.debug("Calculating DFAEdge for state set "+start+" and input '"+input+"'");
350

351     tempStateSet.clear();
352
353     states.reset(start);
354     while ( states.hasMoreElements() )
355       tempStateSet.add( table[states.nextElement()][input] );
356
357     StateSet result = new StateSet(tempStateSet);
358     
359     states.reset(tempStateSet);
360     while ( states.hasMoreElements() )
361       result.add( epsilon[states.nextElement()] );
362     
363     // Out.debug("DFAEdge is : "+result);
364

365     return result;
366   }
367
368   
369   /**
370    * Returns an DFA that accepts the same language as this NFA.
371    * This DFA is usualy not minimal.
372    */

373   public DFA getDFA() {
374
375     Hashtable dfaStates = new Hashtable(numStates);
376     Vector dfaVector = new Vector(numStates);
377
378     DFA dfa = new DFA(2*numLexStates, numInput);
379
380     int numDFAStates = 0;
381     int currentDFAState = 0;
382
383     Out.println("Converting NFA to DFA : ");
384
385     epsilonFill();
386
387     StateSet currentState, newState;
388     
389     for ( int i = 0; i < 2*numLexStates; i++ ) {
390       newState = epsilon[i];
391   
392       dfaStates.put(newState, new Integer JavaDoc(numDFAStates));
393       dfaVector.addElement(newState);
394   
395       dfa.setLexState( i, numDFAStates );
396         
397       dfa.setFinal( numDFAStates, containsFinal(newState) );
398       dfa.setPushback( numDFAStates, containsPushback(newState) );
399       dfa.setAction( numDFAStates, getAction(newState) );
400
401       numDFAStates++;
402     }
403      
404     numDFAStates--;
405
406     if (Options.DEBUG)
407       Out.debug("DFA start states are :"+Out.NL+dfaStates+Out.NL+Out.NL+"ordered :"+Out.NL+dfaVector);
408      
409     currentDFAState = 0;
410       
411     StateSet tempStateSet = NFA.tempStateSet;
412     StateSetEnumerator states = NFA.states;
413
414     // will be reused
415
newState = new StateSet(numStates);
416
417     while ( currentDFAState <= numDFAStates ) {
418
419       currentState = (StateSet) dfaVector.elementAt(currentDFAState);
420
421       for (char input = 0; input < numInput; input++) {
422
423           // newState = DFAEdge(currentState, input);
424

425         // inlining DFAEdge for performance:
426

427         // Out.debug("Calculating DFAEdge for state set "+currentState+" and input '"+input+"'");
428

429         tempStateSet.clear();
430         states.reset(currentState);
431         while ( states.hasMoreElements() )
432           tempStateSet.add( table[states.nextElement()][input] );
433         
434         newState.copy(tempStateSet);
435         
436         states.reset(tempStateSet);
437         while ( states.hasMoreElements() )
438           newState.add( epsilon[states.nextElement()] );
439     
440         // Out.debug("DFAEdge is : "+newState);
441

442
443           if ( newState.containsElements() ) {
444
445           // Out.debug("DFAEdge for input "+(int)input+" and state set "+currentState+" is "+newState);
446

447             // Out.debug("Looking for state set "+newState);
448
Integer JavaDoc nextDFAState = (Integer JavaDoc) dfaStates.get(newState);
449
450             if ( nextDFAState != null ) {
451               // Out.debug("FOUND!");
452
dfa.addTransition(currentDFAState, input, nextDFAState.intValue());
453             }
454             else {
455             if (Options.progress) Out.print(".");
456               // Out.debug("NOT FOUND!");
457
// Out.debug("Table was "+dfaStates);
458
numDFAStates++;
459
460             // make a new copy of newState to store in dfaStates
461
StateSet storeState = new StateSet(newState);
462
463               dfaStates.put(storeState, new Integer JavaDoc(numDFAStates));
464               dfaVector.addElement(storeState);
465         
466               dfa.addTransition(currentDFAState, input, numDFAStates);
467               dfa.setFinal( numDFAStates, containsFinal(storeState) );
468             dfa.setPushback( numDFAStates, containsPushback(storeState) );
469               dfa.setAction( numDFAStates, getAction(storeState) );
470             }
471           }
472       }
473       
474       currentDFAState++;
475     }
476     
477     if (Options.verbose) Out.println("");
478
479     return dfa;
480   }
481
482
483   public void dumpTable() {
484     Out.dump(toString());
485   }
486
487   public String JavaDoc toString() {
488     StringBuffer JavaDoc result = new StringBuffer JavaDoc();
489
490     for (int i=0; i < numStates; i++) {
491       result.append("State");
492       if ( isFinal[i] ) result.append("[FINAL]");
493       if ( isPushback[i] ) result.append(" [PUSHBACK]");
494       result.append(" "+i+Out.NL);
495       
496       for (char input = 0; input < numInput; input++) {
497           if ( table[i][input] != null && table[i][input].containsElements() )
498             result.append(" with "+((int) input)+" in "+table[i][input]+Out.NL);
499           
500           
501         }
502
503       if ( epsilon[i] != null && epsilon[i].containsElements() )
504           result.append(" with epsilon in "+epsilon[i]+Out.NL);
505     }
506     
507     return result.toString();
508   }
509
510   public void writeDot(File file) {
511     try {
512       PrintWriter writer = new PrintWriter(new FileWriter(file));
513       writer.println(dotFormat());
514       writer.close();
515     }
516     catch (IOException e) {
517       Out.error(ErrorMessages.FILE_WRITE, file);
518       throw new GeneratorException();
519     }
520   }
521
522   public String JavaDoc dotFormat() {
523     StringBuffer JavaDoc result = new StringBuffer JavaDoc();
524
525     result.append("digraph NFA {"+Out.NL);
526     result.append("rankdir = LR"+Out.NL);
527
528     for (int i=0; i < numStates; i++) {
529       if ( isFinal[i] || isPushback[i] ) result.append(i);
530       if ( isFinal[i] ) result.append(" [shape = doublecircle]");
531       if ( isPushback[i] ) result.append(" [shape = box]");
532       if ( isFinal[i] || isPushback[i] ) result.append(Out.NL);
533     }
534
535     for (int i=0; i < numStates; i++) {
536       for (int input = 0; input < numInput; input++) {
537           if ( table[i][input] != null ) {
538           StateSetEnumerator states = table[i][input].states();
539         
540           while (states.hasMoreElements()) {
541             int s = states.nextElement();
542             result.append(i+" -> "+s);
543             result.append(" [label=\""+classes.toString(input)+"\"]"+Out.NL);
544           }
545         }
546       }
547       if ( epsilon[i] != null ) {
548         StateSetEnumerator states = epsilon[i].states();
549         while (states.hasMoreElements()) {
550           int s = states.nextElement();
551           result.append(i+" -> "+s+" [style=dotted]"+Out.NL);
552         }
553       }
554     }
555
556     result.append("}"+Out.NL);
557
558     return result.toString();
559   }
560
561
562   //-----------------------------------------------------------------------
563
// Functions for constructing NFAs out of regular expressions.
564

565   private void insertLetterNFA(boolean caseless, char letter, int start, int end) {
566     if (caseless) {
567       int lower = classes.getClassCode(Character.toLowerCase(letter));
568       int upper = classes.getClassCode(Character.toUpperCase(letter));
569       addTransition(start, lower, end);
570       if (upper != lower) addTransition(start, upper, end);
571     }
572     else {
573       addTransition(start, classes.getClassCode(letter), end);
574     }
575   }
576   
577   private IntPair insertStringNFA(boolean caseless, String JavaDoc letters) {
578     int start = numStates;
579     int i;
580
581     for (i = 0; i < letters.length(); i++) {
582       if (caseless) {
583         char c = letters.charAt(i);
584         int lower = classes.getClassCode(Character.toLowerCase(c));
585         int upper = classes.getClassCode(Character.toUpperCase(c));
586         addTransition(i+start, lower, i+start+1);
587         if (upper != lower) addTransition(i+start, upper, i+start+1);
588       }
589       else {
590         addTransition(i+start, classes.getClassCode(letters.charAt(i)), i+start+1);
591       }
592     }
593
594     return new IntPair(start, i+start);
595   }
596   
597
598   private void insertClassNFA(Vector intervalls, int start, int end) {
599     // empty char class is ok:
600
if (intervalls == null) return;
601
602     int [] cl = classes.getClassCodes(intervalls);
603     for (int i = 0; i < cl.length; i++)
604       addTransition(start, cl[i], end);
605   }
606
607   private void insertNotClassNFA(Vector intervalls, int start, int end) {
608     int [] cl = classes.getNotClassCodes(intervalls);
609
610     for (int i = 0; i < cl.length; i++)
611       addTransition(start, cl[i], end);
612   }
613   
614
615   /**
616    * Constructs an NFA accepting the complement of the language
617    * of a given NFA.
618    *
619    * Converts the NFA into a DFA, then negates that DFA.
620    * Exponential state blowup possible and common.
621    *
622    * @param the NFA to construct the complement for.
623    *
624    * @return a pair of integers denoting the index of start
625    * and end state of the complement NFA.
626    */

627   private IntPair complement(IntPair nfa) {
628
629     if (Options.DEBUG) {
630       Out.debug("complement for "+nfa);
631       Out.debug("NFA is :"+Out.NL+this);
632     }
633
634     int dfaStart = nfa.end+1;
635     
636     // fixme: only need epsilon closure of states reachable from nfa.start
637
epsilonFill();
638     
639     Hashtable dfaStates = new Hashtable(numStates);
640     Vector dfaVector = new Vector(numStates);
641
642     int numDFAStates = 0;
643     int currentDFAState = 0;
644
645     StateSet currentState, newState;
646     
647     newState = epsilon[nfa.start];
648     dfaStates.put(newState, new Integer JavaDoc(numDFAStates));
649     dfaVector.addElement(newState);
650
651     if (Options.DEBUG)
652       Out.debug("pos DFA start state is :"+Out.NL+dfaStates+Out.NL+Out.NL+"ordered :"+Out.NL+dfaVector);
653      
654     currentDFAState = 0;
655       
656     while ( currentDFAState <= numDFAStates ) {
657
658       currentState = (StateSet) dfaVector.elementAt(currentDFAState);
659
660       for (char input = 0; input < numInput; input++) {
661           newState = DFAEdge(currentState, input);
662
663           if ( newState.containsElements() ) {
664
665           // Out.debug("DFAEdge for input "+(int)input+" and state set "+currentState+" is "+newState);
666

667             // Out.debug("Looking for state set "+newState);
668
Integer JavaDoc nextDFAState = (Integer JavaDoc) dfaStates.get(newState);
669
670             if ( nextDFAState != null ) {
671               // Out.debug("FOUND!");
672
addTransition(dfaStart+currentDFAState, input, dfaStart+nextDFAState.intValue());
673             }
674             else {
675             if (Options.dump) Out.print("+");
676               // Out.debug("NOT FOUND!");
677
// Out.debug("Table was "+dfaStates);
678
numDFAStates++;
679
680               dfaStates.put(newState, new Integer JavaDoc(numDFAStates));
681               dfaVector.addElement(newState);
682
683             addTransition(dfaStart+currentDFAState, input, dfaStart+numDFAStates);
684             }
685           }
686       }
687       
688       currentDFAState++;
689     }
690     
691     // We have a dfa accepting the positive regexp.
692

693     // Now the complement:
694
if (Options.DEBUG)
695       Out.debug("dfa finished, nfa is now :"+Out.NL+this);
696
697     int start = dfaStart+numDFAStates+1;
698     int error = dfaStart+numDFAStates+2;
699     int end = dfaStart+numDFAStates+3;
700
701     addEpsilonTransition(start, dfaStart);
702
703     for (int i = 0; i < numInput; i++)
704       addTransition(error, i, error);
705
706     addEpsilonTransition(error, end);
707
708     for (int s = 0; s <= numDFAStates; s++) {
709       currentState = (StateSet) dfaVector.elementAt(s);
710       
711       currentDFAState = dfaStart+s;
712
713       // if it was not a final state, it is now in the complement
714
if (!currentState.isElement(nfa.end))
715         addEpsilonTransition(currentDFAState, end);
716
717       // all inputs not present (formerly leading to an implicit error)
718
// now lead to an explicit (final) state accepting everything.
719
for (int i = 0; i < numInput; i++)
720         if (table[currentDFAState][i] == null)
721           addTransition(currentDFAState, i, error);
722     }
723
724     // eliminate transitions leading to dead states
725
if (live == null || live.length < numStates) {
726       live = new boolean [2*numStates];
727       visited = new boolean [2*numStates];
728     }
729
730     _end = end;
731     _dfaStates = dfaVector;
732     _dfaStart = dfaStart;
733     removeDead(dfaStart);
734
735     if (Options.DEBUG)
736       Out.debug("complement finished, nfa ("+start+","+end+") is now :"+this);
737
738     return new IntPair(start, end);
739   }
740
741   // "global" data for use in method removeDead only:
742
// live[s] == false <=> no final state can be reached from s
743
private boolean [] live; // = new boolean [estSize];
744
private boolean [] visited; // = new boolean [estSize];
745
private int _end; // final state of original nfa for dfa (nfa coordinates)
746
private Vector _dfaStates;
747   private int _dfaStart; // in nfa coordinates
748

749   private void removeDead(int start) {
750     // Out.debug("removeDead ("+start+")");
751

752     if ( visited[start] || live[start] ) return;
753     visited[start] = true;
754
755     // Out.debug("not yet visited");
756

757     if (closure(start).isElement(_end))
758       live[start] = true;
759
760     // Out.debug("is final :"+live[start]);
761

762     for (int i = 0; i < numInput; i++) {
763       StateSet nextState = closure(table[start][i]);
764       StateSetEnumerator states = nextState.states();
765       while (states.hasMoreElements()) {
766         int next = states.nextElement();
767         
768         if (next != start) {
769           removeDead(next);
770           
771           if (live[next])
772             live[start] = true;
773           else
774             table[start][i] = null;
775         }
776       }
777     }
778
779     StateSet nextState = closure(epsilon[start]);
780     StateSetEnumerator states = nextState.states();
781     while (states.hasMoreElements()) {
782       int next = states.nextElement();
783       
784       if (next != start) {
785         removeDead(next);
786         
787         if (live[next])
788           live[start] = true;
789       }
790     }
791     
792     // Out.debug("state "+start+" is live :"+live[start]);
793
}
794
795
796   /**
797    * Constructs a two state NFA for char class regexps,
798    * such that the NFA has
799    *
800    * exactly one start state,
801    * exactly one end state,
802    * no transitions leading out of the end state
803    * no transitions leading into the start state
804    *
805    * Assumes that regExp.isCharClass(macros) == true
806    *
807    * @param regExp the regular expression to construct the
808    * NFA for
809    *
810    * @return a pair of integers denoting the index of start
811    * and end state of the NFA.
812    */

813   private void insertNFA(RegExp regExp, int start, int end) {
814     switch (regExp.type) {
815       
816     case sym.BAR:
817       RegExp2 r = (RegExp2) regExp;
818       insertNFA(r.r1, start, end);
819       insertNFA(r.r2, start, end);
820       return;
821             
822     case sym.CCLASS:
823       insertClassNFA( (Vector) ((RegExp1) regExp).content, start, end);
824       return;
825       
826     case sym.CCLASSNOT:
827       insertNotClassNFA( (Vector) ((RegExp1) regExp).content, start, end);
828       return;
829       
830     case sym.CHAR:
831       insertLetterNFA(
832         false, ((Character JavaDoc) ((RegExp1) regExp).content).charValue(),
833         start, end);
834       return;
835       
836     case sym.CHAR_I:
837       insertLetterNFA(
838        true, ((Character JavaDoc) ((RegExp1) regExp).content).charValue(),
839        start, end);
840       return;
841       
842     case sym.MACROUSE:
843       insertNFA(macros.getDefinition((String JavaDoc) ((RegExp1) regExp).content),
844                 start, end);
845       return;
846     }
847     
848     throw new Error JavaDoc("Unknown expression type "+regExp.type+" in NFA construction");
849   }
850
851
852   /**
853    * Constructs an NFA for regExp such that the NFA has
854    *
855    * exactly one start state,
856    * exactly one end state,
857    * no transitions leading out of the end state
858    * no transitions leading into the start state
859    *
860    * @param regExp the regular expression to construct the
861    * NFA for
862    *
863    * @return a pair of integers denoting the index of start
864    * and end state of the NFA.
865    */

866   public IntPair insertNFA(RegExp regExp) {
867     
868     IntPair nfa1, nfa2;
869     int start, end;
870     RegExp2 r;
871     
872     if (Options.DEBUG)
873       Out.debug("Inserting RegExp : "+regExp);
874     
875     if (regExp.isCharClass(macros)) {
876       start = numStates;
877       end = numStates+1;
878
879       ensureCapacity(end+1);
880       if (end+1 > numStates) numStates = end+1;
881       
882       insertNFA(regExp, start, end);
883
884       return new IntPair(start, end);
885     }
886     
887     switch (regExp.type) {
888       
889     case sym.BAR:
890       
891       r = (RegExp2) regExp;
892       
893       nfa1 = insertNFA(r.r1);
894       nfa2 = insertNFA(r.r2);
895       
896       start = nfa2.end+1;
897       end = nfa2.end+2;
898       
899       addEpsilonTransition(start, nfa1.start);
900       addEpsilonTransition(start, nfa2.start);
901       addEpsilonTransition(nfa1.end, end);
902       addEpsilonTransition(nfa2.end, end);
903       
904       return new IntPair(start, end);
905       
906     case sym.CONCAT:
907         
908       r = (RegExp2) regExp;
909         
910       nfa1 = insertNFA(r.r1);
911       nfa2 = insertNFA(r.r2);
912       
913       addEpsilonTransition(nfa1.end, nfa2.start);
914       
915       return new IntPair(nfa1.start, nfa2.end);
916       
917     case sym.STAR:
918       nfa1 = insertNFA( (RegExp) ((RegExp1) regExp).content );
919       
920       start = nfa1.end+1;
921       end = nfa1.end+2;
922       
923       addEpsilonTransition(nfa1.end, end);
924       addEpsilonTransition(start, nfa1.start);
925       
926       addEpsilonTransition(start, end);
927       addEpsilonTransition(nfa1.end, nfa1.start);
928       
929       return new IntPair(start, end);
930       
931     case sym.PLUS:
932       nfa1 = insertNFA( (RegExp) ((RegExp1) regExp).content );
933       
934       start = nfa1.end+1;
935       end = nfa1.end+2;
936       
937       addEpsilonTransition(nfa1.end, end);
938       addEpsilonTransition(start, nfa1.start);
939       
940       addEpsilonTransition(nfa1.end, nfa1.start);
941       
942       return new IntPair(start, end);
943       
944     case sym.QUESTION:
945       nfa1 = insertNFA( (RegExp) ((RegExp1) regExp).content );
946       
947       addEpsilonTransition(nfa1.start, nfa1.end);
948       
949       return new IntPair(nfa1.start, nfa1.end);
950       
951     case sym.BANG:
952       return complement(insertNFA((RegExp) ((RegExp1) regExp).content));
953
954     case sym.TILDE:
955       nfa1 = insertNFA((RegExp) ((RegExp1) regExp).content);
956                  
957       start = nfa1.end+1;
958       int s1 = start+1;
959       int s2 = s1+1;
960       end = s2+1;
961
962       for (int i = 0; i < numInput; i++) {
963         addTransition(s1,i,s1);
964         addTransition(s2,i,s2);
965       }
966
967       addEpsilonTransition(start, s1);
968       addEpsilonTransition(s1, nfa1.start);
969       addEpsilonTransition(nfa1.end, s2);
970       addEpsilonTransition(s2, end);
971
972       nfa1 = complement(new IntPair(start,end));
973       nfa2 = insertNFA((RegExp) ((RegExp1) regExp).content);
974       
975       addEpsilonTransition(nfa1.end, nfa2.start);
976
977       return new IntPair(nfa1.start, nfa2.end);
978       
979     case sym.STRING:
980       return insertStringNFA(false, (String JavaDoc) ((RegExp1) regExp).content );
981
982     case sym.STRING_I:
983       return insertStringNFA(true, (String JavaDoc) ((RegExp1) regExp).content );
984       
985     case sym.MACROUSE:
986       return insertNFA(macros.getDefinition((String JavaDoc) ((RegExp1) regExp).content));
987     }
988     
989     throw new Error JavaDoc("Unknown expression type "+regExp.type+" in NFA construction");
990   }
991 }
992
Popular Tags