KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > sourceforge > chaperon > build > PatternAutomatonBuilder


1 /*
2  * Copyright (C) Chaperon. All rights reserved.
3  * -------------------------------------------------------------------------
4  * This software is published under the terms of the Apache Software License
5  * version 1.1, a copy of which has been included with this distribution in
6  * the LICENSE file.
7  */

8
9 package net.sourceforge.chaperon.build;
10
11 import net.sourceforge.chaperon.model.Violations;
12 import net.sourceforge.chaperon.model.pattern.Alternation;
13 import net.sourceforge.chaperon.model.pattern.BeginOfLine;
14 import net.sourceforge.chaperon.model.pattern.CharacterClass;
15 import net.sourceforge.chaperon.model.pattern.CharacterInterval;
16 import net.sourceforge.chaperon.model.pattern.CharacterSet;
17 import net.sourceforge.chaperon.model.pattern.CharacterString;
18 import net.sourceforge.chaperon.model.pattern.Concatenation;
19 import net.sourceforge.chaperon.model.pattern.EndOfLine;
20 import net.sourceforge.chaperon.model.pattern.Pattern;
21 import net.sourceforge.chaperon.model.pattern.PatternGroup;
22 import net.sourceforge.chaperon.model.pattern.UniversalCharacter;
23 import net.sourceforge.chaperon.process.PatternAutomaton;
24
25 /**
26  * This class represents a builder for the pattern automata.
27  *
28  * @author <a HREF="mailto:stephan@apache.org">Stephan Michels</a>
29  * @version CVS $Id: PatternAutomatonBuilder.java,v 1.7 2003/12/09 19:55:52 benedikta Exp $
30  */

31 public class PatternAutomatonBuilder
32 {
33   private Pattern pattern = null;
34   private PatternAutomaton automaton = null;
35   private Violations violations = new Violations();
36   private int statecount = 0;
37   private int stateindex = 0;
38   private int groupcount = 0;
39   private int groupindex = 0;
40
41   /**
42    * Create a builder for the pattern automata.
43    *
44    * @param pattern Pattern, which should be used to build the pattern automaton.
45    */

46   public PatternAutomatonBuilder(Pattern pattern)
47   {
48     violations.addViolations(pattern.validate());
49
50     if ((violations!=null) && (violations.getViolationCount()>0))
51       throw new IllegalArgumentException JavaDoc("Pattern is not valid: "+violations.getViolation(0));
52
53     this.pattern = pattern;
54
55     statecount = getStateCount(pattern)+3;
56     stateindex = statecount-1;
57
58     groupcount = getGroupCount(pattern);
59     groupindex = groupcount;
60
61     PatternAutomaton automaton = new PatternAutomaton(statecount);
62
63     automaton.setGroupCount(groupcount+1);
64
65     int finalstate = stateindex--;
66
67     automaton.setFinalState(finalstate);
68
69     automaton.setType(stateindex, PatternAutomaton.TYPE_GROUPEND);
70     automaton.setGroupIndex(stateindex, 0);
71     automaton.setTransitions(stateindex, new int[]{finalstate});
72
73     int state = stateindex--;
74
75     state = traverse(automaton, pattern, state);
76
77     automaton.setType(stateindex, PatternAutomaton.TYPE_GROUPSTART);
78     automaton.setGroupIndex(stateindex, 0);
79     automaton.setTransitions(stateindex, new int[]{state});
80
81     automaton.setFirstState(stateindex);
82
83     this.automaton = automaton;
84   }
85
86   /**
87    * Return the builded automaton. This method will return null, if an error occurs.
88    *
89    * @return Pattern automaton.
90    */

91   public PatternAutomaton getPatternAutomaton()
92   {
93     return automaton;
94   }
95
96   /**
97    * Calculates the count of group in the pattern.
98    *
99    * @param element Root pattern.
100    *
101    * @return Count of groups.
102    */

103   private int getGroupCount(Pattern element)
104   {
105     int groupcount = 0;
106
107     if (element instanceof Alternation)
108     {
109       Alternation alternation = (Alternation)element;
110
111       for (int i = 0; i<alternation.getPatternCount(); i++)
112         groupcount += getGroupCount(alternation.getPattern(i));
113     }
114     else if (element instanceof Concatenation)
115     {
116       Concatenation concatenation = (Concatenation)element;
117
118       for (int i = 0; i<concatenation.getPatternCount(); i++)
119         groupcount += getGroupCount(concatenation.getPattern(i));
120     }
121     else if (element instanceof PatternGroup)
122     {
123       groupcount++;
124
125       PatternGroup group = (PatternGroup)element;
126
127       for (int i = 0; i<group.getPatternCount(); i++)
128         groupcount += getGroupCount(group.getPattern(i));
129     }
130
131     return groupcount;
132   }
133
134   /**
135    * Calculates the count of states.
136    *
137    * @param element Root pattern.
138    *
139    * @return Count of states.
140    */

141   private int getStateCount(Pattern element)
142   {
143     int factor = 1;
144     int offset = 0;
145     int statecount = 0;
146
147     // generate closure for p
148
if ((element.getMinOccurs()==1) && (element.getMaxOccurs()==1))
149     {
150       // nothing
151
}
152
153     // generate closure for p?
154
else if ((element.getMinOccurs()==0) && (element.getMaxOccurs()==1))
155       offset = 1;
156
157     // generate closure for p+
158
else if ((element.getMinOccurs()==1) && (element.getMaxOccurs()==Integer.MAX_VALUE))
159       offset = 1;
160
161     // generate closure for p*
162
else if ((element.getMinOccurs()==0) && (element.getMaxOccurs()==Integer.MAX_VALUE))
163       offset = 2;
164
165     // generate closure for p{n,m}
166
else
167     {
168       factor = element.getMaxOccurs();
169       offset = 1;
170     }
171
172     if (element instanceof Alternation)
173     {
174       Alternation alternation = (Alternation)element;
175
176       for (int i = 0; i<alternation.getPatternCount(); i++)
177         statecount += getStateCount(alternation.getPattern(i));
178
179       if (alternation.getPatternCount()>1)
180         statecount++;
181     }
182     else if (element instanceof BeginOfLine)
183       statecount = 1;
184     else if (element instanceof CharacterClass)
185     {
186       CharacterClass characterclass = (CharacterClass)element;
187
188       for (int i = 0; i<characterclass.getCharacterClassElementCount(); i++)
189       {
190         if (characterclass.getCharacterClassElement(i) instanceof CharacterInterval)
191           statecount++;
192         else if (characterclass.getCharacterClassElement(i) instanceof CharacterSet)
193         {
194           CharacterSet set = (CharacterSet)characterclass.getCharacterClassElement(i);
195
196           statecount += set.getCharacters().length();
197         }
198       }
199
200       statecount++;
201     }
202     else if (element instanceof CharacterString)
203     {
204       CharacterString string = (CharacterString)element;
205
206       statecount += string.getString().length();
207     }
208     else if (element instanceof Concatenation)
209     {
210       Concatenation concatenation = (Concatenation)element;
211
212       for (int i = 0; i<concatenation.getPatternCount(); i++)
213         statecount += getStateCount(concatenation.getPattern(i));
214     }
215     else if (element instanceof EndOfLine)
216       statecount = 1;
217     else if (element instanceof PatternGroup)
218     {
219       statecount = 2;
220
221       PatternGroup group = (PatternGroup)element;
222
223       for (int i = 0; i<group.getPatternCount(); i++)
224         statecount += getStateCount(group.getPattern(i));
225     }
226     else if (element instanceof UniversalCharacter)
227       statecount = 1;
228     else
229       throw new IllegalArgumentException JavaDoc("Pattern element not recognized");
230
231     return (factor*statecount)+offset;
232   }
233
234   /*
235    * @param laststate First state of the following pattern
236    *
237    * @return First state of the current pattern
238    */

239
240   /**
241    * Build the automaton by traversing the pattern tree.
242    *
243    * @param automaton The current automaton.
244    * @param element The current pattern element.
245    * @param laststate Last used state in the automaton.
246    *
247    * @return New last state in the automaton.
248    */

249   private int traverse(PatternAutomaton automaton, Pattern element, int laststate)
250   {
251     int firststate;
252
253     // generate closure for p
254
if ((element.getMinOccurs()==1) && (element.getMaxOccurs()==1))
255       firststate = evalPattern(automaton, element, laststate);
256
257     // generate closure for p?
258
else if ((element.getMinOccurs()==0) && (element.getMaxOccurs()==1))
259     {
260       int s1 = evalPattern(automaton, element, laststate);
261       automaton.setTransitions(stateindex, new int[]{s1, laststate});
262       firststate = stateindex--;
263     }
264
265     // generate closure for p+
266
else if ((element.getMinOccurs()==1) && (element.getMaxOccurs()==Integer.MAX_VALUE))
267     {
268       int s1 = stateindex--;
269       firststate = evalPattern(automaton, element, s1);
270       automaton.setTransitions(s1, new int[]{firststate, laststate});
271     }
272
273     // generate closure for p*
274
else if ((element.getMinOccurs()==0) && (element.getMaxOccurs()==Integer.MAX_VALUE))
275     {
276       int s2 = stateindex--;
277       int s1 = evalPattern(automaton, element, s2);
278       automaton.setTransitions(s2, new int[]{s1, laststate});
279
280       firststate = stateindex--;
281       automaton.setTransitions(firststate, new int[]{s1, laststate});
282     }
283
284     // generate closure for p{n,m}
285
else
286     {
287       int s2 = laststate;
288       for (int i = 0; i<element.getMinOccurs(); i++)
289         s2 = evalPattern(automaton, element, s2);
290
291       int s1 = s2;
292
293       for (int i = element.getMinOccurs(); i<element.getMaxOccurs(); i++)
294       {
295         s1 = evalPattern(automaton, element, s1);
296         if (i>element.getMinOccurs())
297           automaton.addTransition(s1, s2);
298       }
299
300       firststate = stateindex--;
301       automaton.setTransitions(firststate, new int[]{s1, s2});
302     }
303
304     if (element instanceof PatternGroup)
305       groupindex--;
306
307     return firststate;
308   }
309
310   /**
311    * Evalutates a pattern element.
312    *
313    * @param automaton The current automaton.
314    * @param element The current pattern element.
315    * @param laststate Last used state in the automaton.
316    *
317    * @return New last state in the automaton.
318    */

319   private int evalPattern(PatternAutomaton automaton, Pattern element, int laststate)
320   {
321     if (element instanceof Alternation)
322       return evalAlternation(automaton, (Alternation)element, laststate);
323     else if (element instanceof BeginOfLine)
324       return evalBeginOfLine(automaton, (BeginOfLine)element, laststate);
325     else if (element instanceof CharacterClass)
326       return evalCharacterClass(automaton, (CharacterClass)element, laststate);
327     else if (element instanceof CharacterString)
328       return evalCharacterString(automaton, (CharacterString)element, laststate);
329     else if (element instanceof Concatenation)
330       return evalConcatenation(automaton, (Concatenation)element, laststate);
331     else if (element instanceof EndOfLine)
332       return evalEndOfLine(automaton, (EndOfLine)element, laststate);
333     else if (element instanceof PatternGroup)
334       return evalPatternGroup(automaton, (PatternGroup)element, laststate);
335     else if (element instanceof UniversalCharacter)
336       return evalUniversalCharacter(automaton, (UniversalCharacter)element, laststate);
337     else
338       throw new IllegalArgumentException JavaDoc("Pattern element not recognized");
339   }
340
341   /**
342    * Create the states and transitions for an alternation
343    *
344    * @param automaton The current automaton.
345    * @param element The current pattern element.
346    * @param laststate Last used state in the automaton.
347    *
348    * @return New last state in the automaton.
349    */

350   private int evalAlternation(PatternAutomaton automaton, Alternation element, int laststate)
351   {
352     if (element.getPatternCount()==1)
353       return traverse(automaton, element.getPattern(0), laststate);
354     else
355     {
356       int nextstate = stateindex--;
357       int state;
358
359       for (int i = element.getPatternCount()-1; i>=0; i--)
360       {
361         state = traverse(automaton, element.getPattern(i), laststate);
362         automaton.addTransition(nextstate, state);
363       }
364
365       return nextstate;
366     }
367   }
368
369   /**
370    * Create the states and transitions for a pattern that matches the begin of line.
371    *
372    * @param automaton The current automaton.
373    * @param element The current pattern element.
374    * @param laststate Last used state in the automaton.
375    *
376    * @return New last state in the automaton.
377    */

378   private int evalBeginOfLine(PatternAutomaton automaton, BeginOfLine element, int laststate)
379   {
380     automaton.setType(stateindex, PatternAutomaton.TYPE_BOL);
381     automaton.setTransitions(stateindex, new int[]{laststate});
382     return stateindex--;
383   }
384
385   /**
386    * Create the states and transition for a character class.
387    *
388    * @param automaton The current automaton.
389    * @param element The current pattern element.
390    * @param laststate Last used state in the automaton.
391    *
392    * @return New last state in the automaton.
393    */

394   private int evalCharacterClass(PatternAutomaton automaton, CharacterClass element, int laststate)
395   {
396     int state;
397
398     if (!element.isExclusive())
399     {
400       int firststate = stateindex--;
401
402       for (int i = 0; i<element.getCharacterClassElementCount(); i++)
403       {
404         if (element.getCharacterClassElement(i) instanceof CharacterInterval)
405         {
406           CharacterInterval interval = (CharacterInterval)element.getCharacterClassElement(i);
407
408           automaton.setType(stateindex, PatternAutomaton.TYPE_MATCH);
409           automaton.setInterval(stateindex, interval.getMinimum(), interval.getMaximum());
410           automaton.addTransition(stateindex, laststate);
411           state = stateindex--;
412           automaton.addTransition(firststate, state);
413         }
414         else if (element.getCharacterClassElement(i) instanceof CharacterSet)
415         {
416           CharacterSet set = (CharacterSet)element.getCharacterClassElement(i);
417           String JavaDoc chars = set.getCharacters();
418
419           for (int j = 0; j<chars.length(); j++)
420           {
421             automaton.setType(stateindex, PatternAutomaton.TYPE_MATCH);
422             automaton.setInterval(stateindex, chars.charAt(j), chars.charAt(j));
423             automaton.addTransition(stateindex, laststate);
424             state = stateindex--;
425             automaton.addTransition(firststate, state);
426           }
427         }
428       }
429
430       return firststate;
431     }
432     else
433     {
434       state = stateindex--;
435       automaton.setType(state, PatternAutomaton.TYPE_MATCHANY);
436       automaton.setTransitions(state, new int[]{laststate});
437       for (int i = element.getCharacterClassElementCount()-1; i>=0; i--)
438       {
439         if (element.getCharacterClassElement(i) instanceof CharacterInterval)
440         {
441           CharacterInterval interval = (CharacterInterval)element.getCharacterClassElement(i);
442
443           automaton.setType(stateindex, PatternAutomaton.TYPE_MATCH);
444           automaton.setInterval(stateindex, interval.getMinimum(), interval.getMaximum());
445           automaton.setTransitions(stateindex, new int[]{state});
446           state = stateindex--;
447         }
448         else if (element.getCharacterClassElement(i) instanceof CharacterSet)
449         {
450           CharacterSet set = (CharacterSet)element.getCharacterClassElement(i);
451           String JavaDoc chars = set.getCharacters();
452
453           for (int j = 0; j<chars.length(); j++)
454           {
455             automaton.setType(stateindex, PatternAutomaton.TYPE_EXMATCH);
456             automaton.setInterval(stateindex, chars.charAt(j), chars.charAt(j));
457             automaton.setType(stateindex, PatternAutomaton.TYPE_EXMATCH);
458             automaton.setTransitions(stateindex, new int[]{state});
459             state = stateindex--;
460           }
461         }
462       }
463
464       return state;
465     }
466   }
467
468   /**
469    * Create the states and transitions for a string of characters.
470    *
471    * @param automaton The current automaton.
472    * @param element The current pattern element.
473    * @param laststate Last used state in the automaton.
474    *
475    * @return New last state in the automaton.
476    */

477   private int evalCharacterString(PatternAutomaton automaton, CharacterString element, int laststate)
478   {
479     int state = laststate;
480
481     for (int i = element.getString().length()-1; i>=0; i--)
482     {
483       automaton.setType(stateindex, PatternAutomaton.TYPE_MATCH);
484       automaton.setInterval(stateindex, element.getString().charAt(i), element.getString().charAt(i));
485       automaton.setTransitions(stateindex, new int[]{state});
486       state = stateindex--;
487     }
488
489     return state;
490   }
491
492   /**
493    * Create the states and transitions for a catenation of pattern
494    *
495    * @param automaton The current automaton.
496    * @param element The current pattern element.
497    * @param laststate Last used state in the automaton.
498    *
499    * @return New last state in the automaton.
500    */

501   private int evalConcatenation(PatternAutomaton automaton, Concatenation element, int laststate)
502   {
503     int state = laststate;
504
505     for (int i = element.getPatternCount()-1; i>=0; i--)
506       state = traverse(automaton, element.getPattern(i), state);
507
508     return state;
509   }
510
511   /**
512    * Create the states and transitions for a pattern that matches the end of line.
513    *
514    * @param automaton The current automaton.
515    * @param element The current pattern element.
516    * @param laststate Last used state in the automaton.
517    *
518    * @return New last state in the automaton.
519    */

520   private int evalEndOfLine(PatternAutomaton automaton, EndOfLine element, int laststate)
521   {
522     automaton.setType(stateindex, PatternAutomaton.TYPE_EOL);
523     automaton.setTransitions(stateindex, new int[]{laststate});
524     return stateindex--;
525   }
526
527   /**
528    * Create the states and transitions for a pattern group.
529    *
530    * @param automaton The current automaton.
531    * @param element The current pattern element.
532    * @param laststate Last used state in the automaton.
533    *
534    * @return New last state in the automaton.
535    */

536   private int evalPatternGroup(PatternAutomaton automaton, PatternGroup element, int laststate)
537   {
538     int endstate = stateindex--;
539
540     automaton.setType(endstate, PatternAutomaton.TYPE_GROUPEND);
541     automaton.setGroupIndex(endstate, groupindex);
542     automaton.setTransitions(endstate, new int[]{laststate});
543
544     int nextstate = endstate;
545
546     for (int i = element.getPatternCount()-1; i>=0; i--)
547       nextstate = traverse(automaton, element.getPattern(i), nextstate);
548
549     automaton.setGroupIndex(endstate, groupindex);
550
551     automaton.setType(stateindex, PatternAutomaton.TYPE_GROUPSTART);
552     automaton.setGroupIndex(stateindex, groupindex);
553     automaton.setTransitions(stateindex, new int[]{nextstate});
554     return stateindex--;
555   }
556
557   /**
558    * Create the states and transition for an universal character.
559    *
560    * @param automaton The current automaton.
561    * @param element The current pattern element.
562    * @param laststate Last used state in the automaton.
563    *
564    * @return New last state in the automaton.
565    */

566   private int evalUniversalCharacter(PatternAutomaton automaton, UniversalCharacter element,
567                                      int laststate)
568   {
569     automaton.setType(stateindex, PatternAutomaton.TYPE_MATCHANY);
570     automaton.setTransitions(stateindex, new int[]{laststate});
571     return stateindex--;
572   }
573 }
574
Popular Tags