KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > tc > jrexx > regex > Automaton_Pattern


1 /*
2 * 01/07/2003 - 15:19:32
3 *
4 * Automaton_Pattern.java -
5 * Copyright (C) 2003 Buero fuer Softwarearchitektur GbR
6 * ralf.meyer@karneim.com
7 * http://jrexx.sf.net
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2
12 * of the License, or (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 */

23 package com.tc.jrexx.regex;
24
25 import com.tc.jrexx.set.*;
26 import com.tc.jrexx.automaton.*;
27 import java.text.*;
28 import java.util.*;
29
30
31 public class Automaton_Pattern extends com.tc.jrexx.set.AutomatonSet_String {
32
/** Properties type of this automaton; declares nothing beyond what SProperties provides. */
33   protected static class PProperties extends SProperties {}
34
/** Marker interface for states of this automaton; adds no members to ISState. */
35   protected interface IPState extends ISState {
36
37   }
38
/**
 * State type used by Automaton_Pattern.
 * Every method simply forwards to the SState implementation.
 * NOTE(review): the overrides are presumably re-declared only to make the
 * protected members reachable from this package - confirm.
 */
39   protected class PState extends AutomatonSet_String.SState implements IPState {
// Creates a state; isFinal is passed unchanged to the SState constructor.
40     public PState(boolean isFinal) {
41       super(isFinal);
42     }
43
// Forwards to SState.addTransition and returns its result.
44     protected Transition addTransition(IProperties properties,ISet_char charSet,State toState) {
45       return super.addTransition(properties,charSet,toState);
46     }
47
// Forwards to SState.removeTransition and returns its result.
48     protected boolean removeTransition(Transition trans) {
49       return super.removeTransition(trans);
50     }
51
// Forwards to SState.setFinal.
52     protected void setFinal(boolean isFinal) {
53       super.setFinal(isFinal);
54     }
55
// Forwards to SState.getEClosure and returns its result.
56     protected IState getEClosure() {
57       return super.getEClosure();
58     }
59
60   }
61
/**
 * State-set type used by Automaton_Pattern; both constructors forward to
 * LinkedSet_SState without adding behaviour.
 */
62   protected class LinkedSet_PState extends AutomatonSet_String.LinkedSet_SState implements IPState {
63
// Forwards to the no-argument LinkedSet_SState constructor.
64     protected LinkedSet_PState() {
65       super();
66     }
67
// Forwards the given state to the LinkedSet_SState constructor.
68     protected LinkedSet_PState(PState state) {
69       super(state);
70     }
71
72   }
73
74   private Map preDefinedAutomatons = null;
75   protected String JavaDoc regEx = null;
76
// Forwards fullSet to the superclass constructor. regEx is not assigned here,
// so it keeps its field initialiser value (null).
77   protected Automaton_Pattern(ISet_char fullSet) {
78     super(fullSet);
79   }
80
// Creates an automaton via the no-argument superclass constructor and sets
// regEx to the empty string.
81   protected Automaton_Pattern() {
82     super();
83     this.regEx= "";
84   }
85
/**
 * Creates an automaton and immediately adds the given expression to it.
 *
 * regEx is first set to the empty string so that addAll(String) stores the
 * expression itself instead of combining it with a previous one.
 *
 * @param regEx the expression handed to addAll(String)
 */
protected Automaton_Pattern(String regEx) {
  super();
  this.regEx = "";
  this.addAll(regEx);
}
91
// Returns the start state by delegating to the superclass.
// NOTE(review): presumably re-declared to widen access to this package - confirm.
92   protected Automaton.State getStartState() {
93     return super.getStartState();
94   }
95
// Factory method: creates a new non-final PState.
96   protected State createState() {
97     return new PState(false);
98   }
99
// Factory method: creates a new PState whose final flag is isFinal.
100   protected SState createState(boolean isFinal) {
101     return new PState(isFinal);
102   }
103
// Factory method: creates a LinkedSet_PState via its no-argument constructor.
104   protected LinkedSet_State newLinkedSet_State() {
105     return new LinkedSet_PState();
106   }
107
// Factory method: creates a LinkedSet_PState from the given state.
// The cast fails with a ClassCastException if state is not a PState.
108   protected LinkedSet_State newLinkedSet_State(State state) {
109     return new LinkedSet_PState((PState)state);
110   }
111
// Sets the start state by delegating to the superclass.
112   protected void setStartState(SState state) {
113     super.setStartState(state);
114   }
115
// Delegates to the superclass addState(isFinal) and returns the state it returns.
116   protected SState addState(boolean isFinal) {
117     return super.addState(isFinal);
118   }
119
// Delegates removal of the given PState to the superclass and returns its result.
120   protected boolean removeState(PState removeState) {
121     return super.removeState(removeState);
122   }
123
// Clears the automaton via the superclass and resets regEx to the empty string.
124   protected void clear() {
125     super.clear();
126     this.regEx = "";
127   }
128
// Returns the state set provided by the superclass.
129   protected LinkedSet_State getStates() {
130     return super.getStates();
131   }
132
// Delegates to the superclass minimize(); regEx is left untouched.
133   protected void minimize() {
134     super.minimize();
135   }
136
// Delegates to the superclass removeUselessStates(); regEx is left untouched.
137   protected void removeUselessStates() {
138     super.removeUselessStates();
139   }
140
// Delegates to the superclass addAll(SState).
// Unlike the String and AutomatonSet_String overloads, regEx is not updated here.
141   protected void addAll(SState state) {
142     super.addAll(state);
143   }
144
// Delegates to the superclass complement(SState) and returns its result.
// parseRegEx calls this for the rule "TERM ::= notOp ELEMENT".
145   protected SState complement(SState state) {
146     return super.complement(state);
147   }
148
// Delegates to the superclass concat and returns its result.
// parseRegEx calls this for the rule "TERM ::= ELEMENT TERM".
149   protected SState concat(SState state_A,SState state_B) {
150     return super.concat(state_A,state_B);
151   }
152
153   protected SState repeat(SState state,int minTimes,int maxTimes) {
154     //performance leak
155
if ((state instanceof PState)==false) throw new IllegalArgumentException JavaDoc("(state instanceof PState)==false");
156
157     return super.repeat(state,minTimes,maxTimes);
158   }
// Delegates to the superclass union and returns its result.
// parseRegEx calls this for the rule "RE ::= TERM orOp RE".
159   protected SState union(SState state_A,SState state_B) {
160     return super.union(state_A,state_B);
161   }
162
// Delegates to the superclass intersect and returns its result.
// parseRegEx calls this for the rule "TERM ::= ELEMENT andOp TERM".
163   protected SState intersect(SState state_A,SState state_B) {
164     return super.intersect(state_A,state_B);
165   }
166 /*
167   protected PState minus(PState state_A,PState state_B) {
168     return (PState)super.minus(state_A,state_B);
169   }
170 */

171
172   protected void complement() {
173     super.complement();
174     if (this.regEx==null) return;
175     if (this.regEx=="") this.regEx = ".*";
176     else this.regEx = "!("+this.regEx+")";
177   }
178
179   protected void addAll(String JavaDoc regEx) {
180     if (this.regEx==null) return;
181     if (this.regEx=="") this.regEx = regEx;
182     else {
183       this.regEx = new StringBuffer JavaDoc(this.regEx.length()+regEx.length()+5)
184                  .append('(').append(this.regEx).append(')')
185                  .append('|')
186                  .append('(').append(regEx).append(')')
187                  .toString();
188     }
189
190     this.addAll(this.parseRegEx(regEx));
191     this.removeUselessStates();
192   }
193
194   protected void retainAll(String JavaDoc regEx) {
195     if (this.regEx==null) return;
196     if (this.regEx=="" || regEx=="") this.regEx = "";
197     else {
198       this.regEx = new StringBuffer JavaDoc(this.regEx.length()+regEx.length()+5)
199                  .append('(').append(this.regEx).append(')')
200                  .append('&')
201                  .append('(').append(regEx).append(')')
202                  .toString();
203     }
204
205     this.retainAll(this.parseRegEx(regEx));
206     this.removeUselessStates();
207   }
208
209   protected void removeAll(String JavaDoc regEx) {
210     if (this.regEx==null) return;
211     if (this.regEx=="") this.regEx = "";
212     else {
213       this.regEx = new StringBuffer JavaDoc(this.regEx.length()+regEx.length()+6)
214                  .append('(').append(this.regEx).append(')')
215                  .append("&!")
216                  .append('(').append(regEx).append(')')
217                  .toString();
218     }
219
220     this.removeAll(this.parseRegEx(regEx));
221     this.removeUselessStates();
222   }
223
224
// Returns the result of the superclass isDeterministic().
225   protected boolean isDeterministic() {
226     return super.isDeterministic();
227   }
228
// Returns the result of the superclass isDeterministic(startState).
229   protected boolean isDeterministic(State startState) {
230     return super.isDeterministic(startState);
231   }
232
233   protected void addAll(AutomatonSet_String automaton) {
234     super.addAll(automaton);
235
236     Automaton_Pattern pAutomaton = (Automaton_Pattern)automaton;
237
238     if (this.regEx==null || pAutomaton.regEx==null) return;
239     if (this.regEx=="") this.regEx = pAutomaton.regEx;
240     else {
241       this.regEx = new StringBuffer JavaDoc(this.regEx.length()+pAutomaton.regEx.length()+5)
242                  .append('(').append(this.regEx).append(')')
243                  .append('|')
244                  .append('(').append(pAutomaton.regEx).append(')')
245                  .toString();
246     }
247   }
248
249   protected void retainAll(AutomatonSet_String automaton) {
250     super.retainAll(automaton);
251
252     Automaton_Pattern pAutomaton = (Automaton_Pattern)automaton;
253
254     if (this.regEx==null || pAutomaton.regEx==null) return;
255     if (this.regEx=="" || pAutomaton.regEx=="") this.regEx = "";
256     else {
257       this.regEx = new StringBuffer JavaDoc(this.regEx.length()+pAutomaton.regEx.length()+5)
258                  .append('(').append(this.regEx).append(')')
259                  .append('&')
260                  .append('(').append(pAutomaton.regEx).append(')')
261                  .toString();
262     }
263   }
264
265   protected void removeAll(AutomatonSet_String automaton) {
266     super.removeAll(automaton);
267
268     Automaton_Pattern pAutomaton = (Automaton_Pattern)automaton;
269
270     if (this.regEx==null || pAutomaton.regEx==null) return;
271     if (this.regEx=="") this.regEx = "";
272     else if (pAutomaton.regEx!="") {
273       this.regEx = new StringBuffer JavaDoc(this.regEx.length()+pAutomaton.regEx.length()+6)
274                  .append('(').append(this.regEx).append(')')
275                  .append("&!")
276                  .append('(').append(pAutomaton.regEx).append(')')
277                  .toString();
278     }
279   }
280
281
282
283   protected Object JavaDoc clone() {
284     final Automaton_Pattern clone = (Automaton_Pattern)super.clone();
285     clone.scanner = clone.newScanner();
286     return clone;
287   }
288
289
290   //////////////////////////////////////////////////////////////////////////
291
///// P A R S I N G
292
//////////////////////////////////////////////////////////////////////////
293

294       private static final int ERROR = -2, // the possible parser
295
SHIFT = -3, // actions used in ACTIONTABLE
296
REDUCE = -4, // value -1 is reserved
297
ACCEPT = -5, // for unknown constant value
298

299                               RE = 0, // NonTerminal Symbols
300
TERM = 1, // IMPORTANT: the value represents the
301
ELEMENT = 2, // rowNr in ACTIONTABLE
302

303                               notOp = 3, // Terminal Symbols
304
andOp = 4, // .
305
orOp = 5, // .
306
groupBegin = 6, // .
307
groupEnd = 7, // .
308
repetition = 8, // .
309
label = 9, // .
310
regExp = 10, // IMPORTANT: the value represents the
311
EOF = 11; // rowNr in ACTIONTABLE
312

// Parse table of the shift/reduce parser in parseRegEx.
// First index: current parser state. Second index: symbol (RE .. EOF).
// Each entry is {action, operand}: for SHIFT the operand is the next state,
// for REDUCE it is the rule number; after a reduction the operand of the
// entry for the produced non-terminal is used as the next state.
313       private static final int[][][] ACTIONTABLE = {
314       // state RE TERM ELEMENT notOp andOp orOp groupBegin groupEnd repetition label regExp EOF
315
/* 0 */ {{SHIFT,2},{SHIFT,7},{SHIFT,5},{SHIFT,11},{ERROR, 0},{ERROR, 0},{SHIFT,14},{ERROR, 0},{ERROR, 0},{ERROR, 0},{SHIFT,16},{ERROR, 0}},
316       /* 1 */ {{ERROR,0},{ERROR,0},{ERROR,0},{REDUCE,3},{REDUCE,3},{REDUCE,3},{REDUCE,3},{REDUCE,3},{REDUCE,3},{REDUCE,3},{REDUCE,3},{REDUCE,3}},
317       /* 2 */ {{ERROR,0},{ERROR,0},{ERROR,0},{ERROR, 0},{ERROR, 0},{ERROR, 0},{ERROR, 0},{ERROR, 0},{ERROR, 0},{ERROR, 0},{ERROR, 0},{ACCEPT,0}},
318       /* 3 */ {{ERROR,0},{ERROR,0},{ERROR,0},{ERROR, 0},{ERROR, 0},{ERROR, 0},{ERROR, 0},{REDUCE,1},{ERROR, 0},{ERROR, 0},{ERROR, 0},{REDUCE,1}},
319       /* 4 */ {{ERROR,0},{ERROR,0},{ERROR,0},{ERROR, 0},{ERROR, 0},{ERROR, 0},{ERROR, 0},{SHIFT,13},{ERROR, 0},{ERROR, 0},{ERROR, 0},{ERROR, 0}},
320       /* 5 */ {{ERROR,0},{SHIFT,8},{SHIFT,5},{SHIFT,11},{SHIFT,10},{REDUCE,6},{SHIFT,14},{REDUCE,6},{SHIFT, 1},{SHIFT,12},{SHIFT,16},{REDUCE,6}},
321       /* 6 */ {{ERROR,0},{ERROR,0},{ERROR,0},{ERROR, 0},{ERROR, 0},{REDUCE,9},{ERROR, 0},{REDUCE,9},{SHIFT, 1},{SHIFT,12},{ERROR, 0},{REDUCE,9}},
322       /* 7 */ {{ERROR,0},{ERROR,0},{ERROR,0},{ERROR, 0},{ERROR, 0},{SHIFT,15},{ERROR, 0},{REDUCE,0},{ERROR, 0},{ERROR, 0},{ERROR, 0},{REDUCE,0}},
323       /* 8 */ {{ERROR,0},{ERROR,0},{ERROR,0},{ERROR, 0},{ERROR, 0},{REDUCE,7},{ERROR, 0},{REDUCE,7},{ERROR, 0},{ERROR, 0},{ERROR, 0},{REDUCE,7}},
324       /* 9 */ {{ERROR,0},{ERROR,0},{ERROR,0},{ERROR, 0},{ERROR, 0},{REDUCE,8},{ERROR, 0},{REDUCE,8},{ERROR, 0},{ERROR, 0},{ERROR, 0},{REDUCE,8}},
325       /* 10 */ {{ERROR,0},{SHIFT,9},{SHIFT,5},{SHIFT,11},{ERROR, 0},{ERROR, 0},{SHIFT,14},{ERROR, 0},{ERROR, 0},{ERROR, 0},{SHIFT,16},{ERROR, 0}},
326       /* 11 */ {{ERROR,0},{ERROR,0},{SHIFT,6},{ERROR, 0},{ERROR, 0},{ERROR, 0},{SHIFT,14},{ERROR, 0},{ERROR, 0},{ERROR, 0},{SHIFT,16},{ERROR, 0}},
327       /* 12 */ {{ERROR,0},{ERROR,0},{ERROR,0},{REDUCE,4},{REDUCE,4},{REDUCE,4},{REDUCE,4},{REDUCE,4},{REDUCE,4},{REDUCE,4},{REDUCE,4},{REDUCE,4}},
328       /* 13 */ {{ERROR,0},{ERROR,0},{ERROR,0},{REDUCE,2},{REDUCE,2},{REDUCE,2},{REDUCE,2},{REDUCE,2},{REDUCE,2},{REDUCE,2},{REDUCE,2},{REDUCE,2}},
329       /* 14 */ {{SHIFT,4},{SHIFT,7},{SHIFT,5},{SHIFT,11},{ERROR, 0},{ERROR, 0},{SHIFT,14},{ERROR, 0},{ERROR, 0},{ERROR, 0},{SHIFT,16},{ERROR, 0}},
330       /* 15 */ {{SHIFT,3},{SHIFT,7},{SHIFT,5},{SHIFT,11},{ERROR, 0},{ERROR, 0},{SHIFT,14},{ERROR, 0},{ERROR, 0},{ERROR, 0},{SHIFT,16},{ERROR, 0}},
331       /* 16 */ {{ERROR,0},{ERROR,0},{ERROR,0},{REDUCE,5},{REDUCE,5},{REDUCE,5},{REDUCE,5},{REDUCE,5},{REDUCE,5},{REDUCE,5},{REDUCE,5},{REDUCE,5}}
332     };
333
334
335       // the number after a SHIFT action is the next state to go to (see case SHIFT)
336
// the number after a REDUCE action is the number of a rule (see case REDUCE)
337

338
339     private static final Integer JavaDoc[] INTEGERS = new Integer JavaDoc[ACTIONTABLE.length];
340     static {
341       for (int i=0; i<INTEGERS.length; i++) INTEGERS[i] = new Integer JavaDoc(i);
342     }
343
344   protected SState parseRegEx(String JavaDoc regEx) throws InvalidExpression {
345     final java.util.List JavaDoc tokenList = this.scanner.scan( regEx );
346
347     final Object JavaDoc[] extdTokenList = tokenList.toArray(new Object JavaDoc[tokenList.size()+1]);
348     extdTokenList[extdTokenList.length-1] = Terminal_EOF.INSTANCE;
349
350     java.util.Stack JavaDoc symbolStack = new java.util.Stack JavaDoc();
351     java.util.Stack JavaDoc stateStack = new java.util.Stack JavaDoc();
352
353     int extdTokenListIndex = 0;
354     Object JavaDoc token = extdTokenList[extdTokenListIndex];
355
356     int stateNr = 0, tokenSymbol=-1, action = Automaton_Pattern.ERROR;
357     do {
358       if (tokenSymbol==-1) {
359         if (token instanceof SState) tokenSymbol = Automaton_Pattern.regExp;
360         else if (token instanceof Terminal_Repetition) tokenSymbol = Automaton_Pattern.repetition;
361         else if (token instanceof Terminal_GroupBegin) tokenSymbol = Automaton_Pattern.groupBegin;
362         else if (token instanceof Terminal_GroupEnd) tokenSymbol = Automaton_Pattern.groupEnd;
363         else if (token instanceof String JavaDoc) tokenSymbol = Automaton_Pattern.label;
364         else if (token instanceof Terminal_OrOp) tokenSymbol = Automaton_Pattern.orOp;
365         else if (token instanceof Terminal_RegExp) tokenSymbol = Automaton_Pattern.regExp;
366         else if (token instanceof Terminal_AndOp) tokenSymbol = Automaton_Pattern.andOp;
367         else if (token instanceof Terminal_NotOp) tokenSymbol = Automaton_Pattern.notOp;
368         else if (token instanceof Terminal_EOF) tokenSymbol = Automaton_Pattern.EOF;
369         else {
370           String JavaDoc message = "Unknown symbol/token: " + token;
371           message+= "\n(check Parser or Scanner for this symbol/token)";
372           throw new RuntimeException JavaDoc(message);
373         }
374       }
375
376 //System.out.println("$ "+symbolStack);
377
//System.out.print("+ "+stateNr+","+tokenSymbol+" -> ");
378
action = Automaton_Pattern.ACTIONTABLE[stateNr][tokenSymbol][0];
379
380
381     PState finalState,aState;
382
383     switch (action) {
384       case Automaton_Pattern.SHIFT :
385 //System.out.println("SHIFT "+ACTIONTABLE[stateNr][tokenSymbol][1]);
386
stateStack.push( Automaton_Pattern.INTEGERS[stateNr] );
387         symbolStack.push( token );
388         stateNr = Automaton_Pattern.ACTIONTABLE[stateNr][tokenSymbol][1];
389         ++extdTokenListIndex;
390         token = extdTokenList[extdTokenListIndex];
391         tokenSymbol = -1;
392         break;
393
394       case Automaton_Pattern.REDUCE :
395 //System.out.println("REDUCE "+ACTIONTABLE[stateNr][tokenSymbol][1]);
396
final int ruleNr = Automaton_Pattern.ACTIONTABLE[stateNr][tokenSymbol][1];
397
398         Object JavaDoc node=null; int nodeSymbol=-1;
399         switch(ruleNr) {
400           case 0: // RE ::= TERM
401
{
402             node = symbolStack.pop();
403             nodeSymbol = Automaton_Pattern.RE;
404             break;
405           }
406           case 1: // RE ::= TERM orOp RE
407
{
408             PState re = (PState)symbolStack.pop();
409             /*Terminal_OrOp =*/ symbolStack.pop();
410             PState term = (PState)symbolStack.pop();
411
412             node = this.union(term,re);
413             nodeSymbol = Automaton_Pattern.RE;
414             break;
415           }
416           case 2: // ELEMENT ::= groupBegin RE groupEnd
417
{
418             Terminal_GroupEnd end = (Terminal_GroupEnd)symbolStack.pop();
419             node = symbolStack.pop();
420             Terminal_GroupBegin begin =(Terminal_GroupBegin)symbolStack.pop();
421             if (begin.name==null && end.name!=null
422                 || begin.name!=null && begin.name.equals(end.name)==false)
423               throw new IllegalArgumentException JavaDoc("endtag exspected for "+begin+" but found: "+end);
424
425             nodeSymbol = Automaton_Pattern.ELEMENT;
426             break;
427           }
428           case 3: // ELEMENT ::= ELEMENT repetition
429
{
430             Terminal_Repetition repetition = (Terminal_Repetition)symbolStack.pop();
431             PState element = (PState)symbolStack.pop();
432
433             node = repetition.to==Terminal_Repetition.UNLIMITED
434                     ? this.repeat(element,repetition.from,0)
435                     : this.repeat(element,repetition.from,repetition.to);
436
437             nodeSymbol = Automaton_Pattern.ELEMENT;
438             break;
439           }
440
441           case 4: // ELEMENT ::= ELEMENT label
442
{
443             String JavaDoc label = (String JavaDoc)symbolStack.pop();
444             String JavaDoc labelDot = null;
445             PState element = (PState)symbolStack.pop();
446
447             node = element;
448             nodeSymbol = Automaton_Pattern.ELEMENT;
449             break;
450           }
451           case 5: // ELEMENT ::= regExp
452
{
453             node = symbolStack.pop();
454             if (node instanceof Terminal_RegExp) { // or instanceOf Terminal_RuntimeValue
455
Automaton_Pattern preDefAutomaton;
456               if (this.preDefinedAutomatons==null) preDefAutomaton = null;
457               else {
458                 preDefAutomaton = (Automaton_Pattern)this.preDefinedAutomatons.get(((Terminal_RegExp)node).name);
459               }
460               if (preDefAutomaton==null) throw new IllegalArgumentException JavaDoc(((Terminal_RegExp)node).name+" is not defined");
461
462               final Automaton.State startState = preDefAutomaton.getStartState();
463               if (startState==null) {
464                 node = this.addState(false);
465               } else {
466                 java.util.Map JavaDoc map = this.cloneState(startState);
467                 node = (Automaton_Pattern.PState)map.get(startState);
468               }
469             }
470             nodeSymbol = Automaton_Pattern.ELEMENT;
471             break;
472           }
473           case 6: // TERM ::= ELEMENT
474
{
475             node = symbolStack.pop();
476             nodeSymbol = Automaton_Pattern.TERM;
477             break;
478           }
479           case 7: // TERM ::= ELEMENT TERM
480
{
481             PState term = (PState)symbolStack.pop();
482             PState element = (PState)symbolStack.pop();
483
484             node = this.concat(element,term);
485
486             nodeSymbol = Automaton_Pattern.TERM;
487             break;
488           }
489           case 8: // TERM ::= ELEMENT andOp TERM
490
{
491             PState term = (PState)symbolStack.pop();
492             /*Terminal_AndOp = */ symbolStack.pop();
493             PState element = (PState)symbolStack.pop();
494
495             node = this.intersect(element,term);
496             nodeSymbol = Automaton_Pattern.TERM;
497             break;
498           }
499           case 9: // TERM ::= notOp ELEMENT
500
{
501             PState element = (PState)symbolStack.pop();
502             /*Terminal_NotOp = */ symbolStack.pop();
503
504             node = this.complement(element);
505             nodeSymbol = Automaton_Pattern.TERM;
506             break;
507           }
508           default :
509             String JavaDoc message = "\nProgramming error in RE-Parser:"
510                             +"\nACTIONTABLE contains wrong ruleNr "+ruleNr
511                             +"\nor case "+ruleNr+" statement missing";
512             throw new RuntimeException JavaDoc(message);
513         } // end switch(rule)
514

515         for (int i=stateStack.size()-symbolStack.size(); i>1; i--) stateStack.pop();
516         stateNr = ((Integer JavaDoc)stateStack.peek()).intValue();
517         symbolStack.push( node );
518         stateNr = Automaton_Pattern.ACTIONTABLE[stateNr][nodeSymbol][1];
519         break;
520       } // end switch(action)
521

522     } while (action!=Automaton_Pattern.ACCEPT && action!=Automaton_Pattern.ERROR);
523
524     if (action==Automaton_Pattern.ERROR) {
525       System.out.print("parsed:");
526       for (int i=0; i<extdTokenListIndex; ++i) {
527         System.out.print(" "+extdTokenList[i]);
528       }
529       System.out.println("");
530       System.out.print("rest: ");
531       for (int i=extdTokenListIndex; i<extdTokenList.length-1; ++i) {
532         System.out.print(" "+extdTokenList[i]);
533       }
534       System.out.println("");
535       System.out.println("current state: "+stateNr);
536       System.out.print("current Token: "+tokenSymbol);
537
538 // for (int i=0; i<Automaton_Pattern.ACTIONTABLE[stateNr].length; ++i) {
539
// if (Automaton_Pattern.ACTIONTABLE[stateNr][i][0]!=Automaton_Pattern.ERROR) {
540
// System.out.println(
541
// }
542
// }
543
// System.out.println([stateNr][0];
544
throw new Error JavaDoc();
545     }
546
547 // String expression = ""; int tokenPosition=-1;
548
// for (int i=0; i<tokenList.size(); i++) {
549
// if (i==extdTokenListIndex) tokenPosition=expression.length();
550
// expression+= String.valueOf(tokenList.get(i));
551
// }
552
// throw new InvalidExpression(
553
// expression,
554
// String.valueOf( extdTokenList[extdTokenListIndex] ),
555
// tokenPosition
556
// );
557
// }
558
// return (SState)this.minimize(((SState)symbolStack.peek()));
559
// return (SState)this.makeDeterministic(((SState)symbolStack.peek()));
560
return (SState)symbolStack.peek();
561   }
562
563
564
565
/**
 * Recognizer for one kind of terminal in the expression source.
 */
interface TerminalFormat {
  /**
   * Tries to read a terminal from source starting at status.getIndex().
   * The implementation in this file advances the index past the consumed
   * characters on success and returns null when it does not recognize the
   * input.
   */
  public Object parseObject(char[] source, ParsePosition status);

  /** Returns a length bound for this format (TerminalFormat_LITERAL reports 2: escape char plus literal). */
  public int maxLength();
}
570
571 /*
572 final class TerminalFormat_SPECIALLITERALS implements TerminalFormat {
573
574     public TerminalFormat_SPECIALLITERALS() {};
575
576     public Object parseObject(char[] source, ParsePosition status) {
577         final int index = status.getIndex();
578
579         switch (source[index]) {
580             case '|' : status.setIndex(index+1); return Terminal_OrOp.INSTANCE;
581             case '(' : status.setIndex(index+1); return Terminal_GroupBegin.INSTANCE;
582             case ')' : status.setIndex(index+1); return Terminal_GroupEnd.INSTANCE;
583             case '*' : status.setIndex(index+1); return new Terminal_Repetition(0,Terminal_Repetition.UNLIMITED);
584             case '+' : status.setIndex(index+1); return new Terminal_Repetition(1,Terminal_Repetition.UNLIMITED);
585             case '?' : status.setIndex(index+1); return new Terminal_Repetition(0,1);
586             case '.' : status.setIndex(index+1); return Det_AnyLiteral.INSTANCE;
587             default : return null; // throw new ParseException
588         }
589     }
590
591     public int maxLength() {return 1;}
592
593 }
594 */

595
596 final class TerminalFormat_LITERAL implements TerminalFormat {
597
598     private final static char ESCAPE_CHAR = '\\';
599
600     public TerminalFormat_LITERAL() {
601         };
602
603     public Object JavaDoc parseObject(char[] source, ParsePosition status) {
604           int index = status.getIndex();
605
606           switch (source[index]) {
607             case '\\' : {
608               ++index;
609               if (index==source.length) return null;
610               status.setIndex(index+1);
611
612               final PState startState = (PState)Automaton_Pattern.this.addState(false);
613               startState.addTransition(
614                 null,
615                 new CharSet(source[index]),
616                 Automaton_Pattern.this.addState(true)
617               );
618               return startState;
619             }
620
621             case '|' : status.setIndex(index+1); return Terminal_OrOp.INSTANCE;
622             case '&' : status.setIndex(index+1); return Terminal_AndOp.INSTANCE;
623             case '!' : status.setIndex(index+1); return Terminal_NotOp.INSTANCE;
624             case '(' : status.setIndex(index+1); return Terminal_GroupBegin.INSTANCE;
625             case ')' : status.setIndex(index+1); return Terminal_GroupEnd.INSTANCE;
626             case '*' : status.setIndex(index+1); return new Terminal_Repetition(0,Terminal_Repetition.UNLIMITED);
627             case '+' : status.setIndex(index+1); return new Terminal_Repetition(1,Terminal_Repetition.UNLIMITED);
628             case '?' : status.setIndex(index+1); return new Terminal_Repetition(0,1);
629             case '.' : {
630               status.setIndex(index+1);
631               ISet_char charSet = new CharSet();
632               charSet.complement();
633               final PState startState = (PState)Automaton_Pattern.this.addState(false);
634               startState.addTransition(null,charSet,Automaton_Pattern.this.addState(true));
635               return startState;
636             }
637             case '{' :
638             case '}' :
639             case '[' :
640             case ']' :
641             case '<' :
642             case '>' : return null;
643
644             default : {
645               status.setIndex(index+1);
646               final PState startState = (PState)Automaton_Pattern.this.addState(false);
647               startState.addTransition(
648                 null,
649                 new CharSet(source[index]),
650                 Automaton_Pattern.this.addState(true)
651               );
652               return startState;
653             }
654           }
655     }
656
657     public int maxLength() {return 2;}
658
659 }
660
661
662 final class TerminalFormat_LITERALSET implements TerminalFormat {
663
664     private static final int START = 0;
665     private static final int FIRSTCHAR = 1;
666     private static final int NORMAL = 2;
667     private static final int ESCAPED = 3;
668
669     public TerminalFormat_LITERALSET() {
670 // this.automaton = automaton;
671
//startState = automaton.addState(false);
672
//automaton.addTransition(new CharSet('.'),automaton.addState(true));
673
};
674
675     public Object JavaDoc parseObject(char[] source, ParsePosition status) {
676           int index = status.getIndex();
677           final int sourceLength = source.length;
678
679           ISet_char charSet = new CharSet();
680           StringBuffer JavaDoc chars = new StringBuffer JavaDoc();
681           boolean complement = false;
682           boolean intervall = false;
683           int state = START;
684           while (index<sourceLength) {
685             char ch = source[index];
686             switch(state) {
687               case START :
688                 switch(ch) {
689                   case '[' : state = FIRSTCHAR; break;
690                   default : return null;
691                 }
692                 break;
693               case FIRSTCHAR :
694                 switch(ch) {
695                   case ']' : return null;
696                   case '\\' : state = ESCAPED; break;
697                   case '^' : complement = true; state = NORMAL; break;
698                   default : chars.append(ch); state = NORMAL;
699                 }
700                 break;
701               case NORMAL :
702                 switch(ch) {
703                   case '\\' : state = ESCAPED; break;
704                   case ']' : { // END
705
index++;
706                     status.setIndex(index);
707
708                     charSet.addAll(chars.toString());
709                     if (complement) charSet.complement();
710
711                     final PState startState = (PState)Automaton_Pattern.this.addState(false);
712                     startState.addTransition(null,charSet,Automaton_Pattern.this.addState(true));
713                     return startState;
714                   }
715                   default :
716                     if (intervall) {
717                       char from = chars.charAt(chars.length()-1);
718                       if (from>ch) return null;
719                       for (char c=++from; c<=ch; c++) charSet.add(c);
720                       intervall = false;
721                     } else {
722                       if (ch=='-') {
723                         if (chars.length()==0) return null;
724                         intervall = true;
725                       } else chars.append(ch);
726                     }
727                     // STATE = NORMAL; (not necessary because state is NORMAL)
728
}
729                 break;
730               case ESCAPED :
731                 switch(ch) {
732                   default :
733                     if (intervall) {
734                       char from = (char)(((int)chars.charAt(chars.length()-1))+1);
735                       for (char c=from; c<=ch; c++) charSet.add(c);
736                       intervall = false;
737                     } else chars.append(ch);
738                     state = NORMAL;
739                 }
740                 break;
741               default :
742                 String JavaDoc message = "unknown state " + state;
743                 throw new RuntimeException JavaDoc(message);
744             }
745
746             index++;
747           }
748
749           return null;
750     }
751
752     public int maxLength() {return PScanner.UNLIMITED_MAX_LENGTH;}
753 }
754
755
756 final class TerminalFormat_GroupBegin implements TerminalFormat {
757
758   public TerminalFormat_GroupBegin() {}
759
760   public Object JavaDoc parseObject(char[] source, ParsePosition status) {
761     final int sourceLength = source.length;
762     int index = status.getIndex();
763     if (index>=sourceLength) {
764       String JavaDoc message = "";
765       throw new ArrayIndexOutOfBoundsException JavaDoc(message);
766     }
767
768     if (source[index]!='<') return null;
769
770     index++; final int startIndex = index;
771     while (index<sourceLength && source[index]!='>' && source[index]!='.') index++;
772     if (index==sourceLength) return null;
773     if (source[index]=='.') return null;
774
775     status.setIndex(index+1);
776
777 // if (startIndex==index) return Terminal_GroupBegin.INSTANCE;
778
return new Terminal_GroupBegin(new String JavaDoc(source,startIndex,index-startIndex));
779   }
780
781   public int maxLength() {return PScanner.UNLIMITED_MAX_LENGTH;}
782 }
783
784 final class TerminalFormat_GroupEnd implements TerminalFormat {
785
786   public TerminalFormat_GroupEnd() {}
787
788   public Object JavaDoc parseObject(char[] source, ParsePosition status) {
789     final int sourceLength = source.length;
790     int index = status.getIndex();
791     if (index>=sourceLength) {
792       String JavaDoc message = "";
793       throw new ArrayIndexOutOfBoundsException JavaDoc(message);
794     }
795
796     if (source[index]!='<') return null;
797     index++;
798     if (source[index]!='/') return null;
799
800     index++; final int startIndex = index;
801     while (index<sourceLength && source[index]!='>') index++;
802     if (index==sourceLength) return null;
803
804     status.setIndex(index+1);
805
806 // if (startIndex+1==index) return Terminal_GroupEnd.INSTANCE;
807
return new Terminal_GroupEnd(new String JavaDoc(source,startIndex,index-startIndex));
808   }
809
810   public int maxLength() {return PScanner.UNLIMITED_MAX_LENGTH;}
811 }
812
813
814 final class TerminalFormat_REPETITION implements TerminalFormat {
815
816   private final static int START = 0;
817   private final static int FROM_FIRSTCHAR = 1;
818   private final static int FROM_NORMAL = 2;
819   private final static int TO_FIRSTCHAR = 3;
820   private final static int TO_NORMAL = 4;
821
822   public TerminalFormat_REPETITION() {
823   };
824
825   public Object JavaDoc parseObject(char[] source, ParsePosition status) {
826     int index = status.getIndex();
827     final int sourceLength = source.length;
828
829     StringBuffer JavaDoc chars = new StringBuffer JavaDoc();
830     int from = 0;
831     int state = START;
832     while (index<sourceLength) {
833       char ch = source[index];
834       switch(state) {
835         case START :
836           switch(ch) {
837                   case '{' : state = FROM_FIRSTCHAR; break;
838                   default : return null;
839           }
840           break;
841         case FROM_FIRSTCHAR :
842           switch(ch) {
843                   case '0' : case '1' : case '2' : case '3' : case '4' :
844                   case '5' : case '6' : case '7' : case '8' : case '9' :
845                           chars.append(ch);
846                           state = FROM_NORMAL;
847                           break;
848                   default : return null;
849           }
850           break;
851         case FROM_NORMAL :
852           switch(ch) {
853                   case '0' : case '1' : case '2' : case '3' : case '4' :
854                   case '5' : case '6' : case '7' : case '8' : case '9' :
855                           chars.append(ch);
856                           //state = NORMAL; // not necessary because state is NORMAL
857
break;
858                   case ',' :
859                           from = Integer.parseInt(chars.toString());
860                           chars.setLength(0);
861                           state = TO_FIRSTCHAR;
862                           break;
863                   case '}' : // END
864
index++; status.setIndex(index);
865                           final int count = Integer.parseInt(chars.toString());
866                           return new Terminal_Repetition(count,count);
867                   default : return null;
868           }
869           break;
870         case TO_FIRSTCHAR :
871           switch(ch) {
872                   case '0' : case '1' : case '2' : case '3' : case '4' :
873                   case '5' : case '6' : case '7' : case '8' : case '9' :
874                           chars.append(ch);
875                           state = TO_NORMAL;
876                           break;
877                   case '*' : // may be END
878
index++;
879                           if (index==sourceLength) return null;
880                           if (source[index]!='}') return null;
881                           index++; status.setIndex(index);
882                           return new Terminal_Repetition(from,Terminal_Repetition.UNLIMITED);
883                   default : return null;
884           }
885           break;
886         case TO_NORMAL :
887           switch(ch) {
888                   case '0' : case '1' : case '2' : case '3' : case '4' :
889                   case '5' : case '6' : case '7' : case '8' : case '9' :
890                           chars.append(ch);
891                           state = TO_NORMAL;
892                           break;
893                   case '}' : // END
894
index++; status.setIndex(index);
895                           final int to = Integer.parseInt(chars.toString());
896                           return new Terminal_Repetition(from,to);
897                   default : return null;
898           }
899           break;
900       }
901
902       index++;
903     }
904
905     return null;
906   }
907
908
909   public int maxLength() {return PScanner.UNLIMITED_MAX_LENGTH;}
910 }
911
912 final class TerminalFormat_LABEL implements TerminalFormat {
913   public TerminalFormat_LABEL() {
914   };
915
916   public Object JavaDoc parseObject(char[] source, ParsePosition status) {
917     int startIndex = status.getIndex();
918     int index = startIndex;
919     if (source[index++]!='{') return null;
920     if (source[index++]!='=') return null;
921
922     while (index<source.length &&
923           ('A'<=source[index] && source[index]<='Z'
924         || 'a'<=source[index] && source[index]<='z'
925         || '0'<=source[index] && source[index]<='9')) ++index;
926
927     if (index==source.length) return null;
928     if (source[index]!='}') return null;
929
930     status.setIndex(index+1);
931     return new String JavaDoc(source,startIndex+2,index-startIndex-2);
932   }
933   public int maxLength() {return PScanner.UNLIMITED_MAX_LENGTH;}
934 }
935
936 final class TerminalFormat_RegExp implements TerminalFormat {
937   public TerminalFormat_RegExp() {
938   };
939
940   public Object JavaDoc parseObject(char[] source, ParsePosition status) {
941     int startIndex = status.getIndex();
942     int index = startIndex;
943     if (source[index++]!='{') return null;
944
945     if (('A'<=source[index] && source[index]<='Z' || 'a'<=source[index] && source[index]<='z')==false) return null;
946     ++index;
947     while (index<source.length &&
948           ('A'<=source[index] && source[index]<='Z'
949         || 'a'<=source[index] && source[index]<='z'
950         || '0'<=source[index] && source[index]<='9'
951         || source[index]=='_' || source[index]=='/' || source[index]=='-')) ++index;
952
953     if (index==source.length) return null;
954     if (source[index]!='}') return null;
955
956     status.setIndex(index+1);
957     return new Terminal_RegExp(new String JavaDoc(source,startIndex+1,index-startIndex-1));
958   }
959   public int maxLength() {return PScanner.UNLIMITED_MAX_LENGTH;}
960 }
961
962
963
964   protected PScanner scanner = this.newScanner();
965   protected PScanner newScanner() {
966     return new PScanner(
967       new TerminalFormat[] {
968         //new TerminalFormat_SPECIALLITERALS() // RegEx_SpecialLiteralsFormat();
969
new TerminalFormat_LITERALSET() // RegEx_LiteralSe<tFormat();
970
//,new TerminalFormat_STRING() // RegEx_StringFormat();
971
,new TerminalFormat_REPETITION() // RegEx_RepetitionFormat();
972
,new TerminalFormat_LABEL()
973         ,new TerminalFormat_GroupBegin()
974         ,new TerminalFormat_GroupEnd()
975         ,new TerminalFormat_LITERAL() // RegEx_LiteralFormat();
976
,new TerminalFormat_RegExp()
977       },
978       /*terminalFormatsAreExclusive=*/true
979     );
980   }
981
982 }
Popular Tags