1 8 9 package net.sourceforge.chaperon.build; 10 11 import net.sourceforge.chaperon.model.Violations; 12 import net.sourceforge.chaperon.model.pattern.Alternation; 13 import net.sourceforge.chaperon.model.pattern.BeginOfLine; 14 import net.sourceforge.chaperon.model.pattern.CharacterClass; 15 import net.sourceforge.chaperon.model.pattern.CharacterInterval; 16 import net.sourceforge.chaperon.model.pattern.CharacterSet; 17 import net.sourceforge.chaperon.model.pattern.CharacterString; 18 import net.sourceforge.chaperon.model.pattern.Concatenation; 19 import net.sourceforge.chaperon.model.pattern.EndOfLine; 20 import net.sourceforge.chaperon.model.pattern.Pattern; 21 import net.sourceforge.chaperon.model.pattern.PatternGroup; 22 import net.sourceforge.chaperon.model.pattern.UniversalCharacter; 23 import net.sourceforge.chaperon.process.PatternAutomaton; 24 25 31 public class PatternAutomatonBuilder 32 { 33 private Pattern pattern = null; 34 private PatternAutomaton automaton = null; 35 private Violations violations = new Violations(); 36 private int statecount = 0; 37 private int stateindex = 0; 38 private int groupcount = 0; 39 private int groupindex = 0; 40 41 46 public PatternAutomatonBuilder(Pattern pattern) 47 { 48 violations.addViolations(pattern.validate()); 49 50 if ((violations!=null) && (violations.getViolationCount()>0)) 51 throw new IllegalArgumentException ("Pattern is not valid: "+violations.getViolation(0)); 52 53 this.pattern = pattern; 54 55 statecount = getStateCount(pattern)+3; 56 stateindex = statecount-1; 57 58 groupcount = getGroupCount(pattern); 59 groupindex = groupcount; 60 61 PatternAutomaton automaton = new PatternAutomaton(statecount); 62 63 automaton.setGroupCount(groupcount+1); 64 65 int finalstate = stateindex--; 66 67 automaton.setFinalState(finalstate); 68 69 automaton.setType(stateindex, PatternAutomaton.TYPE_GROUPEND); 70 automaton.setGroupIndex(stateindex, 0); 71 automaton.setTransitions(stateindex, new int[]{finalstate}); 72 73 int state = stateindex--; 74 75 state = traverse(automaton, pattern, state); 76 77 automaton.setType(stateindex, PatternAutomaton.TYPE_GROUPSTART); 78 automaton.setGroupIndex(stateindex, 0); 79 automaton.setTransitions(stateindex, new int[]{state}); 80 81 automaton.setFirstState(stateindex); 82 83 this.automaton = automaton; 84 } 85 86 91 public PatternAutomaton getPatternAutomaton() 92 { 93 return automaton; 94 } 95 96 103 private int getGroupCount(Pattern element) 104 { 105 int groupcount = 0; 106 107 if (element instanceof Alternation) 108 { 109 Alternation alternation = (Alternation)element; 110 111 for (int i = 0; i<alternation.getPatternCount(); i++) 112 groupcount += getGroupCount(alternation.getPattern(i)); 113 } 114 else if (element instanceof Concatenation) 115 { 116 Concatenation concatenation = (Concatenation)element; 117 118 for (int i = 0; i<concatenation.getPatternCount(); i++) 119 groupcount += getGroupCount(concatenation.getPattern(i)); 120 } 121 else if (element instanceof PatternGroup) 122 { 123 groupcount++; 124 125 PatternGroup group = (PatternGroup)element; 126 127 for (int i = 0; i<group.getPatternCount(); i++) 128 groupcount += getGroupCount(group.getPattern(i)); 129 } 130 131 return groupcount; 132 } 133 134 141 private int getStateCount(Pattern element) 142 { 143 int factor = 1; 144 int offset = 0; 145 int statecount = 0; 146 147 if ((element.getMinOccurs()==1) && (element.getMaxOccurs()==1)) 149 { 150 } 152 153 else if ((element.getMinOccurs()==0) && (element.getMaxOccurs()==1)) 155 offset = 1; 156 157 else if ((element.getMinOccurs()==1) && (element.getMaxOccurs()==Integer.MAX_VALUE)) 159 offset = 1; 160 161 else if ((element.getMinOccurs()==0) && (element.getMaxOccurs()==Integer.MAX_VALUE)) 163 offset = 2; 164 165 else 167 { 168 factor = element.getMaxOccurs(); 169 offset = 1; 170 } 171 172 if (element instanceof Alternation) 173 { 174 Alternation alternation = (Alternation)element; 175 176 for (int i = 0; i<alternation.getPatternCount(); i++) 177 statecount += getStateCount(alternation.getPattern(i)); 178 179 if (alternation.getPatternCount()>1) 180 statecount++; 181 } 182 else if (element instanceof BeginOfLine) 183 statecount = 1; 184 else if (element instanceof CharacterClass) 185 { 186 CharacterClass characterclass = (CharacterClass)element; 187 188 for (int i = 0; i<characterclass.getCharacterClassElementCount(); i++) 189 { 190 if (characterclass.getCharacterClassElement(i) instanceof CharacterInterval) 191 statecount++; 192 else if (characterclass.getCharacterClassElement(i) instanceof CharacterSet) 193 { 194 CharacterSet set = (CharacterSet)characterclass.getCharacterClassElement(i); 195 196 statecount += set.getCharacters().length(); 197 } 198 } 199 200 statecount++; 201 } 202 else if (element instanceof CharacterString) 203 { 204 CharacterString string = (CharacterString)element; 205 206 statecount += string.getString().length(); 207 } 208 else if (element instanceof Concatenation) 209 { 210 Concatenation concatenation = (Concatenation)element; 211 212 for (int i = 0; i<concatenation.getPatternCount(); i++) 213 statecount += getStateCount(concatenation.getPattern(i)); 214 } 215 else if (element instanceof EndOfLine) 216 statecount = 1; 217 else if (element instanceof PatternGroup) 218 { 219 statecount = 2; 220 221 PatternGroup group = (PatternGroup)element; 222 223 for (int i = 0; i<group.getPatternCount(); i++) 224 statecount += getStateCount(group.getPattern(i)); 225 } 226 else if (element instanceof UniversalCharacter) 227 statecount = 1; 228 else 229 throw new IllegalArgumentException ("Pattern element not recognized"); 230 231 return (factor*statecount)+offset; 232 } 233 234 239 240 249 private int traverse(PatternAutomaton automaton, Pattern element, int laststate) 250 { 251 int firststate; 252 253 if ((element.getMinOccurs()==1) && (element.getMaxOccurs()==1)) 255 firststate = evalPattern(automaton, element, laststate); 256 257 else if ((element.getMinOccurs()==0) && (element.getMaxOccurs()==1)) 259 { 260 int s1 = evalPattern(automaton, element, laststate); 261 automaton.setTransitions(stateindex, new int[]{s1, laststate}); 262 firststate = stateindex--; 263 } 264 265 else if ((element.getMinOccurs()==1) && (element.getMaxOccurs()==Integer.MAX_VALUE)) 267 { 268 int s1 = stateindex--; 269 firststate = evalPattern(automaton, element, s1); 270 automaton.setTransitions(s1, new int[]{firststate, laststate}); 271 } 272 273 else if ((element.getMinOccurs()==0) && (element.getMaxOccurs()==Integer.MAX_VALUE)) 275 { 276 int s2 = stateindex--; 277 int s1 = evalPattern(automaton, element, s2); 278 automaton.setTransitions(s2, new int[]{s1, laststate}); 279 280 firststate = stateindex--; 281 automaton.setTransitions(firststate, new int[]{s1, laststate}); 282 } 283 284 else 286 { 287 int s2 = laststate; 288 for (int i = 0; i<element.getMinOccurs(); i++) 289 s2 = evalPattern(automaton, element, s2); 290 291 int s1 = s2; 292 293 for (int i = element.getMinOccurs(); i<element.getMaxOccurs(); i++) 294 { 295 s1 = evalPattern(automaton, element, s1); 296 if (i>element.getMinOccurs()) 297 automaton.addTransition(s1, s2); 298 } 299 300 firststate = stateindex--; 301 automaton.setTransitions(firststate, new int[]{s1, s2}); 302 } 303 304 if (element instanceof PatternGroup) 305 groupindex--; 306 307 return firststate; 308 } 309 310 319 private int evalPattern(PatternAutomaton automaton, Pattern element, int laststate) 320 { 321 if (element instanceof Alternation) 322 return evalAlternation(automaton, (Alternation)element, laststate); 323 else if (element instanceof BeginOfLine) 324 return evalBeginOfLine(automaton, (BeginOfLine)element, laststate); 325 else if (element instanceof CharacterClass) 326 return evalCharacterClass(automaton, (CharacterClass)element, laststate); 327 else if (element instanceof CharacterString) 328 return evalCharacterString(automaton, (CharacterString)element, laststate); 329 else if (element instanceof Concatenation) 330 return evalConcatenation(automaton, (Concatenation)element, laststate); 331 else if (element instanceof EndOfLine) 332 return evalEndOfLine(automaton, (EndOfLine)element, laststate); 333 else if (element instanceof PatternGroup) 334 return evalPatternGroup(automaton, (PatternGroup)element, laststate); 335 else if (element instanceof UniversalCharacter) 336 return evalUniversalCharacter(automaton, (UniversalCharacter)element, laststate); 337 else 338 throw new IllegalArgumentException ("Pattern element not recognized"); 339 } 340 341 350 private int evalAlternation(PatternAutomaton automaton, Alternation element, int laststate) 351 { 352 if (element.getPatternCount()==1) 353 return traverse(automaton, element.getPattern(0), laststate); 354 else 355 { 356 int nextstate = stateindex--; 357 int state; 358 359 for (int i = element.getPatternCount()-1; i>=0; i--) 360 { 361 state = traverse(automaton, element.getPattern(i), laststate); 362 automaton.addTransition(nextstate, state); 363 } 364 365 return nextstate; 366 } 367 } 368 369 378 private int evalBeginOfLine(PatternAutomaton automaton, BeginOfLine element, int laststate) 379 { 380 automaton.setType(stateindex, PatternAutomaton.TYPE_BOL); 381 automaton.setTransitions(stateindex, new int[]{laststate}); 382 return stateindex--; 383 } 384 385 394 private int evalCharacterClass(PatternAutomaton automaton, CharacterClass element, int laststate) 395 { 396 int state; 397 398 if (!element.isExclusive()) 399 { 400 int firststate = stateindex--; 401 402 for (int i = 0; i<element.getCharacterClassElementCount(); i++) 403 { 404 if (element.getCharacterClassElement(i) instanceof CharacterInterval) 405 { 406 CharacterInterval interval = (CharacterInterval)element.getCharacterClassElement(i); 407 408 automaton.setType(stateindex, PatternAutomaton.TYPE_MATCH); 409 automaton.setInterval(stateindex, interval.getMinimum(), interval.getMaximum()); 410 automaton.addTransition(stateindex, laststate); 411 state = stateindex--; 412 automaton.addTransition(firststate, state); 413 } 414 else if (element.getCharacterClassElement(i) instanceof CharacterSet) 415 { 416 CharacterSet set = (CharacterSet)element.getCharacterClassElement(i); 417 String chars = set.getCharacters(); 418 419 for (int j = 0; j<chars.length(); j++) 420 { 421 automaton.setType(stateindex, PatternAutomaton.TYPE_MATCH); 422 automaton.setInterval(stateindex, chars.charAt(j), chars.charAt(j)); 423 automaton.addTransition(stateindex, laststate); 424 state = stateindex--; 425 automaton.addTransition(firststate, state); 426 } 427 } 428 } 429 430 return firststate; 431 } 432 else 433 { 434 state = stateindex--; 435 automaton.setType(state, PatternAutomaton.TYPE_MATCHANY); 436 automaton.setTransitions(state, new int[]{laststate}); 437 for (int i = element.getCharacterClassElementCount()-1; i>=0; i--) 438 { 439 if (element.getCharacterClassElement(i) instanceof CharacterInterval) 440 { 441 CharacterInterval interval = (CharacterInterval)element.getCharacterClassElement(i); 442 443 automaton.setType(stateindex, PatternAutomaton.TYPE_MATCH); 444 automaton.setInterval(stateindex, interval.getMinimum(), interval.getMaximum()); 445 automaton.setTransitions(stateindex, new int[]{state}); 446 state = stateindex--; 447 } 448 else if (element.getCharacterClassElement(i) instanceof CharacterSet) 449 { 450 CharacterSet set = (CharacterSet)element.getCharacterClassElement(i); 451 String chars = set.getCharacters(); 452 453 for (int j = 0; j<chars.length(); j++) 454 { 455 automaton.setType(stateindex, PatternAutomaton.TYPE_EXMATCH); 456 automaton.setInterval(stateindex, chars.charAt(j), chars.charAt(j)); 457 automaton.setType(stateindex, PatternAutomaton.TYPE_EXMATCH); 458 automaton.setTransitions(stateindex, new int[]{state}); 459 state = stateindex--; 460 } 461 } 462 } 463 464 return state; 465 } 466 } 467 468 477 private int evalCharacterString(PatternAutomaton automaton, CharacterString element, int laststate) 478 { 479 int state = laststate; 480 481 for (int i = element.getString().length()-1; i>=0; i--) 482 { 483 automaton.setType(stateindex, PatternAutomaton.TYPE_MATCH); 484 automaton.setInterval(stateindex, element.getString().charAt(i), element.getString().charAt(i)); 485 automaton.setTransitions(stateindex, new int[]{state}); 486 state = stateindex--; 487 } 488 489 return state; 490 } 491 492 501 private int evalConcatenation(PatternAutomaton automaton, Concatenation element, int laststate) 502 { 503 int state = laststate; 504 505 for (int i = element.getPatternCount()-1; i>=0; i--) 506 state = traverse(automaton, element.getPattern(i), state); 507 508 return state; 509 } 510 511 520 private int evalEndOfLine(PatternAutomaton automaton, EndOfLine element, int laststate) 521 { 522 automaton.setType(stateindex, PatternAutomaton.TYPE_EOL); 523 automaton.setTransitions(stateindex, new int[]{laststate}); 524 return stateindex--; 525 } 526 527 536 private int evalPatternGroup(PatternAutomaton automaton, PatternGroup element, int laststate) 537 { 538 int endstate = stateindex--; 539 540 automaton.setType(endstate, PatternAutomaton.TYPE_GROUPEND); 541 automaton.setGroupIndex(endstate, groupindex); 542 automaton.setTransitions(endstate, new int[]{laststate}); 543 544 int nextstate = endstate; 545 546 for (int i = element.getPatternCount()-1; i>=0; i--) 547 nextstate = traverse(automaton, element.getPattern(i), nextstate); 548 549 automaton.setGroupIndex(endstate, groupindex); 550 551 automaton.setType(stateindex, PatternAutomaton.TYPE_GROUPSTART); 552 automaton.setGroupIndex(stateindex, groupindex); 553 automaton.setTransitions(stateindex, new int[]{nextstate}); 554 return stateindex--; 555 } 556 557 566 private int evalUniversalCharacter(PatternAutomaton automaton, UniversalCharacter element, 567 int laststate) 568 { 569 automaton.setType(stateindex, PatternAutomaton.TYPE_MATCHANY); 570 automaton.setTransitions(stateindex, new int[]{laststate}); 571 return stateindex--; 572 } 573 } 574 | Popular Tags |