1 19 20 package org.netbeans.modules.languages.parser; 21 22 import java.util.Map ; 23 import org.netbeans.api.languages.ParseException; 24 import org.netbeans.api.languages.CharInput; 25 import java.util.Collections ; 26 import java.util.HashSet ; 27 import java.util.Iterator ; 28 import java.util.Set ; 29 import org.netbeans.modules.languages.Language.TokenType; 30 import org.netbeans.modules.languages.parser.StringInput; 31 32 public class Pattern <V> { 33 34 private static final Character STAR = new Character ((char) 0); 35 private static NodeFactory<Integer > nodeFactory = new NodeFactory<Integer > () { 36 private int counter = 1; 37 public Integer createNode () { 38 return Integer.valueOf (counter++); 39 } 40 }; 41 42 public static <V> Pattern<V> create () { 43 return new Pattern<V> (); 44 } 45 46 public static <V> Pattern<V> create (String input) throws ParseException { 47 if (input.length () == 0) throw new ParseException (); 48 return create (new StringInput (input, "")); 49 } 50 51 public static <V> Pattern<V> create (CharInput input) throws ParseException { 52 Pattern<V> p = createIn (input); 53 DG<Integer ,Character ,Integer ,V> ndg = DGUtils.<Integer ,Character ,Integer ,V>reduce (p.dg, nodeFactory); 54 return new Pattern<V> (ndg); 55 } 56 57 private static <V> Pattern<V> createIn (CharInput input) throws ParseException { 58 Pattern<V> pattern = new Pattern<V> (); 59 Pattern<V> last = null; 60 char ch = input.next (); 61 while (ch != 0) { 62 switch (ch) { 63 case ' ': 64 case '\t': 65 case '\n': 66 case '\r': 67 input.read (); 68 break; 69 case '*': 70 input.read (); 71 if (last == null) throw new ParseException (); 72 last = last.star (); 73 break; 74 case '?': 75 input.read (); 76 if (last == null) throw new ParseException (); 77 last = last.question (); 78 break; 79 case '+': 80 input.read (); 81 if (last == null) throw new ParseException (); 82 last = last.plus (); 83 break; 84 case '(': 85 input.read (); 86 if (last != null) pattern = pattern.append (last); 87 last = createIn (input); 88 if (input.read () != ')') 89 throw new ParseException (") expected: " + input); 90 break; 91 case '\'': 99 case '"': 100 input.read (); 101 if (last != null) pattern = pattern.append (last); 102 last = Pattern.create (); 103 StringBuilder sb = new StringBuilder (); 104 ch = input.next (); 105 while (ch != '"' && ch != '\'') { 106 if (ch == 0) 107 throw new ParseException (sb.toString ()); 108 if (ch == '\\') { 109 input.read (); 110 switch (input.next ()) { 111 case '\\': 112 input.read (); 113 last = last.append (new Pattern<V> ( 114 new Character ('\\') 115 )); 116 sb.append ('\\'); 117 break; 118 case 'n': 119 input.read (); 120 last = last.append (new Pattern<V> ( 121 new Character ('\n') 122 )); 123 sb.append ('\n'); 124 break; 125 case 'r': 126 input.read (); 127 last = last.append (new Pattern<V> ( 128 new Character ('\r') 129 )); 130 sb.append ('\r'); 131 break; 132 case 't': 133 input.read (); 134 last = last.append (new Pattern<V> ( 135 new Character ('\t') 136 )); 137 sb.append ('\t'); 138 break; 139 case '"': 140 input.read (); 141 last = last.append (new Pattern<V> ( 142 new Character ('"') 143 )); 144 sb.append ('"'); 145 break; 146 case '\'': 147 input.read (); 148 last = last.append (new Pattern<V> ( 149 new Character ('\'') 150 )); 151 sb.append ('\''); 152 break; 153 default: 154 throw new ParseException ("Unknown character after \\:" + input.toString ()); 155 } 156 } else { 157 Character charr = new Character (input.read ()); 158 last = last.append (new Pattern<V> (charr)); 159 sb.append (charr.charValue ()); 160 } 161 ch = input.next (); 162 } 163 input.read (); 164 break; 165 case '|': 166 input.read (); 167 if (last != null) pattern = pattern.append (last); 168 last = null; 169 pattern = pattern.merge (Pattern.<V>createIn (input)); 170 return pattern; 171 case '-': 172 if (last != null) pattern = pattern.append (last); 173 input.read (); 174 skipWhitespaces (input); 175 ch = input.next (); 176 if (ch != '\'' && ch != '"') 177 throw new ParseException (input.toString ()); 178 input.read (); 179 ch = input.next (); 180 if (ch == '\'' || ch == '"') 181 throw new ParseException (input.toString ()); 182 Character edge = new Character (input.next ()); 183 last = new Pattern<V> (true, Collections.<Character >singleton (edge)); 184 last = last.star ().append (new Pattern<V> (edge)); 185 input.read (); 186 ch = input.next (); 187 while (ch != '\'' && ch != '"') { 188 if (ch == 0) 189 throw new ParseException (input.toString ()); 190 last = last.plus (); 191 Integer endN = last.dg.getEnds ().iterator ().next (); 192 Integer newE = last.nodeFactory.createNode (); 193 last.dg.addNode (newE); 194 last.dg.addEdge (endN, newE, new Character (input.next ())); 195 last.dg.setEnds (Collections.singleton (newE)); 196 input.read (); 197 ch = input.next (); 198 } 199 input.read (); 200 break; 201 case ')': 202 if (last != null) pattern = pattern.append (last); 203 return pattern; 204 case '.': 205 input.read (); 206 if (last != null) pattern = pattern.append (last); 207 last = new Pattern<V> (Pattern.STAR); 208 break; 209 case '[': 210 input.read (); 211 if (last != null) pattern = pattern.append (last); 212 boolean not = false; 213 ch = input.next (); 214 if (ch == '^') { 215 input.read (); 216 ch = input.next (); 217 not = true; 218 } 219 Set <Character > set = new HashSet <Character > (); 220 char l = (char) 0; 221 boolean minus = false; 222 ch = input.next (); 223 while (ch != ']' && ch != 0) { 224 switch (ch) { 225 case ' ': 226 case '\t': 227 case '\n': 228 case '\r': 229 input.read (); 230 break; 231 case '\'': 232 case '"': 233 char ol = l; 234 if (l != 0 && !minus) 235 set.add (new Character (l)); 236 input.read (); 237 ch = input.next (); 238 if (ch == '\\') { 239 input.read (); 240 ch = input.next (); 241 switch (ch) { 242 case 'n': 243 l = '\n'; 244 break; 245 case 't': 246 l = '\t'; 247 break; 248 case 'r': 249 l = '\r'; 250 break; 251 case '\'': 252 l = '\''; 253 break; 254 case '\\': 255 l = '\\'; 256 break; 257 case '"': 258 l = '"'; 259 break; 260 default: 261 throw new ParseException (input.toString ()); 262 } input.read (); 264 } else l = input.read (); 266 ch = input.next (); 267 if (ch != '"' && ch != '\'') 268 throw new ParseException (input.toString ()); 269 input.read (); 270 if (minus) { 271 addInterval (set, ol, l); 272 l = 0; 273 } 274 minus = false; 275 break; case '-': 277 input.read (); 278 if (l == 0) throw new ParseException (input.toString ()); 279 minus = true; 280 break; 281 default: 291 throw new ParseException (input.toString ()); 292 } ch = input.next (); 294 } if (minus) throw new ParseException (); 296 if (l != 0) 297 set.add (new Character (l)); 298 input.read (); 299 last = new Pattern<V> (not, set); 300 break; 301 default: 302 throw new ParseException ("Unexpected char '" + input.next () + ":" + input.toString ()); 303 } ch = input.next (); 308 } if (last != null) pattern = pattern.append (last); 310 return pattern; 311 } 312 313 360 private static Set <Character > whitespace = new HashSet <Character > (); 361 static { 362 whitespace.add (new Character (' ')); 363 whitespace.add (new Character ('\n')); 364 whitespace.add (new Character ('\r')); 365 whitespace.add (new Character ('\t')); 366 } 367 368 private static void skipWhitespaces (CharInput input) { 369 while (whitespace.contains (new Character (input.next ()))) 370 input.read (); 371 } 372 373 private static void addInterval (Set <Character > set, char from, char to) 374 throws ParseException { 375 if (from > to) throw new ParseException (); 376 do { 377 set.add (new Character (from)); 378 from++; 379 } while (from <= to); 380 } 381 382 private DG<Integer ,Character ,Integer ,V> dg; 384 private Pattern (DG<Integer ,Character ,Integer ,V> dg) { 385 this.dg = dg; 386 } 387 388 private Pattern () { 389 dg = DG.<Integer ,Character ,Integer ,V>createDG (nodeFactory.createNode ()); 390 } 395 396 private Pattern (Pattern<V> p) { 397 dg = DGUtils.<Integer ,Character ,Integer ,V>cloneDG (p.dg, false, nodeFactory); 398 } 399 400 private Pattern (Character edge) { 401 Integer start = nodeFactory.createNode (); 402 dg = DG.<Integer ,Character ,Integer ,V>createDG (start); 403 Integer end = nodeFactory.createNode (); 404 dg.addNode (end); 405 dg.addEdge (start, end, edge); 406 dg.setEnds (Collections.<Integer >singleton (end)); 407 } 408 409 private Pattern (boolean not, Set <Character > edges) { 410 Integer start = nodeFactory.createNode (); 411 dg = DG.<Integer ,Character ,Integer ,V>createDG (start); 412 Integer end = nodeFactory.createNode (); 413 dg.addNode (end); 414 dg.setStart (start); 415 dg.setEnds (Collections.<Integer >emptySet ()); 416 Iterator <Character > it = edges.iterator (); 417 while (it.hasNext ()) { 418 Character edge = it.next (); 419 dg.addEdge (start, end, edge); 420 } 421 if (not) { 422 Integer failedState = nodeFactory.createNode (); 423 dg.addNode (failedState); 424 dg.addEdge (start, failedState, Pattern.STAR); 425 dg.addEnd (failedState); 426 } else 427 dg.addEnd (end); 428 } 429 430 public Pattern<V> clonePattern () { 431 return new Pattern<V> (this); 432 } 433 434 public Pattern<V> star () { 435 DG<Integer ,Character ,Integer ,V> ndg = DGUtils.<Integer ,Character ,Integer ,V>plus (dg, STAR, nodeFactory); 436 ndg = DGUtils.<Integer ,Character ,Integer ,V>merge (DG.<Integer ,Character ,Integer ,V>createDG (nodeFactory.createNode ()), ndg, STAR, nodeFactory); 437 Pattern<V> p = new Pattern<V> (ndg); 438 return p; 439 } 440 441 public Pattern<V> plus () { 442 DG<Integer ,Character ,Integer ,V> ndg = DGUtils.<Integer ,Character ,Integer ,V>plus (dg, STAR, nodeFactory); 443 Pattern<V> p = new Pattern<V> (ndg); 444 return p; 445 } 446 447 public Pattern<V> question () { 448 DG<Integer ,Character ,Integer ,V> ndg = DGUtils.<Integer ,Character ,Integer ,V>cloneDG (dg, true, nodeFactory); 449 ndg.addEnd (ndg.getStartNode ()); 450 Pattern<V> p = new Pattern<V> (ndg); 451 return p; 452 } 453 454 public Pattern<V> merge (Pattern<V> parser) { 455 DG<Integer ,Character ,Integer ,V> ndg = DGUtils.<Integer ,Character ,Integer ,V>merge (dg, parser.dg, STAR, nodeFactory); 456 Pattern<V> p = new Pattern<V> (ndg); 457 return p; 458 } 459 460 public Pattern<V> append (Pattern<V> parser) { 461 DG<Integer ,Character ,Integer ,V> ndg = DGUtils.<Integer ,Character ,Integer ,V>append (dg, parser.dg, STAR, nodeFactory); 462 Pattern<V> p = new Pattern<V> (ndg); 463 return p; 464 } 465 466 boolean matches (String text) { 467 int i = 0; 468 Integer state = dg.getStartNode (); 469 while (i < text.length ()) { 470 state = dg.getNode (state, new Character (text.charAt (i++))); 471 if (state == null) return false; 472 } 473 return dg.getEnds ().contains (state); 474 } 475 476 public Integer next (CharInput input) { 477 return next (dg.getStartNode (), input); 478 } 479 480 public Integer next (Integer state, CharInput input) { 481 int lastIndex = input.getIndex (); 482 Integer lastState = null; 483 while (state != null) { 484 if (dg.getEnds ().contains (state)) { 485 lastState = state; 486 lastIndex = input.getIndex (); 487 } 488 if (input.eof ()) break; 489 Integer newState = dg.getNode (state, new Character (input.next ())); 490 if (newState != null) 491 state = newState; 492 else 493 state = dg.getNode (state, STAR); 494 if (state != null) input.read (); 495 } 496 input.setIndex (lastIndex); 497 return lastState; 498 } 499 500 public String toString () { 501 return dg.toString (); 502 } 503 504 508 512 513 public Object read (CharInput input) { 514 if (input.eof ()) return null; 515 int originalIndex = input.getIndex (); 516 int lastIndex = -1; 517 TokenType lastTT = null; 518 Integer node = dg.getStartNode (); 519 while (!input.eof ()) { 520 Character edge = new Character (input.next ()); 521 Integer nnode = dg.getNode (node, edge); 522 if (nnode == null) { 523 edge = Pattern.STAR; 524 nnode = dg.getNode (node, edge); 525 } 526 527 if (input.getIndex () > originalIndex) { 528 TokenType bestTT = getBestTT (node); 529 if (bestTT != null) { 530 lastTT = bestTT; 531 lastIndex = input.getIndex (); 532 } 533 } 534 535 if (nnode == null || 536 ( dg.getEdges (nnode).isEmpty () && 537 dg.getProperties (nnode).isEmpty () 538 ) 539 ) { 540 if (lastTT == null) { 541 return null; 543 } 544 input.setIndex (lastIndex); 545 return lastTT; 546 } 547 548 input.read (); 549 node = nnode; 550 } 551 552 TokenType bestTT = getBestTT (node); 553 if (bestTT != null) { 554 lastTT = bestTT; 555 lastIndex = input.getIndex (); 556 } 557 558 if (lastTT == null) return null; 559 return lastTT; 560 } 561 562 private TokenType getBestTT (Integer node) { 563 Map tts = dg.getProperties (node); 564 TokenType best = null; 565 Iterator it = tts.keySet ().iterator (); 566 while (it.hasNext ()) { 567 Integer i = (Integer ) it.next (); 568 TokenType tt = (TokenType) tts.get (i); 569 if (best == null || best.getPriority () > tt.getPriority ()) 570 best = tt; 571 } 572 return best; 573 } 574 575 void mark (int priority, V r) { 576 Iterator <Integer > it = dg.getEnds ().iterator (); 577 while (it.hasNext ()) { 578 Integer s = it.next (); 579 dg.setProperty ( 580 s, 581 priority, 582 r 583 ); 584 } 585 } 586 } 587 | Popular Tags |