KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > netbeans > modules > languages > parser > Pattern


1 /*
2  * The contents of this file are subject to the terms of the Common Development
3  * and Distribution License (the License). You may not use this file except in
4  * compliance with the License.
5  *
6  * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7  * or http://www.netbeans.org/cddl.txt.
8  *
9  * When distributing Covered Code, include this CDDL Header Notice in each file
10  * and include the License file at http://www.netbeans.org/cddl.txt.
11  * If applicable, add the following below the CDDL Header, with the fields
12  * enclosed by brackets [] replaced by your own identifying information:
13  * "Portions Copyrighted [year] [name of copyright owner]"
14  *
15  * The Original Software is NetBeans. The Initial Developer of the Original
16  * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
17  * Microsystems, Inc. All Rights Reserved.
18  */

19
20 package org.netbeans.modules.languages.parser;
21
22 import java.util.Map JavaDoc;
23 import org.netbeans.api.languages.ParseException;
24 import org.netbeans.api.languages.CharInput;
25 import java.util.Collections JavaDoc;
26 import java.util.HashSet JavaDoc;
27 import java.util.Iterator JavaDoc;
28 import java.util.Set JavaDoc;
29 import org.netbeans.modules.languages.Language.TokenType;
30 import org.netbeans.modules.languages.parser.StringInput;
31
32 public class Pattern <V> {
33     
34     private static final Character JavaDoc STAR = new Character JavaDoc ((char) 0);
35     private static NodeFactory<Integer JavaDoc> nodeFactory = new NodeFactory<Integer JavaDoc> () {
36         private int counter = 1;
37         public Integer JavaDoc createNode () {
38             return Integer.valueOf (counter++);
39         }
40     };
41     
42     public static <V> Pattern<V> create () {
43         return new Pattern<V> ();
44     }
45     
46     public static <V> Pattern<V> create (String JavaDoc input) throws ParseException {
47         if (input.length () == 0) throw new ParseException ();
48         return create (new StringInput (input, ""));
49     }
50     
51     public static <V> Pattern<V> create (CharInput input) throws ParseException {
52         Pattern<V> p = createIn (input);
53         DG<Integer JavaDoc,Character JavaDoc,Integer JavaDoc,V> ndg = DGUtils.<Integer JavaDoc,Character JavaDoc,Integer JavaDoc,V>reduce (p.dg, nodeFactory);
54         return new Pattern<V> (ndg);
55     }
56     
57     private static <V> Pattern<V> createIn (CharInput input) throws ParseException {
58         Pattern<V> pattern = new Pattern<V> ();
59         Pattern<V> last = null;
60         char ch = input.next ();
61         while (ch != 0) {
62             switch (ch) {
63                 case ' ':
64                 case '\t':
65                 case '\n':
66                 case '\r':
67                     input.read ();
68                     break;
69                 case '*':
70                     input.read ();
71                     if (last == null) throw new ParseException ();
72                     last = last.star ();
73                     break;
74                 case '?':
75                     input.read ();
76                     if (last == null) throw new ParseException ();
77                     last = last.question ();
78                     break;
79                 case '+':
80                     input.read ();
81                     if (last == null) throw new ParseException ();
82                     last = last.plus ();
83                     break;
84                 case '(':
85                     input.read ();
86                     if (last != null) pattern = pattern.append (last);
87                     last = createIn (input);
88                     if (input.read () != ')')
89                         throw new ParseException (") expected: " + input);
90                     break;
91 // case '<':
92
// input.read ();
93
// if (last != null) pattern = pattern.append (last);
94
// last = new Pattern (readToken (input));
95
// if (input.read () != '>')
96
// throw new ParseException ("> expected: " + input);
97
// break;
98
case '\'':
99                 case '"':
100                     input.read ();
101                     if (last != null) pattern = pattern.append (last);
102                     last = Pattern.create ();
103                     StringBuilder JavaDoc sb = new StringBuilder JavaDoc ();
104                     ch = input.next ();
105                     while (ch != '"' && ch != '\'') {
106                         if (ch == 0)
107                             throw new ParseException (sb.toString ());
108                         if (ch == '\\') {
109                             input.read ();
110                             switch (input.next ()) {
111                                 case '\\':
112                                     input.read ();
113                                     last = last.append (new Pattern<V> (
114                                         new Character JavaDoc ('\\')
115                                     ));
116                                     sb.append ('\\');
117                                     break;
118                                 case 'n':
119                                     input.read ();
120                                     last = last.append (new Pattern<V> (
121                                         new Character JavaDoc ('\n')
122                                     ));
123                                     sb.append ('\n');
124                                     break;
125                                 case 'r':
126                                     input.read ();
127                                     last = last.append (new Pattern<V> (
128                                         new Character JavaDoc ('\r')
129                                     ));
130                                     sb.append ('\r');
131                                     break;
132                                 case 't':
133                                     input.read ();
134                                     last = last.append (new Pattern<V> (
135                                         new Character JavaDoc ('\t')
136                                     ));
137                                     sb.append ('\t');
138                                     break;
139                                 case '"':
140                                     input.read ();
141                                     last = last.append (new Pattern<V> (
142                                         new Character JavaDoc ('"')
143                                     ));
144                                     sb.append ('"');
145                                     break;
146                                 case '\'':
147                                     input.read ();
148                                     last = last.append (new Pattern<V> (
149                                         new Character JavaDoc ('\'')
150                                     ));
151                                     sb.append ('\'');
152                                     break;
153                                 default:
154                                     throw new ParseException ("Unknown character after \\:" + input.toString ());
155                             }
156                         } else {
157                             Character JavaDoc charr = new Character JavaDoc (input.read ());
158                             last = last.append (new Pattern<V> (charr));
159                             sb.append (charr.charValue ());
160                         }
161                         ch = input.next ();
162                     }
163                     input.read ();
164                     break;
165                 case '|':
166                     input.read ();
167                     if (last != null) pattern = pattern.append (last);
168                     last = null;
169                     pattern = pattern.merge (Pattern.<V>createIn (input));
170                     return pattern;
171                 case '-':
172                     if (last != null) pattern = pattern.append (last);
173                     input.read ();
174                     skipWhitespaces (input);
175                     ch = input.next ();
176                     if (ch != '\'' && ch != '"')
177                         throw new ParseException (input.toString ());
178                     input.read ();
179                     ch = input.next ();
180                     if (ch == '\'' || ch == '"')
181                         throw new ParseException (input.toString ());
182                     Character JavaDoc edge = new Character JavaDoc (input.next ());
183                     last = new Pattern<V> (true, Collections.<Character JavaDoc>singleton (edge));
184                     last = last.star ().append (new Pattern<V> (edge));
185                     input.read ();
186                     ch = input.next ();
187                     while (ch != '\'' && ch != '"') {
188                         if (ch == 0)
189                             throw new ParseException (input.toString ());
190                         last = last.plus ();
191                         Integer JavaDoc endN = last.dg.getEnds ().iterator ().next ();
192                         Integer JavaDoc newE = last.nodeFactory.createNode ();
193                         last.dg.addNode (newE);
194                         last.dg.addEdge (endN, newE, new Character JavaDoc (input.next ()));
195                         last.dg.setEnds (Collections.singleton (newE));
196                         input.read ();
197                         ch = input.next ();
198                     }
199                     input.read ();
200                     break;
201                 case ')':
202                     if (last != null) pattern = pattern.append (last);
203                     return pattern;
204                 case '.':
205                     input.read ();
206                     if (last != null) pattern = pattern.append (last);
207                     last = new Pattern<V> (Pattern.STAR);
208                     break;
209                 case '[':
210                     input.read ();
211                     if (last != null) pattern = pattern.append (last);
212                     boolean not = false;
213                     ch = input.next ();
214                     if (ch == '^') {
215                         input.read ();
216                         ch = input.next ();
217                         not = true;
218                     }
219                     Set JavaDoc<Character JavaDoc> set = new HashSet JavaDoc<Character JavaDoc> ();
220                     char l = (char) 0;
221                     boolean minus = false;
222                     ch = input.next ();
223                     while (ch != ']' && ch != 0) {
224                         switch (ch) {
225                             case ' ':
226                             case '\t':
227                             case '\n':
228                             case '\r':
229                                 input.read ();
230                                 break;
231                             case '\'':
232                             case '"':
233                                 char ol = l;
234                                 if (l != 0 && !minus)
235                                     set.add (new Character JavaDoc (l));
236                                 input.read ();
237                                 ch = input.next ();
238                                 if (ch == '\\') {
239                                     input.read ();
240                                     ch = input.next ();
241                                     switch (ch) {
242                                         case 'n':
243                                             l = '\n';
244                                             break;
245                                         case 't':
246                                             l = '\t';
247                                             break;
248                                         case 'r':
249                                             l = '\r';
250                                             break;
251                                         case '\'':
252                                             l = '\'';
253                                             break;
254                                         case '\\':
255                                             l = '\\';
256                                             break;
257                                         case '"':
258                                             l = '"';
259                                             break;
260                                         default:
261                                             throw new ParseException (input.toString ());
262                                     } // switch
263
input.read ();
264                                 } else // if '\\'
265
l = input.read ();
266                                 ch = input.next ();
267                                 if (ch != '"' && ch != '\'')
268                                     throw new ParseException (input.toString ());
269                                 input.read ();
270                                 if (minus) {
271                                     addInterval (set, ol, l);
272                                     l = 0;
273                                 }
274                                 minus = false;
275                                 break; // case '"'
276
case '-':
277                                 input.read ();
278                                 if (l == 0) throw new ParseException (input.toString ());
279                                 minus = true;
280                                 break;
281 // case '<':
282
// input.read ();
283
// if (minus) throw new ParseException (input.toString ());
284
// if (l != 0)
285
// set.add (new Character (l));
286
// set.add (readToken (input));
287
// if (input.read () != '>')
288
// throw new ParseException ("> expected: " + input);
289
// break;
290
default:
291                                 throw new ParseException (input.toString ());
292                         } // switch
293
ch = input.next ();
294                     } // while
295
if (minus) throw new ParseException ();
296                     if (l != 0)
297                         set.add (new Character JavaDoc (l));
298                     input.read ();
299                     last = new Pattern<V> (not, set);
300                     break;
301                 default:
302                     throw new ParseException ("Unexpected char '" + input.next () + ":" + input.toString ());
303 // input.read ();
304
// if (last != null) pattern = pattern.append (last);
305
// last = new Pattern (new Character (ch));
306
} // switch
307
ch = input.next ();
308         } // while
309
if (last != null) pattern = pattern.append (last);
310         return pattern;
311     }
312
313 // private static ASTToken readToken (CharInput input) throws ParseException {
314
// StringBuilder sb = new StringBuilder ();
315
// char ch = input.next ();
316
// while (ch != ',' && ch != '>') {
317
// if (ch == 0) throw new ParseException ("Unexpected end." + input.toString ());
318
// sb.append (ch);
319
// input.read ();
320
// ch = input.next ();
321
// }
322
// ch = input.next ();
323
// String type = sb.toString ().trim ();
324
// if (ch == '>') return ASTToken.create (type, null);
325
// input.read ();
326
// skipWhitespaces (input);
327
// sb = new StringBuilder ();
328
// ch = input.next ();
329
// boolean read = ch != '"' && ch != '\'';
330
// if (!read) {
331
// input.read ();
332
// ch = input.next ();
333
// }
334
// while (ch != '>' && ch != '"' && ch != '\'' && ch != ',') {
335
// if (ch == 0) throw new ParseException ("Unexpected end." + input.toString ());
336
// sb.append (ch);
337
// input.read ();
338
// ch = input.next ();
339
// }
340
// if (read && (ch == '"' || ch == '\'')) throw new ParseException ("Unexpected \":" + input.toString ());
341
// if (!read) input.read ();
342
// String identifier = null;
343
// String name = null;
344
// if (read) name = sb.toString ();
345
// else identifier = sb.toString ();
346
// if (!read && ch == ',') {
347
// ch = input.next ();
348
// sb = new StringBuilder ();
349
// while (ch != '>') {
350
// if (ch == 0) throw new ParseException ("Unexpected end." + input.toString ());
351
// sb.append (ch);
352
// input.read ();
353
// ch = input.next ();
354
// }
355
// name = sb.toString ();
356
// }
357
// return ASTToken.create (type, identifier);
358
// }
359

360     private static Set JavaDoc<Character JavaDoc> whitespace = new HashSet JavaDoc<Character JavaDoc> ();
361     static {
362         whitespace.add (new Character JavaDoc (' '));
363         whitespace.add (new Character JavaDoc ('\n'));
364         whitespace.add (new Character JavaDoc ('\r'));
365         whitespace.add (new Character JavaDoc ('\t'));
366     }
367     
368     private static void skipWhitespaces (CharInput input) {
369         while (whitespace.contains (new Character JavaDoc (input.next ())))
370             input.read ();
371     }
372     
373     private static void addInterval (Set JavaDoc<Character JavaDoc> set, char from, char to)
374     throws ParseException {
375         if (from > to) throw new ParseException ();
376         do {
377             set.add (new Character JavaDoc (from));
378             from++;
379         } while (from <= to);
380     }
381     
382     private DG<Integer JavaDoc,Character JavaDoc,Integer JavaDoc,V> dg;// = DG.<Integer,Character,K,V>createDG ();
383

384     private Pattern (DG<Integer JavaDoc,Character JavaDoc,Integer JavaDoc,V> dg) {
385         this.dg = dg;
386     }
387     
388     private Pattern () {
389         dg = DG.<Integer JavaDoc,Character JavaDoc,Integer JavaDoc,V>createDG (nodeFactory.createNode ());
390 // Integer start = nodeFactory.createNode ();
391
// dg.addNode (start);
392
// dg.setStart (start);
393
// dg.addEnd (start);
394
}
395
396     private Pattern (Pattern<V> p) {
397         dg = DGUtils.<Integer JavaDoc,Character JavaDoc,Integer JavaDoc,V>cloneDG (p.dg, false, nodeFactory);
398     }
399     
400     private Pattern (Character JavaDoc edge) {
401         Integer JavaDoc start = nodeFactory.createNode ();
402         dg = DG.<Integer JavaDoc,Character JavaDoc,Integer JavaDoc,V>createDG (start);
403         Integer JavaDoc end = nodeFactory.createNode ();
404         dg.addNode (end);
405         dg.addEdge (start, end, edge);
406         dg.setEnds (Collections.<Integer JavaDoc>singleton (end));
407     }
408
409     private Pattern (boolean not, Set JavaDoc<Character JavaDoc> edges) {
410         Integer JavaDoc start = nodeFactory.createNode ();
411         dg = DG.<Integer JavaDoc,Character JavaDoc,Integer JavaDoc,V>createDG (start);
412         Integer JavaDoc end = nodeFactory.createNode ();
413         dg.addNode (end);
414         dg.setStart (start);
415         dg.setEnds (Collections.<Integer JavaDoc>emptySet ());
416         Iterator JavaDoc<Character JavaDoc> it = edges.iterator ();
417         while (it.hasNext ()) {
418             Character JavaDoc edge = it.next ();
419             dg.addEdge (start, end, edge);
420         }
421         if (not) {
422             Integer JavaDoc failedState = nodeFactory.createNode ();
423             dg.addNode (failedState);
424             dg.addEdge (start, failedState, Pattern.STAR);
425             dg.addEnd (failedState);
426         } else
427             dg.addEnd (end);
428     }
429     
430     public Pattern<V> clonePattern () {
431         return new Pattern<V> (this);
432     }
433
434     public Pattern<V> star () {
435         DG<Integer JavaDoc,Character JavaDoc,Integer JavaDoc,V> ndg = DGUtils.<Integer JavaDoc,Character JavaDoc,Integer JavaDoc,V>plus (dg, STAR, nodeFactory);
436         ndg = DGUtils.<Integer JavaDoc,Character JavaDoc,Integer JavaDoc,V>merge (DG.<Integer JavaDoc,Character JavaDoc,Integer JavaDoc,V>createDG (nodeFactory.createNode ()), ndg, STAR, nodeFactory);
437         Pattern<V> p = new Pattern<V> (ndg);
438         return p;
439     }
440
441     public Pattern<V> plus () {
442         DG<Integer JavaDoc,Character JavaDoc,Integer JavaDoc,V> ndg = DGUtils.<Integer JavaDoc,Character JavaDoc,Integer JavaDoc,V>plus (dg, STAR, nodeFactory);
443         Pattern<V> p = new Pattern<V> (ndg);
444         return p;
445     }
446
447     public Pattern<V> question () {
448         DG<Integer JavaDoc,Character JavaDoc,Integer JavaDoc,V> ndg = DGUtils.<Integer JavaDoc,Character JavaDoc,Integer JavaDoc,V>cloneDG (dg, true, nodeFactory);
449         ndg.addEnd (ndg.getStartNode ());
450         Pattern<V> p = new Pattern<V> (ndg);
451         return p;
452     }
453
454     public Pattern<V> merge (Pattern<V> parser) {
455         DG<Integer JavaDoc,Character JavaDoc,Integer JavaDoc,V> ndg = DGUtils.<Integer JavaDoc,Character JavaDoc,Integer JavaDoc,V>merge (dg, parser.dg, STAR, nodeFactory);
456         Pattern<V> p = new Pattern<V> (ndg);
457         return p;
458     }
459
460     public Pattern<V> append (Pattern<V> parser) {
461         DG<Integer JavaDoc,Character JavaDoc,Integer JavaDoc,V> ndg = DGUtils.<Integer JavaDoc,Character JavaDoc,Integer JavaDoc,V>append (dg, parser.dg, STAR, nodeFactory);
462         Pattern<V> p = new Pattern<V> (ndg);
463         return p;
464     }
465
466     boolean matches (String JavaDoc text) {
467         int i = 0;
468         Integer JavaDoc state = dg.getStartNode ();
469         while (i < text.length ()) {
470             state = dg.getNode (state, new Character JavaDoc (text.charAt (i++)));
471             if (state == null) return false;
472         }
473         return dg.getEnds ().contains (state);
474     }
475
476     public Integer JavaDoc next (CharInput input) {
477         return next (dg.getStartNode (), input);
478     }
479     
480     public Integer JavaDoc next (Integer JavaDoc state, CharInput input) {
481         int lastIndex = input.getIndex ();
482         Integer JavaDoc lastState = null;
483         while (state != null) {
484             if (dg.getEnds ().contains (state)) {
485                 lastState = state;
486                 lastIndex = input.getIndex ();
487             }
488             if (input.eof ()) break;
489             Integer JavaDoc newState = dg.getNode (state, new Character JavaDoc (input.next ()));
490             if (newState != null)
491                 state = newState;
492             else
493                 state = dg.getNode (state, STAR);
494             if (state != null) input.read ();
495         }
496         input.setIndex (lastIndex);
497         return lastState;
498     }
499
500     public String JavaDoc toString () {
501         return dg.toString ();
502     }
503     
504 // public Object getValue (Object state, Object key) {
505
// return dg.getProperty (state, key);
506
// }
507

508 // DG<Integer,Character,K,V> getDG () {
509
// return dg;
510
// }
511

512     
513     public Object JavaDoc read (CharInput input) {
514         if (input.eof ()) return null;
515         int originalIndex = input.getIndex ();
516         int lastIndex = -1;
517         TokenType lastTT = null;
518         Integer JavaDoc node = dg.getStartNode ();
519         while (!input.eof ()) {
520             Character JavaDoc edge = new Character JavaDoc (input.next ());
521             Integer JavaDoc nnode = dg.getNode (node, edge);
522             if (nnode == null) {
523                 edge = Pattern.STAR;
524                 nnode = dg.getNode (node, edge);
525             }
526             
527             if (input.getIndex () > originalIndex) {
528                 TokenType bestTT = getBestTT (node);
529                 if (bestTT != null) {
530                     lastTT = bestTT;
531                     lastIndex = input.getIndex ();
532                 }
533             }
534             
535             if (nnode == null ||
536                 ( dg.getEdges (nnode).isEmpty () &&
537                   dg.getProperties (nnode).isEmpty ()
538                 )
539             ) {
540                 if (lastTT == null) {
541                     // error => reset position in CURRENT pattern (state)
542
return null;
543                 }
544                 input.setIndex (lastIndex);
545                 return lastTT;
546             }
547             
548             input.read ();
549             node = nnode;
550         }
551         
552         TokenType bestTT = getBestTT (node);
553         if (bestTT != null) {
554             lastTT = bestTT;
555             lastIndex = input.getIndex ();
556         }
557         
558         if (lastTT == null) return null;
559         return lastTT;
560     }
561     
562     private TokenType getBestTT (Integer JavaDoc node) {
563         Map JavaDoc tts = dg.getProperties (node);
564         TokenType best = null;
565         Iterator JavaDoc it = tts.keySet ().iterator ();
566         while (it.hasNext ()) {
567             Integer JavaDoc i = (Integer JavaDoc) it.next ();
568             TokenType tt = (TokenType) tts.get (i);
569             if (best == null || best.getPriority () > tt.getPriority ())
570                 best = tt;
571         }
572         return best;
573     }
574     
575     void mark (int priority, V r) {
576         Iterator JavaDoc<Integer JavaDoc> it = dg.getEnds ().iterator ();
577         while (it.hasNext ()) {
578             Integer JavaDoc s = it.next ();
579             dg.setProperty (
580                 s,
581                 priority,
582                 r
583             );
584         }
585     }
586 }
587
Popular Tags