KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > codehaus > groovy > antlr > parser > GroovyLexer


1 // $ANTLR 2.7.2: "groovy.g" -> "GroovyLexer.java"$
2

3 package org.codehaus.groovy.antlr.parser;
4 import org.codehaus.groovy.antlr.*;
5 import java.util.*;
6 import java.io.InputStream JavaDoc;
7 import java.io.Reader JavaDoc;
8 import antlr.InputBuffer;
9 import antlr.LexerSharedInputState;
10
11 import java.io.InputStream JavaDoc;
12 import antlr.TokenStreamException;
13 import antlr.TokenStreamIOException;
14 import antlr.TokenStreamRecognitionException;
15 import antlr.CharStreamException;
16 import antlr.CharStreamIOException;
17 import antlr.ANTLRException;
18 import java.io.Reader JavaDoc;
19 import java.util.Hashtable JavaDoc;
20 import antlr.CharScanner;
21 import antlr.InputBuffer;
22 import antlr.ByteBuffer;
23 import antlr.CharBuffer;
24 import antlr.Token;
25 import antlr.CommonToken;
26 import antlr.RecognitionException;
27 import antlr.NoViableAltForCharException;
28 import antlr.MismatchedCharException;
29 import antlr.TokenStream;
30 import antlr.ANTLRHashString;
31 import antlr.LexerSharedInputState;
32 import antlr.collections.impl.BitSet;
33 import antlr.SemanticException;
34
35 public class GroovyLexer extends antlr.CharScanner implements GroovyTokenTypes, TokenStream
36  {
37
38     /** flag for enabling the "assert" keyword */
39     private boolean assertEnabled = true;
40     /** flag for enabling the "enum" keyword */
41     private boolean enumEnabled = true;
42     /** flag for including whitespace tokens (for IDE preparsing) */
43     private boolean whitespaceIncluded = false;
44
45     /** Enable the "assert" keyword */
46     public void enableAssert(boolean shouldEnable) { assertEnabled = shouldEnable; }
47     /** Query the "assert" keyword state */
48     public boolean isAssertEnabled() { return assertEnabled; }
49     /** Enable the "enum" keyword */
50     public void enableEnum(boolean shouldEnable) { enumEnabled = shouldEnable; }
51     /** Query the "enum" keyword state */
52     public boolean isEnumEnabled() { return enumEnabled; }
53
54     /** Include whitespace tokens. Note that this breaks the parser. */
55     public void setWhitespaceIncluded(boolean z) { whitespaceIncluded = z; }
56     /** Are whitespace tokens included? */
57     public boolean isWhitespaceIncluded() { return whitespaceIncluded; }
58
{
    // Instance initializer: runs on every construction.
    // A tab size of 1 gets rid of special tab interpretation,
    // for IDEs and general clarity.
    setTabSize(1);
}
63
64     /** Bumped when inside '[x]' or '(x)', reset inside '{x}'. See ONE_NL. */
65     protected int parenLevel = 0;
66     protected int suppressNewline = 0; // be really mean to newlines inside strings
67
protected static final int SCS_TYPE = 3, SCS_VAL = 4, SCS_LIT = 8, SCS_LIMIT = 16;
68     protected static final int SCS_SQ_TYPE = 0, SCS_TQ_TYPE = 1, SCS_RE_TYPE = 2;
69     protected int stringCtorState = 0; // hack string and regexp constructor boundaries
70
/** Push parenLevel here and reset whenever inside '{x}'. */
71     protected ArrayList parenLevelStack = new ArrayList();
72     protected int lastSigTokenType = EOF; // last returned non-whitespace token
73

74     protected void pushParenLevel() {
75         parenLevelStack.add(new Integer JavaDoc(parenLevel*SCS_LIMIT + stringCtorState));
76         parenLevel = 0;
77         stringCtorState = 0;
78     }
79     protected void popParenLevel() {
80         int npl = parenLevelStack.size();
81         if (npl == 0) return;
82         int i = ((Integer JavaDoc) parenLevelStack.remove(--npl)).intValue();
83         parenLevel = i / SCS_LIMIT;
84         stringCtorState = i % SCS_LIMIT;
85     }
86
87     protected void restartStringCtor(boolean expectLiteral) {
88         if (stringCtorState != 0) {
89             stringCtorState = (expectLiteral? SCS_LIT: SCS_VAL) + (stringCtorState & SCS_TYPE);
90         }
91     }
92     
93     protected boolean allowRegexpLiteral() {
94         return !isExpressionEndingToken(lastSigTokenType);
95     }
96
97     /** Return true for an operator or punctuation which can end an expression.
98      * Return true for keywords, identifiers, and literals.
99      * Return true for tokens which can end expressions (right brackets, ++, --).
100      * Return false for EOF and all other operator and punctuation tokens.
101      * Used to suppress the recognition of /foo/ as opposed to the simple division operator '/'.
102      */

103     // Cf. 'constant' and 'balancedBrackets' rules in the grammar.)
104
protected static boolean isExpressionEndingToken(int ttype) {
105         switch (ttype) {
106         case INC: // x++ / y
107
case DEC: // x-- / y
108
case RPAREN: // (x) / y
109
case RBRACK: // f[x] / y
110
case RCURLY: // f{x} / y
111
case STRING_LITERAL: // "x" / y
112
case STRING_CTOR_END: // "$x" / y
113
case NUM_INT: // 0 / y
114
case NUM_FLOAT: // 0f / y
115
case NUM_LONG: // 0l / y
116
case NUM_DOUBLE: // 0.0 / y
117
case NUM_BIG_INT: // 0g / y
118
case NUM_BIG_DECIMAL: // 0.0g / y
119
case IDENT: // x / y
120
// and a bunch of keywords (all of them; no sense picking and choosing):
121
case LITERAL_any:
122         case LITERAL_as:
123         case LITERAL_assert:
124         case LITERAL_boolean:
125         case LITERAL_break:
126         case LITERAL_byte:
127         case LITERAL_case:
128         case LITERAL_catch:
129         case LITERAL_char:
130         case LITERAL_class:
131         case LITERAL_continue:
132         case LITERAL_def:
133         case LITERAL_default:
134         case LITERAL_double:
135         case LITERAL_else:
136         case LITERAL_enum:
137         case LITERAL_extends:
138         case LITERAL_false:
139         case LITERAL_finally:
140         case LITERAL_float:
141         case LITERAL_for:
142         case LITERAL_if:
143         case LITERAL_implements:
144         case LITERAL_import:
145         case LITERAL_in:
146         case LITERAL_instanceof:
147         case LITERAL_int:
148         case LITERAL_interface:
149         case LITERAL_long:
150         case LITERAL_native:
151         case LITERAL_new:
152         case LITERAL_null:
153         case LITERAL_package:
154         case LITERAL_private:
155         case LITERAL_protected:
156         case LITERAL_public:
157         case LITERAL_return:
158         case LITERAL_short:
159         case LITERAL_static:
160         case LITERAL_super:
161         case LITERAL_switch:
162         case LITERAL_synchronized:
163         case LITERAL_this:
164         case LITERAL_threadsafe:
165         case LITERAL_throw:
166         case LITERAL_throws:
167         case LITERAL_transient:
168         case LITERAL_true:
169         case LITERAL_try:
170         case LITERAL_void:
171         case LITERAL_volatile:
172         case LITERAL_while:
173         case LITERAL_with:
174             return true;
175         default:
176             return false;
177         }
178     }
179
180     protected void newlineCheck(boolean check) throws RecognitionException {
181         if (check && suppressNewline > 0) {
182             require(suppressNewline == 0,
183                 "end of line reached within a simple string 'x' or \"x\" or /x/",
184                 "for multi-line literals, use triple quotes '''x''' or \"\"\"x\"\"\"");
185             suppressNewline = 0; // shut down any flood of errors
186
}
187         newline();
188     }
189     
190     protected boolean atValidDollarEscape() throws CharStreamException {
191         // '$' (('*')? ('{' | LETTER)) =>
192
int k = 1;
193         char lc = LA(k++);
194         if (lc != '$') return false;
195         lc = LA(k++);
196         if (lc == '*') lc = LA(k++);
197         return (lc == '{' || (lc != '$' && Character.isJavaIdentifierStart(lc)));
198     }
199
200     /** This is a bit of plumbing which resumes collection of string constructor bodies,
201      * after an embedded expression has been parsed.
202      * Usage: new GroovyRecognizer(new GroovyLexer(in).plumb()).
203      */

204     public TokenStream plumb() {
205         return new TokenStream() {
206             public Token nextToken() throws TokenStreamException {
207                 if (stringCtorState >= SCS_LIT) {
208                     // This goo is modeled upon the ANTLR code for nextToken:
209
int quoteType = (stringCtorState & SCS_TYPE);
210                     stringCtorState = 0; // get out of this mode, now
211
resetText();
212                     try {
213                         switch (quoteType) {
214                         case SCS_SQ_TYPE:
215                             mSTRING_CTOR_END(true, /*fromStart:*/false, false); break;
216                         case SCS_TQ_TYPE:
217                             mSTRING_CTOR_END(true, /*fromStart:*/false, true); break;
218                         case SCS_RE_TYPE:
219                             mREGEXP_CTOR_END(true, /*fromStart:*/false); break;
220                         default: assert(false);
221                         }
222                         lastSigTokenType = _returnToken.getType();
223                         return _returnToken;
224                     } catch (RecognitionException e) {
225                         throw new TokenStreamRecognitionException(e);
226                     } catch (CharStreamException cse) {
227                         if ( cse instanceof CharStreamIOException ) {
228                             throw new TokenStreamIOException(((CharStreamIOException)cse).io);
229                         }
230                         else {
231                             throw new TokenStreamException(cse.getMessage());
232                         }
233                     }
234                 }
235                 Token token = GroovyLexer.this.nextToken();
236                 int lasttype = token.getType();
237                 if (whitespaceIncluded) {
238                     switch (lasttype) { // filter out insignificant types
239
case WS:
240                     case ONE_NL:
241                     case SL_COMMENT:
242                     case ML_COMMENT:
243                         lasttype = lastSigTokenType; // back up!
244
}
245                 }
246                 lastSigTokenType = lasttype;
247                 return token;
248             }
249         };
250     }
251
252         // stuff to adjust ANTLR's tracing machinery
253
public static boolean tracing = false; // only effective if antlr.Tool is run with -traceLexer
254
public void traceIn(String JavaDoc rname) throws CharStreamException {
255         if (!GroovyLexer.tracing) return;
256         super.traceIn(rname);
257     }
258     public void traceOut(String JavaDoc rname) throws CharStreamException {
259         if (!GroovyLexer.tracing) return;
260         if (_returnToken != null) rname += tokenStringOf(_returnToken);
261         super.traceOut(rname);
262     }
263     private static java.util.HashMap JavaDoc ttypes;
264     private static String JavaDoc tokenStringOf(Token t) {
265         if (ttypes == null) {
266             java.util.HashMap JavaDoc map = new java.util.HashMap JavaDoc();
267             java.lang.reflect.Field JavaDoc[] fields = GroovyTokenTypes.class.getDeclaredFields();
268             for (int i = 0; i < fields.length; i++) {
269                 if (fields[i].getType() != int.class) continue;
270                 try {
271                     map.put(fields[i].get(null), fields[i].getName());
272                 } catch (IllegalAccessException JavaDoc ee) {
273                 }
274             }
275             ttypes = map;
276         }
277         Integer JavaDoc tt = new Integer JavaDoc(t.getType());
278         Object JavaDoc ttn = ttypes.get(tt);
279         if (ttn == null) ttn = "<"+tt+">";
280         return "["+ttn+",\""+t.getText()+"\"]";
281     }
282
283     protected GroovyRecognizer parser; // little-used link; TODO: get rid of
284
private void require(boolean z, String JavaDoc problem, String JavaDoc solution) throws SemanticException {
285         // TODO: Direct to a common error handler, rather than through the parser.
286
if (!z) parser.requireFailed(problem, solution);
287     }
/**
 * Creates a lexer reading from a byte stream, wrapped in an ANTLR ByteBuffer.
 *
 * @param in source of the input
 */
public GroovyLexer(InputStream in) {
    this(new ByteBuffer(in));
}
/**
 * Creates a lexer reading from a character stream, wrapped in an ANTLR CharBuffer.
 *
 * @param in source of the input
 */
public GroovyLexer(Reader in) {
    this(new CharBuffer(in));
}
/**
 * Creates a lexer over an existing ANTLR input buffer, using a fresh
 * LexerSharedInputState.
 *
 * @param ib buffer to read from
 */
public GroovyLexer(InputBuffer ib) {
    this(new LexerSharedInputState(ib));
}
/**
 * Creates a lexer on the given shared input state, switches on case
 * sensitivity and fills the literals table with the keyword strings.
 * The numeric token types are the generated values; presumably they match
 * the constants in GroovyTokenTypes (verify against that interface).
 *
 * @param state shared lexer input state
 */
public GroovyLexer(LexerSharedInputState state) {
    super(state);
    caseSensitiveLiterals = true;
    setCaseSensitive(true);
    literals = new Hashtable();
    // Same entries, in the same order, as emitted by ANTLR.
    addKeywordLiteral("byte", 101);
    addKeywordLiteral("public", 112);
    addKeywordLiteral("case", 150);
    addKeywordLiteral("short", 103);
    addKeywordLiteral("break", 144);
    addKeywordLiteral("while", 138);
    addKeywordLiteral("new", 192);
    addKeywordLiteral("instanceof", 178);
    addKeywordLiteral("implements", 127);
    addKeywordLiteral("synchronized", 117);
    addKeywordLiteral("const", 40);
    addKeywordLiteral("float", 105);
    addKeywordLiteral("package", 78);
    addKeywordLiteral("return", 143);
    addKeywordLiteral("throw", 146);
    addKeywordLiteral("null", 195);
    addKeywordLiteral("def", 81);
    addKeywordLiteral("threadsafe", 116);
    addKeywordLiteral("protected", 113);
    addKeywordLiteral("class", 88);
    addKeywordLiteral("throws", 130);
    addKeywordLiteral("do", 41);
    addKeywordLiteral("strictfp", 42);
    addKeywordLiteral("super", 93);
    addKeywordLiteral("with", 139);
    addKeywordLiteral("transient", 114);
    addKeywordLiteral("native", 115);
    addKeywordLiteral("interface", 89);
    addKeywordLiteral("final", 37);
    addKeywordLiteral("any", 108);
    addKeywordLiteral("if", 136);
    addKeywordLiteral("double", 107);
    addKeywordLiteral("volatile", 118);
    addKeywordLiteral("as", 110);
    addKeywordLiteral("assert", 147);
    addKeywordLiteral("catch", 153);
    addKeywordLiteral("try", 151);
    addKeywordLiteral("goto", 39);
    addKeywordLiteral("enum", 90);
    addKeywordLiteral("int", 104);
    addKeywordLiteral("for", 141);
    addKeywordLiteral("extends", 92);
    addKeywordLiteral("boolean", 100);
    addKeywordLiteral("char", 102);
    addKeywordLiteral("private", 111);
    addKeywordLiteral("default", 126);
    addKeywordLiteral("false", 194);
    addKeywordLiteral("this", 128);
    addKeywordLiteral("static", 80);
    addKeywordLiteral("abstract", 38);
    addKeywordLiteral("continue", 145);
    addKeywordLiteral("finally", 152);
    addKeywordLiteral("else", 137);
    addKeywordLiteral("import", 79);
    addKeywordLiteral("in", 142);
    addKeywordLiteral("void", 99);
    addKeywordLiteral("switch", 140);
    addKeywordLiteral("true", 193);
    addKeywordLiteral("long", 106);
}

/** Adds one keyword to the literals table under the given token type. */
private void addKeywordLiteral(String keyword, int tokenType) {
    literals.put(new ANTLRHashString(keyword, this), new Integer(tokenType));
}
362
363 public Token nextToken() throws TokenStreamException {
364     Token theRetToken=null;
365 tryAgain:
366     for (;;) {
367         Token _token = null;
368         int _ttype = Token.INVALID_TYPE;
369         resetText();
370         try { // for char stream error handling
371
try { // for lexical error handling
372
switch ( LA(1)) {
373                 case '(':
374                 {
375                     mLPAREN(true);
376                     theRetToken=_returnToken;
377                     break;
378                 }
379                 case ')':
380                 {
381                     mRPAREN(true);
382                     theRetToken=_returnToken;
383                     break;
384                 }
385                 case '[':
386                 {
387                     mLBRACK(true);
388                     theRetToken=_returnToken;
389                     break;
390                 }
391                 case ']':
392                 {
393                     mRBRACK(true);
394                     theRetToken=_returnToken;
395                     break;
396                 }
397                 case '{':
398                 {
399                     mLCURLY(true);
400                     theRetToken=_returnToken;
401                     break;
402                 }
403                 case '}':
404                 {
405                     mRCURLY(true);
406                     theRetToken=_returnToken;
407                     break;
408                 }
409                 case ':':
410                 {
411                     mCOLON(true);
412                     theRetToken=_returnToken;
413                     break;
414                 }
415                 case ',':
416                 {
417                     mCOMMA(true);
418                     theRetToken=_returnToken;
419                     break;
420                 }
421                 case '~':
422                 {
423                     mBNOT(true);
424                     theRetToken=_returnToken;
425                     break;
426                 }
427                 case ';':
428                 {
429                     mSEMI(true);
430                     theRetToken=_returnToken;
431                     break;
432                 }
433                 case '$':
434                 {
435                     mDOLLAR(true);
436                     theRetToken=_returnToken;
437                     break;
438                 }
439                 case '\t': case '\u000c': case ' ': case '\\':
440                 {
441                     mWS(true);
442                     theRetToken=_returnToken;
443                     break;
444                 }
445                 case '\n': case '\r':
446                 {
447                     mNLS(true);
448                     theRetToken=_returnToken;
449                     break;
450                 }
451                 case '"': case '\'':
452                 {
453                     mSTRING_LITERAL(true);
454                     theRetToken=_returnToken;
455                     break;
456                 }
457                 case '0': case '1': case '2': case '3':
458                 case '4': case '5': case '6': case '7':
459                 case '8': case '9':
460                 {
461                     mNUM_INT(true);
462                     theRetToken=_returnToken;
463                     break;
464                 }
465                 case '@':
466                 {
467                     mAT(true);
468                     theRetToken=_returnToken;
469                     break;
470                 }
471                 default:
472                     if ((LA(1)=='>') && (LA(2)=='>') && (LA(3)=='>') && (LA(4)=='=')) {
473                         mBSR_ASSIGN(true);
474                         theRetToken=_returnToken;
475                     }
476                     else if ((LA(1)=='<') && (LA(2)=='=') && (LA(3)=='>')) {
477                         mCOMPARE_TO(true);
478                         theRetToken=_returnToken;
479                     }
480                     else if ((LA(1)=='>') && (LA(2)=='>') && (LA(3)=='=')) {
481                         mSR_ASSIGN(true);
482                         theRetToken=_returnToken;
483                     }
484                     else if ((LA(1)=='>') && (LA(2)=='>') && (LA(3)=='>') && (true)) {
485                         mBSR(true);
486                         theRetToken=_returnToken;
487                     }
488                     else if ((LA(1)=='<') && (LA(2)=='<') && (LA(3)=='=')) {
489                         mSL_ASSIGN(true);
490                         theRetToken=_returnToken;
491                     }
492                     else if ((LA(1)=='.') && (LA(2)=='.') && (LA(3)=='<')) {
493                         mRANGE_EXCLUSIVE(true);
494                         theRetToken=_returnToken;
495                     }
496                     else if ((LA(1)=='.') && (LA(2)=='.') && (LA(3)=='.')) {
497                         mTRIPLE_DOT(true);
498                         theRetToken=_returnToken;
499                     }
500                     else if ((LA(1)=='=') && (LA(2)=='=') && (LA(3)=='~')) {
501                         mREGEX_MATCH(true);
502                         theRetToken=_returnToken;
503                     }
504                     else if ((LA(1)=='*') && (LA(2)=='*') && (LA(3)=='=')) {
505                         mSTAR_STAR_ASSIGN(true);
506                         theRetToken=_returnToken;
507                     }
508                     else if ((LA(1)=='=') && (LA(2)=='=') && (true)) {
509                         mEQUAL(true);
510                         theRetToken=_returnToken;
511                     }
512                     else if ((LA(1)=='!') && (LA(2)=='=')) {
513                         mNOT_EQUAL(true);
514                         theRetToken=_returnToken;
515                     }
516                     else if ((LA(1)=='+') && (LA(2)=='=')) {
517                         mPLUS_ASSIGN(true);
518                         theRetToken=_returnToken;
519                     }
520                     else if ((LA(1)=='+') && (LA(2)=='+')) {
521                         mINC(true);
522                         theRetToken=_returnToken;
523                     }
524                     else if ((LA(1)=='-') && (LA(2)=='=')) {
525                         mMINUS_ASSIGN(true);
526                         theRetToken=_returnToken;
527                     }
528                     else if ((LA(1)=='-') && (LA(2)=='-')) {
529                         mDEC(true);
530                         theRetToken=_returnToken;
531                     }
532                     else if ((LA(1)=='*') && (LA(2)=='=')) {
533                         mSTAR_ASSIGN(true);
534                         theRetToken=_returnToken;
535                     }
536                     else if ((LA(1)=='%') && (LA(2)=='=')) {
537                         mMOD_ASSIGN(true);
538                         theRetToken=_returnToken;
539                     }
540                     else if ((LA(1)=='>') && (LA(2)=='>') && (true)) {
541                         mSR(true);
542                         theRetToken=_returnToken;
543                     }
544                     else if ((LA(1)=='>') && (LA(2)=='=')) {
545                         mGE(true);
546                         theRetToken=_returnToken;
547                     }
548                     else if ((LA(1)=='<') && (LA(2)=='<') && (true)) {
549                         mSL(true);
550                         theRetToken=_returnToken;
551                     }
552                     else if ((LA(1)=='<') && (LA(2)=='=') && (true)) {
553                         mLE(true);
554                         theRetToken=_returnToken;
555                     }
556                     else if ((LA(1)=='^') && (LA(2)=='=')) {
557                         mBXOR_ASSIGN(true);
558                         theRetToken=_returnToken;
559                     }
560                     else if ((LA(1)=='|') && (LA(2)=='=')) {
561                         mBOR_ASSIGN(true);
562                         theRetToken=_returnToken;
563                     }
564                     else if ((LA(1)=='|') && (LA(2)=='|')) {
565                         mLOR(true);
566