KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > netbeans > modules > el > lexer > ELLexer


1 /*
2  * The contents of this file are subject to the terms of the Common Development
3  * and Distribution License (the License). You may not use this file except in
4  * compliance with the License.
5  *
6  * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7  * or http://www.netbeans.org/cddl.txt.
8  *
9  * When distributing Covered Code, include this CDDL Header Notice in each file
10  * and include the License file at http://www.netbeans.org/cddl.txt.
11  * If applicable, add the following below the CDDL Header, with the fields
12  * enclosed by brackets [] replaced by your own identifying information:
13  * "Portions Copyrighted [year] [name of copyright owner]"
14  *
15  * The Original Software is NetBeans. The Initial Developer of the Original
16  * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
17  * Microsystems, Inc. All Rights Reserved.
18  */

19
20 package org.netbeans.modules.el.lexer;
21
22 import java.util.logging.Level JavaDoc;
23 import java.util.logging.Logger JavaDoc;
24 import org.netbeans.api.lexer.Token;
25 import org.netbeans.modules.el.lexer.api.ELTokenId;
26 import org.netbeans.spi.lexer.Lexer;
27 import org.netbeans.spi.lexer.LexerInput;
28 import org.netbeans.spi.lexer.LexerRestartInfo;
29 import org.netbeans.spi.lexer.TokenFactory;
30
31 /**
32  * Lexical analyzer for Expression Language.
33  * It does NOT recognize the EL delimiters ${ } and #{ }.
34  *
35  * @author Petr Pisl
36  * @author Marek Fukala
37  *
38  * @version 1.00
39  */

40
41 public class ELLexer implements Lexer<ELTokenId> {
42     
43     private static final Logger JavaDoc LOGGER = Logger.getLogger(ELLexer.class.getName());
44     private static final boolean LOG = Boolean.getBoolean("j2ee_lexer_debug"); //NOI18N
45

46     private static final int EOF = LexerInput.EOF;
47     
48     private final LexerInput input;
49     
50     private final TokenFactory<ELTokenId> tokenFactory;
51     
52     public Object JavaDoc state() {
53         return lexerState;
54     }
55     
56     /** Internal state of the lexical analyzer before entering subanalyzer of
57      * character references. It is initially set to INIT, but before first usage,
58      * this will be overwritten with state, which originated transition to
59      * charref subanalyzer.
60      */

61     private int lexerState = INIT;
62     
63     
64     /* Internal states used internally by analyzer. There
65      * can be any number of them declared by the analyzer.
66      */

67     private static final int INIT = 1; //initial lexer state
68
private static final int ISI_IDENTIFIER = 2;
69     private static final int ISI_CHAR = 3; // inside char constant
70
private static final int ISI_CHAR_A_BSLASH = 4; // inside char constant after backslash
71
private static final int ISI_STRING = 5; // inside a string " ... "
72
private static final int ISI_STRING_A_BSLASH = 6; // inside string "..." constant after backslash
73
private static final int ISI_CHAR_STRING = 7; // inside a string '...'
74
private static final int ISI_CHAR_STRING_A_BSLASH = 8; // inside string '...'contant after backslash
75
private static final int ISA_ZERO = 9; // after '0'
76
private static final int ISI_INT = 10; // integer number
77
private static final int ISI_OCTAL = 11; // octal number
78
private static final int ISI_DOUBLE = 12; // double number
79
private static final int ISI_DOUBLE_EXP = 13; // double number
80
private static final int ISI_HEX = 14; // hex number
81
private static final int ISA_DOT = 15; // after '.'
82
private static final int ISI_WHITESPACE = 16; // inside white space
83
private static final int ISA_EQ = 17; // after '='
84
private static final int ISA_GT = 18; // after '>'
85
private static final int ISA_LT = 19; // after '<'
86
//private static final int ISA_PLUS = 20; // after '+'
87
//private static final int ISA_MINUS = 21; // after '-'
88
//private static final int ISA_STAR = 22; // after '*'
89
private static final int ISA_PIPE = 23; // after '|'
90
private static final int ISA_AND = 24; // after '&'
91
private static final int ISA_EXCLAMATION = 25; // after '!'
92
private static final int ISI_BRACKET = 26; // after '['
93
private static final int ISI_BRACKET_A_WHITESPACE = 27;
94     private static final int ISI_BRACKET_A_IDENTIFIER = 28;
95     private static final int ISI_BRACKET_ISA_EQ = 29;
96     private static final int ISI_BRACKET_ISA_GT = 30;
97     private static final int ISI_BRACKET_ISA_LT =31;
98     private static final int ISI_BRACKET_ISA_PIPE = 32; // after '|'
99
private static final int ISI_BRACKET_ISA_AND = 33; // after '&'
100
private static final int ISI_BRACKET_ISA_ZERO = 34; // after '0'
101
private static final int ISI_BRACKET_ISA_DOT = 35; // after '.'
102
private static final int ISI_BRACKET_ISI_INT = 36; // after '.'
103
private static final int ISI_BRACKET_ISI_OCTAL = 37; // octal number
104
private static final int ISI_BRACKET_ISI_DOUBLE = 38; // double number
105
private static final int ISI_BRACKET_ISI_DOUBLE_EXP = 39; // double number
106
private static final int ISI_BRACKET_ISI_HEX = 40; // hex number
107
private static final int ISI_DOULE_EXP_ISA_SIGN = 41;
108     private static final int ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN = 42;
109     //private static final int ISA_PERCENT = 24; // after '%'
110

111     
112     public ELLexer(LexerRestartInfo<ELTokenId> info) {
113         this.input = info.input();
114         this.tokenFactory = info.tokenFactory();
115         if (info.state() == null) {
116             this.lexerState = INIT;
117         } else {
118             this.lexerState = ((Integer JavaDoc) info.state()).intValue();
119         }
120     }
121     
122     
123     /** This is the core function of the analyzer. It reads characters from
124      * the input and returns the next token, or null when only the end of
125      * the input is left to read.
126      *
127      * The function is a state machine driven by lexerState. For each
128      * character read it does one or more of the following actions:
129      * 1. changes the internal analyzer state (lexerState = new-state)
130      * 2. returns a token (return token(token-ID))
131      * 3. backs up the input (input.backup(1)) to signal a different end of
132      * the token; the character that was backed up is not included in the
133      * token being returned
134      */

135     public Token<ELTokenId> nextToken() {
136         
137         int actChar;
138         
139         while (true) {
140             actChar = input.read();
141             
142             if (actChar == EOF) {
143                 if(input.readLengthEOF() == 1) {
144                     return null; //just EOL is read
145
} else {
146                     //there is something else in the buffer except EOL
147
//we will return last token now
148
input.backup(1); //backup the EOL, we will return null in next nextToken() call
149
break;
150                 }
151             }
152             
153             switch (lexerState) { // switch by the current internal state
154
case INIT:
155                     
156                     switch (actChar) {
157                         case '"':
158                             lexerState = ISI_STRING;
159                             break;
160                         case '\'':
161                             lexerState = ISI_CHAR;
162                             break;
163                         case '/':
164                             return token(ELTokenId.DIV);
165                         case '=':
166                             lexerState = ISA_EQ;
167                             break;
168                         case '>':
169                             lexerState = ISA_GT;
170                             break;
171                         case '<':
172                             lexerState = ISA_LT;
173                             break;
174                         case '+':
175                             return token(ELTokenId.PLUS);
176                         case '-':
177                             return token(ELTokenId.MINUS);
178                         case '*':
179                             return token(ELTokenId.MUL);
180                         case '|':
181                             lexerState = ISA_PIPE;
182                             break;
183                         case '&':
184                             lexerState = ISA_AND;
185                             break;
186                         case '[':
187                             return token(ELTokenId.LBRACKET);
188                         case ']':
189                             return token(ELTokenId.RBRACKET);
190                         case '%':
191                             return token(ELTokenId.MOD);
192                         case ':':
193                             return token(ELTokenId.COLON);
194                         case '!':
195                             lexerState = ISA_EXCLAMATION;
196                             break;
197                         case '(':
198                             return token(ELTokenId.LPAREN);
199                         case ')':
200                             return token(ELTokenId.RPAREN);
201                         case ',':
202                             return token(ELTokenId.COMMA);
203                         case '?':
204                             return token(ELTokenId.QUESTION);
205                         case '\n':
206                             return token(ELTokenId.EOL);
207                         case '0':
208                             lexerState = ISA_ZERO;
209                             break;
210                         case '.':
211                             lexerState = ISA_DOT;
212                             break;
213                         default:
214                             // Check for whitespace
215
if (Character.isWhitespace(actChar)) {
216                                 lexerState = ISI_WHITESPACE;
217                                 break;
218                             }
219                             
220                             // check whether it can be identifier
221
if (Character.isJavaIdentifierStart(actChar)){
222                                 lexerState = ISI_IDENTIFIER;
223                                 break;
224                             }
225                             // Check for digit
226
if (Character.isDigit(actChar)) {
227                                 lexerState = ISI_INT;
228                                 break;
229                             }
230                             return token(ELTokenId.INVALID_CHAR);
231                             //break;
232
}
233                     break;
234                     
235                     
236                 case ISI_WHITESPACE: // white space
237
if (!Character.isWhitespace(actChar)) {
238                         lexerState = INIT;
239                         input.backup(1);
240                         return token(ELTokenId.WHITESPACE);
241                     }
242                     break;
243                     
244                 case ISI_BRACKET:
245                     switch (actChar){
246                         case ']':
247                             lexerState = INIT;
248                             input.backup(1);
249                             return token(ELTokenId.IDENTIFIER);
250                         case '"':
251                             return token(ELTokenId.LBRACKET);
252                         case '\'':
253                             return token(ELTokenId.LBRACKET);
254                         case '/':
255                             return token(ELTokenId.DIV);
256                         case '+':
257                             return token(ELTokenId.PLUS);
258                         case '-':
259                             return token(ELTokenId.MINUS);
260                         case '*':
261                             return token(ELTokenId.MUL);
262                         case '[':
263                             return token(ELTokenId.LBRACKET);
264                         case '%':
265                             return token(ELTokenId.MOD);
266                         case ':':
267                             return token(ELTokenId.COLON);
268                         case '(':
269                             return token(ELTokenId.LPAREN);
270                         case ')':
271                             return token(ELTokenId.RPAREN);
272                         case ',':
273                             return token(ELTokenId.COMMA);
274                         case '?':
275                             return token(ELTokenId.QUESTION);
276                         case '=':
277                             lexerState = ISI_BRACKET_ISA_EQ;
278                             break;
279                         case '>':
280                             lexerState = ISI_BRACKET_ISA_GT;
281                             break;
282                         case '<':
283                             lexerState = ISI_BRACKET_ISA_LT;
284                             break;
285                         case '|':
286                             lexerState = ISI_BRACKET_ISA_PIPE;
287                             break;
288                         case '&':
289                             lexerState = ISI_BRACKET_ISA_AND;
290                             break;
291                         case '0':
292                             lexerState = ISI_BRACKET_ISA_ZERO;
293                             break;
294                         case '.':
295                             lexerState = ISI_BRACKET_ISA_DOT;
296                             break;
297                         default :
298                             // Check for whitespace
299
if (Character.isWhitespace(actChar)) {
300                                 lexerState = ISI_BRACKET_A_WHITESPACE;
301                                 break;
302                             }
303                             if (Character.isJavaIdentifierStart(actChar)){
304                                 // - System.out.print(" state->ISI_IDENTIFIER ");
305
lexerState = ISI_BRACKET_A_IDENTIFIER;
306                                 break;
307                             }
308                             // Check for digit
309
if (Character.isDigit(actChar)) {
310                                 lexerState = ISI_BRACKET_ISI_INT;
311                                 break;
312                             }
313                             return token(ELTokenId.INVALID_CHAR);
314                             //break;
315
}
316                     break;
317                     
318                 case ISI_BRACKET_A_WHITESPACE:
319                     if (!Character.isWhitespace(actChar)) {
320                         lexerState = ISI_BRACKET;
321                         input.backup(1);
322                         return token(ELTokenId.WHITESPACE);
323                     }
324                     break;
325                     
326                 case ISI_BRACKET_ISA_EQ:
327                 case ISA_EQ:
328                     switch (actChar) {
329                         case '=':
330                             lexerState = INIT;
331                             return token(ELTokenId.EQ_EQ);
332                         default:
333                             lexerState = (lexerState == ISI_BRACKET_ISA_EQ) ? ISI_BRACKET : INIT;
334                             input.backup(1);
335                             break;
336                     }
337                     break;
338                     
339                 case ISI_BRACKET_ISA_GT:
340                 case ISA_GT:
341                     switch (actChar) {
342                         case '=':
343                             lexerState = INIT;
344                             return token(ELTokenId.GT_EQ);
345                         default:
346                             lexerState = (lexerState == ISI_BRACKET_ISA_GT) ? ISI_BRACKET : INIT;
347                             input.backup(1);
348                             return token(ELTokenId.GT);
349                     }
350                     //break;
351
case ISI_BRACKET_ISA_LT:
352                 case ISA_LT:
353                     switch (actChar) {
354                         case '=':
355                             lexerState = INIT;
356                             return token(ELTokenId.LT_EQ);
357                         default:
358                             lexerState = (lexerState == ISI_BRACKET_ISA_LT) ? ISI_BRACKET : INIT;
359                             input.backup(1);
360                             return token(ELTokenId.LT);
361                     }
362                     //break;
363
case ISI_BRACKET_ISA_PIPE:
364                 case ISA_PIPE:
365                     switch (actChar) {
366                         case '|':
367                             lexerState = INIT;
368                             return token(ELTokenId.OR_OR);
369                         default:
370                             lexerState = (lexerState == ISI_BRACKET_ISA_PIPE) ? ISI_BRACKET : INIT;
371                             input.backup(1);
372                             break;
373                     }
374                     break;
375                 case ISI_BRACKET_ISA_AND:
376                 case ISA_AND:
377                     switch (actChar) {
378                         case '&':
379                             lexerState = INIT;
380                             return token(ELTokenId.AND_AND);
381                         default:
382                             lexerState = (lexerState == ISI_BRACKET_ISA_AND) ? ISI_BRACKET : INIT;
383                             input.backup(1);
384                             break;
385                     }
386                     break;
387                 case ISA_EXCLAMATION:
388                     switch (actChar) {
389                         case '=':
390                             lexerState = INIT;
391                             return token(ELTokenId.NOT_EQ);
392                         default:
393                             lexerState = INIT;
394                             input.backup(1);
395                             return token(ELTokenId.NOT);
396                     }
397                 case ISI_STRING:
398                     switch (actChar) {
399                         case '\\':
400                             lexerState = ISI_STRING_A_BSLASH;
401                             break;
402                         case '\n':
403                             lexerState = INIT;
404                             input.backup(1);
405                             return token(ELTokenId.STRING_LITERAL);
406                         case '"': // NOI18N
407
lexerState = INIT;
408                             return token(ELTokenId.STRING_LITERAL);
409                     }
410                     break;
411                 case ISI_STRING_A_BSLASH:
412                     lexerState = ISI_STRING;
413                     break;
414                 case ISI_BRACKET_A_IDENTIFIER:
415                 case ISI_IDENTIFIER:
416                     if (!(Character.isJavaIdentifierPart(actChar))){
417                         switch (lexerState){
418                             case ISI_IDENTIFIER:
419                                 lexerState = INIT; break;
420                             case ISI_BRACKET_A_IDENTIFIER:
421                                 lexerState = ISI_BRACKET;
422                                 break;
423                         }
424                         Token<ELTokenId> tid = matchKeyword(input);
425                         input.backup(1);
426                         if (tid == null){
427                             if (actChar == ':'){
428                                 tid = token(ELTokenId.TAG_LIB_PREFIX);
429                             } else{
430                                 tid = token(ELTokenId.IDENTIFIER);
431                             }
432                         }
433                         return tid;
434                     }
435                     break;
436                     
437                 case ISI_CHAR:
438                     switch (actChar) {
439                         case '\\':
440                             lexerState = ISI_CHAR_A_BSLASH;
441                             break;
442                         case '\n':
443                             lexerState = INIT;
444                             input.backup(1);
445                             return token(ELTokenId.CHAR_LITERAL);
446                         case '\'':
447                             lexerState = INIT;
448                             return token(ELTokenId.CHAR_LITERAL);
449                         default :
450                             char prevChar = input.readText().charAt(input.readLength() - 1);
451                             if (prevChar != '\'' && prevChar != '\\'){
452                                 lexerState = ISI_CHAR_STRING;
453                             }
454                     }
455                     break;
456                     
457                 case ISI_CHAR_A_BSLASH:
458                     switch (actChar) {
459                         case '\'':
460                         case '\\':
461                             break;
462                         default:
463                             input.backup(1);
464                             break;
465                     }
466                     lexerState = ISI_CHAR;
467                     break;
468                     
469                 case ISI_CHAR_STRING:
470                     // - System.out.print(" ISI_CHAR_STRING (");
471
switch (actChar) {
472                         case '\\':
473                             // - System.out.print(" state->ISI_CHAR_A_BSLASH )");
474
lexerState = ISI_CHAR_STRING_A_BSLASH;
475                             break;
476                         case '\n':
477                             lexerState = INIT;
478                             input.backup(1);
479                             return token(ELTokenId.STRING_LITERAL);
480                         case '\'':
481                             lexerState = INIT;
482                             return token(ELTokenId.STRING_LITERAL);
483                     }
484                     // - System.out.print(")");
485
break;
486                     
487                 case ISI_CHAR_STRING_A_BSLASH:
488                     switch (actChar) {
489                         case '\'':
490                         case '\\':
491                             break;
492                         default:
493                             input.backup(1);
494                             break;
495                     }
496                     lexerState = ISI_CHAR_STRING;
497                     break;
498                     
499                 case ISI_BRACKET_ISA_ZERO:
500                 case ISA_ZERO:
501                     switch (actChar) {
502                         case '.':
503                             lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_DOUBLE : ISI_DOUBLE;
504                             break;
505                         case 'x':
506                         case 'X':
507                             lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_HEX : ISI_HEX;
508                             break;
509                         case 'l':
510                         case 'L':
511                             lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET : INIT;
512                             return token(ELTokenId.LONG_LITERAL);
513                         case 'f':
514                         case 'F':
515                             lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET : INIT;
516                             return token(ELTokenId.FLOAT_LITERAL);
517                         case 'd':
518                         case 'D':
519                             lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET : INIT;
520                             return token(ELTokenId.DOUBLE_LITERAL);
521                         case '8': // it's error to have '8' and '9' in octal number
522
case '9':
523                             lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET : INIT;
524                             return token(ELTokenId.INVALID_OCTAL_LITERAL);
525                         case 'e':
526                         case 'E':
527                             lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_DOUBLE_EXP : ISI_DOUBLE_EXP;
528                             break;
529                         default:
530                             if (Character.isDigit(actChar)) { // '8' and '9' already handled
531
lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET_ISI_OCTAL : ISI_OCTAL;
532                                 break;
533                             }
534                             lexerState = (lexerState == ISI_BRACKET_ISA_ZERO) ? ISI_BRACKET : INIT;
535                             input.backup(1);
536                             return token(ELTokenId.INT_LITERAL);
537                     }
538                     break;
539                     
540                 case ISI_BRACKET_ISI_INT:
541                 case ISI_INT:
542                     switch (actChar) {
543                         case 'l':
544                         case 'L':
545                             lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET : INIT;
546                             return token(ELTokenId.LONG_LITERAL);
547                         case '.':
548                             lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET_ISI_DOUBLE : ISI_DOUBLE;
549                             break;
550                         case 'f':
551                         case 'F':
552                             lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET : INIT;
553                             return token(ELTokenId.FLOAT_LITERAL);
554                         case 'd':
555                         case 'D':
556                             lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET : INIT;
557                             return token(ELTokenId.DOUBLE_LITERAL);
558                         case 'e':
559                         case 'E':
560                             lexerState = ISI_DOUBLE_EXP;
561                             break;
562                         default:
563                             if (!(actChar >= '0' && actChar <= '9')) {
564                                 lexerState = (lexerState == ISI_BRACKET_ISI_INT) ? ISI_BRACKET : INIT;
565                                 input.backup(1);
566                                 return token(ELTokenId.INT_LITERAL);
567                             }
568                     }
569                     break;
570                     
571                 case ISI_BRACKET_ISI_OCTAL:
572                 case ISI_OCTAL:
573                     if (!(actChar >= '0' && actChar <= '7')) {
574                         lexerState = (lexerState == ISI_BRACKET_ISI_OCTAL) ? ISI_BRACKET : INIT;
575                         input.backup(1);
576                         return token(ELTokenId.OCTAL_LITERAL);
577                     }
578                     break;
579                     
580                 case ISI_BRACKET_ISI_DOUBLE:
581                 case ISI_DOUBLE:
582                     switch (actChar) {
583                         case 'f':
584                         case 'F':
585                             lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET : INIT;
586                             return token(ELTokenId.FLOAT_LITERAL);
587                         case 'd':
588                         case 'D':
589                             lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET : INIT;
590                             return token(ELTokenId.DOUBLE_LITERAL);
591                         case 'e':
592                         case 'E':
593                             lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET_ISI_DOUBLE_EXP : ISI_DOUBLE_EXP;
594                             break;
595                         default:
596                             if (!((actChar >= '0' && actChar <= '9')
597                             || actChar == '.')) {
598                                 lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE) ? ISI_BRACKET : INIT;
599                                 input.backup(1);
600                                 return token(ELTokenId.DOUBLE_LITERAL);
601                             }
602                     }
603                     break;
604                     
605                 case ISI_DOUBLE_EXP:
606                 case ISI_BRACKET_ISI_DOUBLE_EXP:
607                     switch (actChar) {
608                         case 'f':
609                         case 'F':
610                             lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE_EXP) ? ISI_BRACKET : INIT;
611                             return token(ELTokenId.FLOAT_LITERAL);
612                         case 'd':
613                         case 'D':
614                             lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE_EXP) ? ISI_BRACKET : INIT;
615                             return token(ELTokenId.DOUBLE_LITERAL);
616                         case '-':
617                         case '+':
618                             lexerState = ISI_DOULE_EXP_ISA_SIGN;
619                             break;
620                         default:
621                             if (!Character.isDigit(actChar)){
622                                 //|| ch == '-' || ch == '+')) {
623
lexerState = (lexerState == ISI_BRACKET_ISI_DOUBLE_EXP) ? ISI_BRACKET : INIT;
624                                 input.backup(1);
625                                 return token(ELTokenId.DOUBLE_LITERAL);
626                             }
627                     }
628                     break;
629                     
630                 case ISI_DOULE_EXP_ISA_SIGN:
631                 case ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN:
632                     if (!Character.isDigit(actChar)){
633                         lexerState = (lexerState == ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN) ? ISI_BRACKET : INIT;
634                         input.backup(1);
635                         return token(ELTokenId.DOUBLE_LITERAL);
636                     }
637                     break;
638                     
639                 case ISI_BRACKET_ISI_HEX:
640                 case ISI_HEX:
641                     if (!((actChar >= 'a' && actChar <= 'f')
642                     || (actChar >= 'A' && actChar <= 'F')
643                     || Character.isDigit(actChar))
644                     ) {
645                         lexerState = (lexerState == ISI_BRACKET_ISI_HEX) ? ISI_BRACKET : INIT;
646                         input.backup(1);
647                         return token(ELTokenId.HEX_LITERAL);
648                     }
649                     break;
650                     
651                 case ISI_BRACKET_ISA_DOT:
652                 case ISA_DOT:
653                     if (Character.isDigit(actChar)) {
654                         lexerState = (lexerState == ISI_BRACKET_ISA_DOT) ? ISI_BRACKET_ISI_DOUBLE : ISI_DOUBLE;
655                         
656                     } else { // only single dot
657
lexerState = (lexerState == ISI_BRACKET_ISA_DOT) ? ISI_BRACKET : INIT;
658                         input.backup(1);
659                         return token(ELTokenId.DOT);
660                     }
661                     break;
662                     
663             } // end of switch(state)
664

665         } //end of big while
666

667         /** At this stage there's no more text in the scanned buffer.
668          * Scanner first checks whether this is completely the last
669          * available buffer.
670          */

671         switch (lexerState) {
672             case INIT:
673                 if (input.readLength() == 0) {
674                     return null;
675                 }
676                 break;
677             case ISI_WHITESPACE:
678                 lexerState = INIT;
679                 return token(ELTokenId.WHITESPACE);
680             case ISI_IDENTIFIER:
681                 lexerState = INIT;
682                 Token<ELTokenId> kwd = matchKeyword(input);
683                 return (kwd != null) ? kwd : token(ELTokenId.IDENTIFIER);
684             case ISI_STRING:
685             case ISI_STRING_A_BSLASH:
686                 return token(ELTokenId.STRING_LITERAL); // hold the state
687
case ISI_CHAR:
688             case ISI_CHAR_A_BSLASH:
689                 return token(ELTokenId.CHAR_LITERAL);
690             case ISI_CHAR_STRING :
691             case ISI_CHAR_STRING_A_BSLASH :
692                 return token(ELTokenId.STRING_LITERAL);
693             case ISA_ZERO:
694             case ISI_INT:
695                 lexerState = INIT;
696                 return token(ELTokenId.INT_LITERAL);
697             case ISI_OCTAL:
698                 lexerState = INIT;
699                 return token(ELTokenId.OCTAL_LITERAL);
700             case ISI_DOUBLE:
701             case ISI_DOUBLE_EXP:
702             case ISI_DOULE_EXP_ISA_SIGN:
703             case ISI_BRACKET_ISI_DOULE_EXP_ISA_SIGN:
704                 lexerState = INIT;
705                 return token(ELTokenId.DOUBLE_LITERAL);
706             case ISI_HEX:
707                 lexerState = INIT;
708                 return token(ELTokenId.HEX_LITERAL);
709             case ISA_DOT:
710                 lexerState = INIT;
711                 return token(ELTokenId.DOT);
712             case ISA_EQ:
713                 lexerState = INIT;
714                 return token(ELTokenId.EQ_EQ);
715             case ISA_GT:
716                 lexerState = INIT;
717                 return token(ELTokenId.GT);
718             case ISA_LT:
719                 lexerState = INIT;
720                 return token(ELTokenId.LT);
721             case ISA_PIPE:
722                 lexerState = INIT;
723                 return token(ELTokenId.OR_OR);
724             case ISA_AND:
725                 lexerState = INIT;
726                 return token(ELTokenId.AND_AND);
727             case ISA_EXCLAMATION:
728                 lexerState = INIT;
729                 return token(ELTokenId.NOT);
730             case ISI_BRACKET:
731             case ISI_BRACKET_A_IDENTIFIER:
732                 lexerState = INIT;
733                 return token(ELTokenId.IDENTIFIER);
734             case ISI_BRACKET_A_WHITESPACE:
735                 lexerState = ISI_BRACKET;
736                 return token(ELTokenId.WHITESPACE);
737             case ISI_BRACKET_ISA_EQ:
738                 lexerState = ISI_BRACKET;
739                 return token(ELTokenId.EQ_EQ);
740             case ISI_BRACKET_ISA_GT:
741                 lexerState = ISI_BRACKET;
742                 return token(ELTokenId.GT_EQ);
743             case ISI_BRACKET_ISA_LT:
744                 lexerState = ISI_BRACKET;
745                 return token(ELTokenId.LT_EQ);
746             case ISI_BRACKET_ISA_AND:
747                 lexerState = ISI_BRACKET;
748                 return token(ELTokenId.AND_AND);
749             case ISI_BRACKET_ISA_PIPE:
750                 lexerState = ISI_BRACKET;
751                 return token(ELTokenId.OR_OR);
752             case ISI_BRACKET_ISA_DOT:
753                 lexerState = ISI_BRACKET;
754                 return token(ELTokenId.DOT);
755             case ISI_BRACKET_ISA_ZERO:
756             case ISI_BRACKET_ISI_INT:
757                 lexerState = ISI_BRACKET;
758                 return token(ELTokenId.INT_LITERAL);
759         }
760         
761         
762         return null;
763     }
764     
765     
766     public Token<ELTokenId> matchKeyword(LexerInput lexerInput) {
767         int len = lexerInput.readLength();
768         char[] buffer = new char[len];
769         String JavaDoc read = lexerInput.readText().toString();
770         read.getChars(0, read.length(), buffer, 0);
771         int offset = 0;
772         
773         if (len > 10)
774             return null;
775         if (len <= 1)
776             return null;
777         switch (buffer[offset++]) {
778             case 'a':
779                 if (len <= 2) return null;
780                 return (len == 3
781                         && buffer[offset++] == 'n'
782                         && buffer[offset++] == 'd')
783                         ? token(ELTokenId.AND_KEYWORD) : null;
784             case 'd':
785                 if (len <= 2) return null;
786                 return (len == 3
787                         && buffer[offset++] == 'i'
788                         && buffer[offset++] == 'v')
789                         ? token(ELTokenId.DIV_KEYWORD) : null;
790             case 'e':
791                 switch (buffer[offset++]) {
792                     case 'q':
793                         return (len == 2) ? token(ELTokenId.EQ_KEYWORD) : null;
794                     case 'm':
795                         return (len == 5
796                                 && buffer[offset++] == 'p'
797                                 && buffer[offset++] == 't'
798                                 && buffer[offset++] == 'y')
799                                 ? token(ELTokenId.EMPTY_KEYWORD) : null;
800                     default:
801                         return null;
802                 }
803             case 'f':
804                 return (len == 5
805                         && buffer[offset++] == 'a'
806                         && buffer[offset++] == 'l'
807                         && buffer[offset++] == 's'
808                         && buffer[offset++] == 'e')
809                         ? token(ELTokenId.FALSE_KEYWORD) : null;
810             case 'g':
811                 switch (buffer[offset++]){
812                     case 'e':
813                         return (len == 2) ? token(ELTokenId.GE_KEYWORD) : null;
814                     case 't':
815                         return (len == 2) ? token(ELTokenId.GT_KEYWORD) : null;
816                     default:
817                         return null;
818                 }
819             case 'l':
820                 switch (buffer[offset++]){
821                     case 'e':
822                         return (len == 2) ? token(ELTokenId.LE_KEYWORD) : null;
823                     case 't':
824                         return (len == 2) ? token(ELTokenId.LT_KEYWORD) : null;
825                     default:
826                         return null;
827                 }
828             case 'i':
829                 if (len <= 9) return null;
830                 return (len == 10
831                         && buffer[offset++] == 'n'
832                         && buffer[offset++] == 's'
833                         && buffer[offset++] == 't'
834                         && buffer[offset++] == 'a'
835                         && buffer[offset++] == 'n'
836                         && buffer[offset++] == 'c'
837                         && buffer[offset++] == 'e'
838                         && buffer[offset++] == 'o'
839                         && buffer[offset++] == 'f')
840                         ? token(ELTokenId.INSTANCEOF_KEYWORD) : null;
841             case 'm':
842                 if (len <= 2) return null;
843                 return (len == 3
844                         && buffer[offset++] == 'o'
845                         && buffer[offset++] == 'd')
846                         ? token(ELTokenId.MOD_KEYWORD) : null;
847             case 'n':
848                 switch (buffer[offset++]){
849                     case 'e':
850                         return (len == 2) ? token(ELTokenId.NE_KEYWORD) : null;
851                     case 'o':
852                         return (len == 3
853                                 && buffer[offset++] == 't')
854                                 ? token(ELTokenId.NOT_KEYWORD) : null;
855                     case 'u':
856                         return (len == 4
857                                 && buffer[offset++] == 'l'
858                                 && buffer[offset++] == 'l')
859                                 ? token(ELTokenId.NULL_KEYWORD) : null;
860                     default:
861                         return null;
862                 }
863             case 'o':
864                 return (len == 2
865                         && buffer[offset++] == 'r')
866                         ? token(ELTokenId.OR_KEYWORD) : null;
867             case 't':
868                 return (len == 4
869                         && buffer[offset++] == 'r'
870                         && buffer[offset++] == 'u'
871                         && buffer[offset++] == 'e')
872                         ? token(ELTokenId.TRUE_KEYWORD) : null;
873                 
874             default :
875                 return null;
876         }
877     }
878     
879     private Token<ELTokenId> token(ELTokenId tokenId) {
880         if(LOG) {
881             if(input.readLength() == 0) {
882                 LOGGER.log(Level.INFO, "[" + this.getClass().getSimpleName() + "] Found zero length token: "); //NOI18N
883
}
884             LOGGER.log(Level.INFO, "[" + this.getClass().getSimpleName() + "] token ('" + input.readText().toString() + "'; id=" + tokenId + ")\n"); //NOI18N
885
}
886         return tokenFactory.createToken(tokenId);
887     }
888     
889     public void release() {
890     }
891
892 }
893
Popular Tags