KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > netbeans > lib > java > lexer > JavaLexer


1 /*
2  * The contents of this file are subject to the terms of the Common Development
3  * and Distribution License (the License). You may not use this file except in
4  * compliance with the License.
5  *
6  * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7  * or http://www.netbeans.org/cddl.txt.
8  *
9  * When distributing Covered Code, include this CDDL Header Notice in each file
10  * and include the License file at http://www.netbeans.org/cddl.txt.
11  * If applicable, add the following below the CDDL Header, with the fields
12  * enclosed by brackets [] replaced by your own identifying information:
13  * "Portions Copyrighted [year] [name of copyright owner]"
14  *
15  * The Original Software is NetBeans. The Initial Developer of the Original
16  * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
17  * Microsystems, Inc. All Rights Reserved.
18  */

19
20 package org.netbeans.lib.java.lexer;
21
22 import org.netbeans.api.java.lexer.JavaTokenId;
23 import org.netbeans.api.lexer.PartType;
24 import org.netbeans.api.lexer.Token;
25 import org.netbeans.spi.lexer.Lexer;
26 import org.netbeans.spi.lexer.LexerInput;
27 import org.netbeans.spi.lexer.LexerRestartInfo;
28 import org.netbeans.spi.lexer.TokenFactory;
29
30 /**
31  * Lexical analyzer for java language.
32  * <br/>
33  * It recognizes "version" attribute and expects <code>java.lang.Integer</code>
34  * value for it. The default value is Integer.valueOf(5). The lexer changes
35  * its behavior in the following way:
36  * <ul>
37  * <li> Integer.valueOf(4) - "assert" recognized as keyword (not identifier)
38  * <li> Integer.valueOf(5) - "enum" recognized as keyword (not identifier)
39  * </ul>
40  *
41  * @author Miloslav Metelka
42  * @version 1.00
43  */

44
45 public class JavaLexer implements Lexer<JavaTokenId> {
46     
47     private static final int EOF = LexerInput.EOF;
48
49     private final LexerInput input;
50     
51     private final TokenFactory<JavaTokenId> tokenFactory;
52     
53     private final int version;
54
55     public JavaLexer(LexerRestartInfo<JavaTokenId> info) {
56         this.input = info.input();
57         this.tokenFactory = info.tokenFactory();
58         assert (info.state() == null); // never set to non-null value in state()
59

60         Integer JavaDoc ver = (Integer JavaDoc)info.getAttributeValue("version");
61         this.version = (ver != null) ? ver.intValue() : 5; // Use Java 1.5 by default
62
}
63     
64     public Object JavaDoc state() {
65         return null; // always in default state after token recognition
66
}
67     
68     public Token<JavaTokenId> nextToken() {
69         while(true) {
70             int c = input.read();
71             switch (c) {
72                 case '"': // string literal
73
while (true)
74                         switch (input.read()) {
75                             case '"': // NOI18N
76
return token(JavaTokenId.STRING_LITERAL);
77                             case '\\':
78                                 input.read();
79                                 break;
80                             case '\r': input.consumeNewline();
81                             case '\n':
82                             case EOF:
83                                 return tokenFactory.createToken(JavaTokenId.STRING_LITERAL,
84                                         input.readLength(), PartType.START);
85                         }
86
87                 case '\'': // char literal
88
while (true)
89                         switch (input.read()) {
90                             case '\'': // NOI18N
91
return token(JavaTokenId.CHAR_LITERAL);
92                             case '\\':
93                                 input.read(); // read escaped char
94
break;
95                             case '\r': input.consumeNewline();
96                             case '\n':
97                             case EOF:
98                                 return tokenFactory.createToken(JavaTokenId.CHAR_LITERAL,
99                                         input.readLength(), PartType.START);
100                         }
101
102                 case '/':
103                     switch (input.read()) {
104                         case '/': // in single-line comment
105
while (true)
106                                 switch (input.read()) {
107                                     case '\r': input.consumeNewline();
108                                     case '\n':
109                                     case EOF:
110                                         return token(JavaTokenId.LINE_COMMENT);
111                                 }
112                         case '=': // found /=
113
return token(JavaTokenId.SLASHEQ);
114                         case '*': // in multi-line or javadoc comment
115
c = input.read();
116                             if (c == '*') { // either javadoc comment or empty multi-line comment /**/
117
c = input.read();
118                                     if (c == '/')
119                                         return token(JavaTokenId.BLOCK_COMMENT);
120                                     while (true) { // in javadoc comment
121
while (c == '*') {
122                                             c = input.read();
123                                             if (c == '/')
124                                                 return token(JavaTokenId.JAVADOC_COMMENT);
125                                             else if (c == EOF)
126                                                 return tokenFactory.createToken(JavaTokenId.JAVADOC_COMMENT,
127                                                         input.readLength(), PartType.START);
128                                         }
129                                         if (c == EOF)
130                                             return tokenFactory.createToken(JavaTokenId.JAVADOC_COMMENT,
131                                                         input.readLength(), PartType.START);
132                                         c = input.read();
133                                     }
134
135                             } else { // in multi-line comment (and not after '*')
136
while (true) {
137                                     c = input.read();
138                                     while (c == '*') {
139                                         c = input.read();
140                                         if (c == '/')
141                                             return token(JavaTokenId.BLOCK_COMMENT);
142                                         else if (c == EOF)
143                                             return tokenFactory.createToken(JavaTokenId.BLOCK_COMMENT,
144                                                     input.readLength(), PartType.START);
145                                     }
146                                     if (c == EOF)
147                                         return tokenFactory.createToken(JavaTokenId.BLOCK_COMMENT,
148                                                 input.readLength(), PartType.START);
149                                 }
150                             }
151                     } // end of switch()
152
input.backup(1);
153                     return token(JavaTokenId.SLASH);
154
155                 case '=':
156                     if (input.read() == '=')
157                         return token(JavaTokenId.EQEQ);
158                     input.backup(1);
159                     return token(JavaTokenId.EQ);
160
161                 case '>':
162                     switch (input.read()) {
163                         case '>': // after >>
164
switch (c = input.read()) {
165                                 case '>': // after >>>
166
if (input.read() == '=')
167                                         return token(JavaTokenId.GTGTGTEQ);
168                                     input.backup(1);
169                                     return token(JavaTokenId.GTGTGT);
170                                 case '=': // >>=
171
return token(JavaTokenId.GTGTEQ);
172                             }
173                             input.backup(1);
174                             return token(JavaTokenId.GTGT);
175                         case '=': // >=
176
return token(JavaTokenId.GTEQ);
177                     }
178                     input.backup(1);
179                     return token(JavaTokenId.GT);
180
181                 case '<':
182                     switch (input.read()) {
183                         case '<': // after <<
184
if (input.read() == '=')
185                                 return token(JavaTokenId.LTLTEQ);
186                             input.backup(1);
187                             return token(JavaTokenId.LTLT);
188                         case '=': // <=
189
return token(JavaTokenId.LTEQ);
190                     }
191                     input.backup(1);
192                     return token(JavaTokenId.LT);
193
194                 case '+':
195                     switch (input.read()) {
196                         case '+':
197                             return token(JavaTokenId.PLUSPLUS);
198                         case '=':
199                             return token(JavaTokenId.PLUSEQ);
200                     }
201                     input.backup(1);
202                     return token(JavaTokenId.PLUS);
203
204                 case '-':
205                     switch (input.read()) {
206                         case '-':
207                             return token(JavaTokenId.MINUSMINUS);
208                         case '=':
209                             return token(JavaTokenId.MINUSEQ);
210                     }
211                     input.backup(1);
212                     return token(JavaTokenId.MINUS);
213
214                 case '*':
215                     switch (input.read()) {
216                         case '/': // invalid comment end - */
217
return token(JavaTokenId.INVALID_COMMENT_END);
218                         case '=':
219                             return token(JavaTokenId.STAREQ);
220                     }
221                     input.backup(1);
222                     return token(JavaTokenId.STAR);
223
224                 case '|':
225                     switch (input.read()) {
226                         case '|':
227                             return token(JavaTokenId.BARBAR);
228                         case '=':
229                             return token(JavaTokenId.BAREQ);
230                     }
231                     input.backup(1);
232                     return token(JavaTokenId.BAR);
233
234                 case '&':
235                     switch (input.read()) {
236                         case '&':
237                             return token(JavaTokenId.AMPAMP);
238                         case '=':
239                             return token(JavaTokenId.AMPEQ);
240                     }
241                     input.backup(1);
242                     return token(JavaTokenId.AMP);
243
244                 case '%':
245                     if (input.read() == '=')
246                         return token(JavaTokenId.PERCENTEQ);
247                     input.backup(1);
248                     return token(JavaTokenId.PERCENT);
249
250                 case '^':
251                     if (input.read() == '=')
252                         return token(JavaTokenId.CARETEQ);
253                     input.backup(1);
254                     return token(JavaTokenId.CARET);
255
256                 case '!':
257                     if (input.read() == '!')
258                         return token(JavaTokenId.BANGEQ);
259                     input.backup(1);
260                     return token(JavaTokenId.BANG);
261
262                 case '.':
263                     if ((c = input.read()) == '.')
264                         if (input.read() == '.') { // ellipsis ...
265
return token(JavaTokenId.ELLIPSIS);
266                         } else
267                             input.backup(2);
268                     else if ('0' <= c && c <= '9') { // float literal
269
return finishNumberLiteral(input.read(), true);
270                     } else
271                         input.backup(1);
272                     return token(JavaTokenId.DOT);
273
274                 case '~':
275                     return token(JavaTokenId.TILDE);
276                 case ',':
277                     return token(JavaTokenId.COMMA);
278                 case ';':
279                     return token(JavaTokenId.SEMICOLON);
280                 case ':':
281                     return token(JavaTokenId.COLON);
282                 case '?':
283                     return token(JavaTokenId.QUESTION);
284                 case '(':
285                     return token(JavaTokenId.LPAREN);
286                 case ')':
287                     return token(JavaTokenId.RPAREN);
288                 case '[':
289                     return token(JavaTokenId.LBRACKET);
290                 case ']':
291                     return token(JavaTokenId.RBRACKET);
292                 case '{':
293                     return token(JavaTokenId.LBRACE);
294                 case '}':
295                     return token(JavaTokenId.RBRACE);
296                 case '@':
297                     return token(JavaTokenId.AT);
298
299                 case '0': // in a number literal
300
c = input.read();
301                     if (c == 'x' || c == 'X') { // in hexadecimal (possibly floating-point) literal
302
boolean inFraction = false;
303                         while (true) {
304                             switch (input.read()) {
305                                 case '0': case '1': case '2': case '3': case '4':
306                                 case '5': case '6': case '7': case '8': case '9':
307                                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
308                                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
309                                     break;
310                                 case '.': // hex float literal
311
if (!inFraction) {
312                                         inFraction = true;
313                                     } else { // two dots in the float literal
314
return token(JavaTokenId.FLOAT_LITERAL_INVALID);
315                                     }
316                                     break;
317                                 case 'p': case 'P': // binary exponent
318
return finishFloatExponent();
319                                 default:
320                                     input.backup(1);
321                                     // if float then before mandatory binary exponent => invalid
322
return token(inFraction ? JavaTokenId.FLOAT_LITERAL_INVALID
323                                             : JavaTokenId.INT_LITERAL);
324                             }
325                         } // end of while(true)
326
}
327                     return finishNumberLiteral(c, false);
328                     
329                 case '1': case '2': case '3': case '4':
330                 case '5': case '6': case '7': case '8': case '9':
331                     return finishNumberLiteral(input.read(), false);
332
333                     
334                 // Keywords lexing
335
case 'a':
336                     switch (c = input.read()) {
337                         case 'b':
338                             if ((c = input.read()) == 's'
339                              && (c = input.read()) == 't'
340                              && (c = input.read()) == 'r'
341                              && (c = input.read()) == 'a'
342                              && (c = input.read()) == 'c'
343                              && (c = input.read()) == 't')
344                                 return keywordOrIdentifier(JavaTokenId.ABSTRACT);
345                             break;
346                         case 's':
347                             if ((c = input.read()) == 's'
348                              && (c = input.read()) == 'e'
349                              && (c = input.read()) == 'r'
350                              && (c = input.read()) == 't')
351                                 return (version >= 4)
352                                         ? keywordOrIdentifier(JavaTokenId.ASSERT)
353                                         : finishIdentifier();
354                             break;
355                     }
356                     return finishIdentifier(c);
357
358                 case 'b':
359                     switch (c = input.read()) {
360                         case 'o':
361                             if ((c = input.read()) == 'o'
362                              && (c = input.read()) == 'l'
363                              && (c = input.read()) == 'e'
364                              && (c = input.read()) == 'a'
365                              && (c = input.read()) == 'n')
366                                 return keywordOrIdentifier(JavaTokenId.BOOLEAN);
367                             break;
368                         case 'r':
369                             if ((c = input.read()) == 'e'
370                              && (c = input.read()) == 'a'
371                              && (c = input.read()) == 'k')
372                                 return keywordOrIdentifier(JavaTokenId.BREAK);
373                             break;
374                         case 'y':
375                             if ((c = input.read()) == 't'
376                              && (c = input.read()) == 'e')
377                                 return keywordOrIdentifier(JavaTokenId.BYTE);
378                             break;
379                     }
380                     return finishIdentifier(c);
381
382                 case 'c':
383                     switch (c = input.read()) {
384                         case 'a':
385                             switch (c = input.read()) {
386                                 case 's':
387                                     if ((c = input.read()) == 'e')
388                                         return keywordOrIdentifier(JavaTokenId.CASE);
389                                     break;
390                                 case 't':
391                                     if ((c = input.read()) == 'c'
392                                      && (c = input.read()) == 'h')
393                                         return keywordOrIdentifier(JavaTokenId.CATCH);
394                                     break;
395                             }
396                             break;
397                         case 'h':
398                             if ((c = input.read()) == 'a'
399                              && (c = input.read()) == 'r')
400                                 return keywordOrIdentifier(JavaTokenId.CHAR);
401                             break;
402                         case 'l':
403                             if ((c = input.read()) == 'a'
404                              && (c = input.read()) == 's'
405                              && (c = input.read()) == 's')
406                                 return keywordOrIdentifier(JavaTokenId.CLASS);
407                             break;
408                         case 'o':
409                             if ((c = input.read()) == 'n') {
410                                 switch (c = input.read()) {
411                                     case 's':
412                                         if ((c = input.read()) == 't')
413                                             return keywordOrIdentifier(JavaTokenId.CONST);
414                                         break;
415                                     case 't':
416                                         if ((c = input.read()) == 'i'
417                                          && (c = input.read()) == 'n'
418                                          && (c = input.read()) == 'u'
419                                          && (c = input.read()) == 'e')
420                                             return keywordOrIdentifier(JavaTokenId.CONTINUE);
421                                         break;
422                                 }
423                             }
424                             break;
425                     }
426                     return finishIdentifier(c);
427
428                 case 'd':
429                     switch (c = input.read()) {
430                         case 'e':
431                             if ((c = input.read()) == 'f'
432                              && (c = input.read()) == 'a'
433                              && (c = input.read()) == 'u'
434                              && (c = input.read()) == 'l'
435                              && (c = input.read()) == 't')
436                                 return keywordOrIdentifier(JavaTokenId.DEFAULT);
437                             break;
438                         case 'o':
439                             switch (c = input.read()) {
440                                 case 'u':
441                                     if ((c = input.read()) == 'b'
442                                      && (c = input.read()) == 'l'
443                                      && (c = input.read()) == 'e')
444                                         return keywordOrIdentifier(JavaTokenId.DOUBLE);
445                                     break;
446                                 default:
447                                     return keywordOrIdentifier(JavaTokenId.DO, c);
448                             }
449                             break;
450                     }
451                     return finishIdentifier(c);
452
453                 case 'e':
454                     switch (c = input.read()) {
455                         case 'l':
456                             if ((c = input.read()) == 's'
457                              && (c = input.read()) == 'e')
458                                 return keywordOrIdentifier(JavaTokenId.ELSE);
459                             break;
460                         case 'n':
461                             if ((c = input.read()) == 'u'
462                              && (c = input.read()) == 'm')
463                                 return (version >= 5)
464                                         ? keywordOrIdentifier(JavaTokenId.ENUM)
465                                         : finishIdentifier();
466                             break;
467                         case 'x':
468                             if ((c = input.read()) == 't'
469                              && (c = input.read()) == 'e'
470                              && (c = input.read()) == 'n'
471                              && (c = input.read()) == 'd'
472                              && (c = input.read()) == 's')
473                                 return keywordOrIdentifier(JavaTokenId.EXTENDS);
474                             break;
475                     }
476                     return finishIdentifier(c);
477
478                 case 'f':
479                     switch (c = input.read()) {
480                         case 'a':
481                             if ((c = input.read()) == 'l'
482                              && (c = input.read()) == 's'
483                              && (c = input.read()) == 'e')
484                                 return keywordOrIdentifier(JavaTokenId.FALSE);
485                             break;
486                         case 'i':
487                             if ((c = input.read()) == 'n'
488                              && (c = input.read()) == 'a'
489                              && (c = input.read()) == 'l')
490                                 switch (c = input.read()) {
491                                     case 'l':
492                                         if ((c = input.read()) == 'y')
493                                             return keywordOrIdentifier(JavaTokenId.FINALLY);
494                                         break;
495                                     default:
496                                         return keywordOrIdentifier(JavaTokenId.FINAL, c);
497                                 }
498                             break;
499                         case 'l':
500                             if ((c = input.read()) == 'o'
501                              && (c = input.read()) == 'a'
502                              && (c = input.read()) == 't')
503                                 return keywordOrIdentifier(JavaTokenId.FLOAT);
504                             break;
505                         case 'o':
506                             if ((c = input.read()) == 'r')
507                                 return keywordOrIdentifier(JavaTokenId.FOR);
508                             break;
509                     }
510                     return finishIdentifier(c);
511
512                 case 'g':
513                     if ((c = input.read()) == 'o'
514                      && (c = input.read()) == 't'
515                      && (c = input.read()) == 'o')
516                         return keywordOrIdentifier(JavaTokenId.GOTO);
517                     return finishIdentifier(c);
518                     
519                 case 'i':
520                     switch (c = input.read()) {
521                         case 'f':
522                             return keywordOrIdentifier(JavaTokenId.IF);
523                         case 'm':
524                             if ((c = input.read()) == 'p') {
525                                 switch (c = input.read()) {
526                                     case 'l':
527                                         if ((c = input.read()) == 'e'
528                                          && (c = input.read()) == 'm'
529                                          && (c = input.read()) == 'e'
530                                          && (c = input.read()) == 'n'
531                                          && (c = input.read()) == 't'
532                                          && (c = input.read()) == 's')
533                                             return keywordOrIdentifier(JavaTokenId.IMPLEMENTS);
534                                         break;
535                                     case 'o':
536                                         if ((c = input.read()) == 'r'
537                                          && (c = input.read()) == 't')
538                                             return keywordOrIdentifier(JavaTokenId.IMPORT);
539                                         break;
540                                 }
541                             }
542                             break;
543                         case 'n':
544                             switch (c = input.read()) {
545                                 case 's':
546                                     if ((c = input.read()) == 't'
547                                      && (c = input.read()) == 'a'
548                                      && (c = input.read()) == 'n'
549                                      && (c = input.read()) == 'c'
550                                      && (c = input.read()) == 'e'
551                                      && (c = input.read()) == 'o'
552                                      && (c = input.read()) == 'f')
553                                         return keywordOrIdentifier(JavaTokenId.INSTANCEOF);
554                                     break;
555                                 case 't':
556                                     switch (c = input.read()) {
557                                         case 'e':
558                                             if ((c = input.read()) == 'r'
559                                              && (c = input.read()) == 'f'
560                                              && (c = input.read()) == 'a'
561                                              && (c = input.read()) == 'c'
562                                              && (c = input.read()) == 'e')
563                                                 return keywordOrIdentifier(JavaTokenId.INTERFACE);
564                                             break;
565                                         default:
566                                             return keywordOrIdentifier(JavaTokenId.INT, c);
567                                     }
568                                     break;
569                             }
570                             break;
571                     }
572                     return finishIdentifier(c);
573
574                 case 'l':
575                     if ((c = input.read()) == 'o'
576                      && (c = input.read()) == 'n'
577                      && (c = input.read()) == 'g')
578                         return keywordOrIdentifier(JavaTokenId.LONG);
579                     return finishIdentifier(c);
580
581                 case 'n':
582                     switch (c = input.read()) {
583                         case 'a':
584                             if ((c = input.read()) == 't'
585                              && (c = input.read()) == 'i'
586                              && (c = input.read()) == 'v'
587                              && (c = input.read()) == 'e')
588                                 return keywordOrIdentifier(JavaTokenId.NATIVE);
589                             break;
590                         case 'e':
591                             if ((c = input.read()) == 'w')
592                                 return keywordOrIdentifier(JavaTokenId.NEW);
593                             break;
594                         case 'u':
595                             if ((c = input.read()) == 'l'
596                              && (c = input.read()) == 'l')
597                                 return keywordOrIdentifier(JavaTokenId.NULL);
598                             break;
599                     }
600                     return finishIdentifier(c);
601
602                 case 'p':
603                     switch (c = input.read()) {
604                         case 'a':
605                             if ((c = input.read()) == 'c'
606                              && (c = input.read()) == 'k'
607                              && (c = input.read()) == 'a'
608                              && (c = input.read()) == 'g'
609                              && (c = input.read()) == 'e')
610                                 return keywordOrIdentifier(JavaTokenId.PACKAGE);
611                             break;
612                         case 'r':
613                             switch (c = input.read()) {
614                                 case 'i':
615                                     if ((c = input.read()) == 'v'
616                                      && (c = input.read()) == 'a'
617                                      && (c = input.read()) == 't'
618                                      && (c = input.read()) == 'e')
619                                         return keywordOrIdentifier(JavaTokenId.PRIVATE);
620                                     break;
621                                 case 'o':
622                                     if ((c = input.read()) == 't'
623                                      && (c = input.read()) == 'e'
624                                      && (c = input.read()) == 'c'
625                                      && (c = input.read()) == 't'
626                                      && (c = input.read()) == 'e'
627                                      && (c = input.read()) == 'd')
628                                         return keywordOrIdentifier(JavaTokenId.PROTECTED);
629                                     break;
630                             }
631                             break;
632                         case 'u':
633                             if ((c = input.read()) == 'b'
634                              && (c = input.read()) == 'l'
635                              && (c = input.read()) == 'i'
636                              && (c = input.read()) == 'c')
637                                 return keywordOrIdentifier(JavaTokenId.PUBLIC);
638                             break;
639                     }
640                     return finishIdentifier(c);
641
642                 case 'r':
643                     if ((c = input.read()) == 'e'
644                      && (c = input.read()) == 't'
645                      && (c = input.read()) == 'u'
646                      && (c = input.read()) == 'r'
647                      && (c = input.read()) == 'n')
648                         return keywordOrIdentifier(JavaTokenId.RETURN);
649                     return finishIdentifier(c);
650
651                 case 's':
652                     switch (c = input.read()) {
653                         case 'h':
654                             if ((c = input.read()) == 'o'
655                              && (c = input.read()) == 'r'
656                              && (c = input.read()) == 't')
657                                 return keywordOrIdentifier(JavaTokenId.SHORT);
658                             break;
659                         case 't':
660                             switch (c = input.read()) {
661                                 case 'a':
662                                     if ((c = input.read()) == 't'
663                                      && (c = input.read()) == 'i'
664                                      && (c = input.read()) == 'c')
665                                         return keywordOrIdentifier(JavaTokenId.STATIC);
666                                     break;
667                                 case 'r':
668                                     if ((c = input.read()) == 'i'
669                                      && (c = input.read()) == 'c'
670                                      && (c = input.read()) == 't'
671                                      && (c = input.read()) == 'f'
672                                      && (c = input.read()) == 'p')
673                                         return keywordOrIdentifier(JavaTokenId.STRICTFP);
674                                     break;
675                             }
676                             break;
677                         case 'u':
678                             if ((c = input.read()) == 'p'
679                              && (c = input.read()) == 'e'
680                              && (c = input.read()) == 'r')
681                                 return keywordOrIdentifier(JavaTokenId.SUPER);
682                             break;
683                         case 'w':
684                             if ((c = input.read()) == 'i'
685                              && (c = input.read()) == 't'
686                              && (c = input.read()) == 'c'
687                              && (c = input.read()) == 'h')
688                                 return keywordOrIdentifier(JavaTokenId.SWITCH);
689                             break;
690                         case 'y':
691                             if ((c = input.read()) == 'n'
692                              && (c = input.read()) == 'c'
693                              && (c = input.read()) == 'h'
694                              && (c = input.read()) == 'r'
695                              && (c = input.read()) == 'o'
696                              && (c = input.read()) == 'n'
697                              && (c = input.read()) == 'i'
698                              && (c = input.read()) == 'z'
699                              && (c = input.read()) == 'e'
700                              && (c = input.read()) == 'd')
701                                 return keywordOrIdentifier(JavaTokenId.SYNCHRONIZED);
702                             break;
703                     }
704                     return finishIdentifier(c);
705
706                 case 't':
707                     switch (c = input.read()) {
708                         case 'h':
709                             switch (c = input.read()) {
710                                 case 'i':
711                                     if ((c = input.read()) == 's')
712                                         return keywordOrIdentifier(JavaTokenId.THIS);
713                                     break;
714                                 case 'r':
715                                     if ((c = input.read()) == 'o'
716                                      && (c = input.read()) == 'w')
717                                         switch (c = input.read()) {
718                                             case 's':
719                                                 return keywordOrIdentifier(JavaTokenId.THROWS);
720                                             default:
721                                                 return keywordOrIdentifier(JavaTokenId.THROW, c);
722                                         }
723                                     break;
724                             }
725                             break;
726                         case 'r':
727                             switch (c = input.read()) {
728                                 case 'a':
729                                     if ((c = input.read()) == 'n'
730                                      && (c = input.read()) == 's'
731                                      && (c = input.read()) == 'i'
732                                      && (c = input.read()) == 'e'
733                                      && (c = input.read()) == 'n'
734                                      && (c = input.read()) == 't')
735                                         return keywordOrIdentifier(JavaTokenId.TRANSIENT);
736                                     break;
737                                 case 'u':
738                                     if ((c = input.read()) == 'e')
739                                         return keywordOrIdentifier(JavaTokenId.TRUE);
740                                     break;
741                                 case 'y':
742                                     return keywordOrIdentifier(JavaTokenId.TRY);
743                             }
744                             break;
745                     }
746                     return finishIdentifier(c);
747
748                 case 'v':
749                     if ((c = input.read()) == 'o') {
750                         switch (c = input.read()) {
751                             case 'i':
752                                 if ((c = input.read()) == 'd')
753                                     return keywordOrIdentifier(JavaTokenId.VOID);
754                                 break;
755                             case 'l':
756                                 if ((c = input.read()) == 'a'
757                                  && (c = input.read()) == 't'
758                                  && (c = input.read()) == 'i'
759                                  && (c = input.read()) == 'l'
760                                  && (c = input.read()) == 'e')
761                                     return keywordOrIdentifier(JavaTokenId.VOLATILE);
762                                 break;
763                         }
764                     }
765                     return finishIdentifier(c);
766
767                 case 'w':
768                     if ((c = input.read()) == 'h'
769                      && (c = input.read()) == 'i'
770                      && (c = input.read()) == 'l'
771                      && (c = input.read()) == 'e')
772                         return keywordOrIdentifier(JavaTokenId.WHILE);
773                     return finishIdentifier(c);
774
775                 // Rest of lowercase letters starting identifiers
776
case 'h': case 'j': case 'k': case 'm': case 'o':
777                 case 'q': case 'u': case 'x': case 'y': case 'z':
778                 // Uppercase letters starting identifiers
779
case 'A': case 'B': case 'C': case 'D': case 'E':
780                 case 'F': case 'G': case 'H': case 'I': case 'J':
781                 case 'K': case 'L': case 'M': case 'N': case 'O':
782                 case 'P': case 'Q': case 'R': case 'S': case 'T':
783                 case 'U': case 'V': case 'W': case 'X': case 'Y':
784                 case 'Z':
785                 case '$': case '_':
786                     return finishIdentifier();
787                     
788                 // All Character.isWhitespace(c) below 0x80 follow
789
// ['\t' - '\r'] and [0x1c - ' ']
790
case '\t':
791                 case '\n':
792                 case 0x0b:
793                 case '\f':
794                 case '\r':
795                 case 0x1c:
796                 case 0x1d:
797                 case 0x1e:
798                 case 0x1f:
799                     return finishWhitespace();
800                 case ' ':
801                     c = input.read();
802                     if (c == EOF || !Character.isWhitespace(c)) { // Return single space as flyweight token
803
input.backup(1);
804                         return tokenFactory.getFlyweightToken(JavaTokenId.WHITESPACE, " ");
805                     }
806                     return finishWhitespace();
807
808                 case EOF:
809                     return null;
810
811                 default:
812                     if (c >= 0x80) { // lowSurr ones already handled above
813
c = translateSurrogates(c);
814                         if (Character.isJavaIdentifierStart(c))
815                             return finishIdentifier();
816                         if (Character.isWhitespace(c))
817                             return finishWhitespace();
818                     }
819
820                     // Invalid char
821
return token(JavaTokenId.ERROR);
822             } // end of switch (c)
823
} // end of while(true)
824
}
825     
826     private int translateSurrogates(int c) {
827         if (Character.isHighSurrogate((char)c)) {
828             int lowSurr = input.read();
829             if (lowSurr != EOF && Character.isLowSurrogate((char)lowSurr)) {
830                 // c and lowSurr form the integer unicode char.
831
c = Character.toCodePoint((char)c, (char)lowSurr);
832             } else {
833                 // Otherwise it's error: Low surrogate does not follow the high one.
834
// Leave the original character unchanged.
835
// As the surrogates do not belong to any
836
// specific unicode category the lexer should finally
837
// categorize them as a lexical error.
838
input.backup(1);
839             }
840         }
841         return c;
842     }
843
844     private Token<JavaTokenId> finishWhitespace() {
845         while (true) {
846             int c = input.read();
847             // There should be no surrogates possible for whitespace
848
// so do not call translateSurrogates()
849
if (c == EOF || !Character.isWhitespace(c)) {
850                 input.backup(1);
851                 return tokenFactory.createToken(JavaTokenId.WHITESPACE);
852             }
853         }
854     }
855     
856     private Token<JavaTokenId> finishIdentifier() {
857         return finishIdentifier(input.read());
858     }
859     
860     private Token<JavaTokenId> finishIdentifier(int c) {
861         while (true) {
862             if (c == EOF || !Character.isJavaIdentifierPart(c = translateSurrogates(c))) {
863                 // For surrogate 2 chars must be backed up
864
input.backup((c >= Character.MIN_SUPPLEMENTARY_CODE_POINT) ? 2 : 1);
865                 return tokenFactory.createToken(JavaTokenId.IDENTIFIER);
866             }
867             c = input.read();
868         }
869     }
870
871     private Token<JavaTokenId> keywordOrIdentifier(JavaTokenId keywordId) {
872         return keywordOrIdentifier(keywordId, input.read());
873     }
874
875     private Token<JavaTokenId> keywordOrIdentifier(JavaTokenId keywordId, int c) {
876         // Check whether the given char is non-ident and if so then return keyword
877
if (c == EOF || !Character.isJavaIdentifierPart(c = translateSurrogates(c))) {
878             // For surrogate 2 chars must be backed up
879
input.backup((c >= Character.MIN_SUPPLEMENTARY_CODE_POINT) ? 2 : 1);
880             return token(keywordId);
881         } else // c is identifier part
882
return finishIdentifier();
883     }
884     
885     private Token<JavaTokenId> finishNumberLiteral(int c, boolean inFraction) {
886         while (true) {
887             switch (c) {
888                 case '.':
889                     if (!inFraction) {
890                         inFraction = true;
891                     } else { // two dots in the literal
892
return token(JavaTokenId.FLOAT_LITERAL_INVALID);
893                     }
894                     break;
895                 case 'l': case 'L': // 0l or 0L
896
return token(JavaTokenId.LONG_LITERAL);
897                 case 'd': case 'D':
898                     return token(JavaTokenId.DOUBLE_LITERAL);
899                 case 'f': case 'F':
900                     return token(JavaTokenId.FLOAT_LITERAL);
901                 case '0': case '1': case '2': case '3': case '4':
902                 case '5': case '6': case '7': case '8': case '9':
903                     break;
904                 case 'e': case 'E': // exponent part
905
return finishFloatExponent();
906                 default:
907                     input.backup(1);
908                     return token(inFraction ? JavaTokenId.DOUBLE_LITERAL
909                             : JavaTokenId.INT_LITERAL);
910             }
911             c = input.read();
912         }
913     }
914     
915     private Token<JavaTokenId> finishFloatExponent() {
916         int c = input.read();
917         if (c == '+' || c == '-') {
918             c = input.read();
919         }
920         if (c < '0' || '9' < c)
921             return token(JavaTokenId.FLOAT_LITERAL_INVALID);
922         do {
923             c = input.read();
924         } while ('0' <= c && c <= '9'); // reading exponent
925
switch (c) {
926             case 'd': case 'D':
927                 return token(JavaTokenId.DOUBLE_LITERAL);
928             case 'f': case 'F':
929                 return token(JavaTokenId.FLOAT_LITERAL);
930             default:
931                 input.backup(1);
932                 return token(JavaTokenId.DOUBLE_LITERAL);
933         }
934     }
935     
936     private Token<JavaTokenId> token(JavaTokenId id) {
937         String JavaDoc fixedText = id.fixedText();
938         return (fixedText != null)
939                 ? tokenFactory.getFlyweightToken(id, fixedText)
940                 : tokenFactory.createToken(id);
941     }
942     
/**
 * Intentionally empty: the body contains no statements, so this lexer
 * does nothing when released.
 * <br/>
 * NOTE(review): presumably invoked by the lexer infrastructure once the lexer
 * is no longer used - confirm against the Lexer SPI.
 */
943     public void release() {
944     }
945
946 }
947
Popular Tags