KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > aspectj > compiler > base > parser > JavaTokenizer


1 /* -*- Mode: Java; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2  *
3  * This file is part of the compiler and core tools for the AspectJ(tm)
4  * programming language; see http://aspectj.org
5  *
6  * The contents of this file are subject to the Mozilla Public License
7  * Version 1.1 (the "License"); you may not use this file except in
8  * compliance with the License. You may obtain a copy of the License at
9  * either http://www.mozilla.org/MPL/ or http://aspectj.org/MPL/.
10  *
11  * Software distributed under the License is distributed on an "AS IS" basis,
12  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13  * for the specific language governing rights and limitations under the
14  * License.
15  *
16  * The Original Code is AspectJ.
17  *
18  * The Initial Developer of the Original Code is Xerox Corporation. Portions
19  * created by Xerox Corporation are Copyright (C) 1999-2002 Xerox Corporation.
20  * All Rights Reserved.
21  *
22  * Contributor(s):
23  */

24
25 package org.aspectj.compiler.base.parser;
26
27 import org.aspectj.compiler.base.ast.*;
28
29 import org.aspectj.compiler.base.*;
30
31 import java.util.*;
32 import java.math.BigDecimal JavaDoc;
33 import java.math.BigInteger JavaDoc;
34 import java.io.*;
35
36 class Comment extends Token {
37     public Comment(String JavaDoc text) {
38         super(MULTI_LINE_COMMENT, text, false);
39         //System.out.println("comment: " + text);
40
}
41 }
42 /**
43 class FormalComment extends Token {
44     public FormalComment(String text) {
45         super(FORMAL_COMMENT, text, false);
46     }
47 }
48 **/

49
50 class Literal extends Token {
51     public Literal(String JavaDoc image, int kind) {
52         super(kind, image, false);
53     }
54 }
55
56
57 class LongLiteral extends Literal {
58     public int radix;
59     public LongLiteral(String JavaDoc image, int radix) {
60         super(image, INTEGER_LITERAL);
61         this.radix = radix;
62     }
63
64     public String JavaDoc toString() {
65         return "LongLiteral("+image+")";
66     }
67
68     public long parseLong(SourceLocation source, boolean negative) {
69         long ret = 0;
70         String JavaDoc s = image;
71         if (radix == 10) {
72             try {
73                 if (negative) s = "-"+s;
74                 ret = Long.parseLong(s, radix);
75             } catch (NumberFormatException JavaDoc nfe) {
76                 //!!! handle MIN_VALUE
77
//System.out.println(nfe);
78
source.showError("invalid long decimal literal");
79             }
80         } else {
81             final int N = s.length();
82             if (N == 0 && radix == 16) {
83                 source.showError("0x must be followed by digits");
84             }
85             final int shift = radix == 16 ? 4 : 3;
86             final long limit = ~(0xffffffffffffffffL >>> shift);
87             for (int i=0; i < N; i++) {
88                 if ((ret & limit) != 0) {
89                     source.showError("long literals must be no more than 64-bit");
90                     break;
91                 }
92                 ret <<= shift;
93                 int digit = Character.digit(s.charAt(i), radix);
94                 ret = ret | digit;
95             }
96             if (negative) ret = -ret;
97         }
98         return ret;
99     }
100
101
102     public Expr getExpr(SourceLocation source) {
103         return new LongLiteralExpr(source, parseLong(source, false));
104     }
105     public Expr getNegativeExpr(SourceLocation source) {
106         return new LongLiteralExpr(source, parseLong(source, true));
107     }
108 }
109
110 class IntegerLiteral extends Literal {
111     public int radix;
112     public IntegerLiteral(String JavaDoc image, int radix) {
113         super(image, INTEGER_LITERAL);
114         this.radix = radix;
115         //value = (int)LongLiteral.parse(image, radix);
116
}
117
118     public String JavaDoc toString() {
119         return "IntegerLiteral("+image+")";
120     }
121
122     public int parseInt(SourceLocation source, boolean negative) {
123         int ret = 0;
124         String JavaDoc s = image;
125         if (radix == 10) {
126             if (s.length() > 1 && s.charAt(0) == '0') {
127                 source.showError("invalid octal literal");
128                 return 0;
129             }
130             
131             try {
132                 if (negative) s = "-"+s;
133                 ret = Integer.parseInt(s, radix);
134             } catch (NumberFormatException JavaDoc nfe) {
135                 //!!! handle MIN_VALUE
136
//System.out.println(nfe);
137
source.showError("invalid int decimal literal");
138             }
139         } else {
140             final int N = s.length();
141             if (N == 0 && radix == 16) {
142                 source.showError("0x must be followed by digits");
143             }
144                     
145             final int shift = radix == 16 ? 4 : 3;
146             final int limit = ~(0xffffffff >>> shift);
147             for (int i=0; i < N; i++) {
148                 if ((ret & limit) != 0) {
149                     source.showError("integer literal must be no more than 32-bit");
150                     break;
151                 }
152                 ret <<= shift;
153                 int digit = Character.digit(s.charAt(i), radix);
154                 ret = ret | digit;
155             }
156             if (negative) ret = -ret;
157         }
158         return ret;
159     }
160
161
162     public Expr getExpr(SourceLocation source) {
163         return new IntLiteralExpr(source, parseInt(source, false));
164     }
165     public Expr getNegativeExpr(SourceLocation source) {
166         return new IntLiteralExpr(source, parseInt(source, true));
167     }
168 }
169
170 class FloatLiteral extends Literal {
171     public float value;
172     public FloatLiteral(String JavaDoc image) {
173         super(image, FLOATING_POINT_LITERAL);
174     }
175
176     public String JavaDoc getValueImage() {
177         if (image.endsWith("F") || image.endsWith("f")) {
178             return image.substring(0, image.length()-1);
179         } else {
180             return image;
181         }
182     }
183
184     public Expr getExpr(SourceLocation source) {
185         Expr ret;
186         try {
187             value = Float.valueOf(getValueImage()).floatValue();
188             ret = new FloatLiteralExpr(source,value);
189             if (value == 0.0f) {
190                 BigDecimal JavaDoc bd = new BigDecimal JavaDoc(getValueImage());
191                 if (bd.compareTo(new BigDecimal JavaDoc("0")) != 0) {
192                     ret.showError("non-zero literal rounds to 0");
193                 }
194             } else if (value == Float.POSITIVE_INFINITY ||
195                        value == Float.NEGATIVE_INFINITY)
196             {
197                 ret.showError("floating point literal rounds to IEEE infinity");
198             }
199         } catch (NumberFormatException JavaDoc nfe) {
200             source.showError("illegal floating point literal format");
201             ret = new FloatLiteralExpr(source,0.0f);
202         }
203         return ret;
204     }
205 }
206
207 class DoubleLiteral extends Literal {
208     public double value;
209     public DoubleLiteral(String JavaDoc image) {
210         super(image, FLOATING_POINT_LITERAL);
211     }
212     public String JavaDoc getValueImage() {
213         if (image.endsWith("D") || image.endsWith("d")) {
214             return image.substring(0, image.length()-1);
215         } else {
216             return image;
217         }
218     }
219
220     public Expr getExpr(SourceLocation source) {
221         Expr ret;
222         
223         try {
224             value = Double.valueOf(getValueImage()).doubleValue();
225             ret = new DoubleLiteralExpr(source,value);
226             if (value == 0.0d) {
227                 BigDecimal JavaDoc bd = new BigDecimal JavaDoc(getValueImage());
228                 if (bd.compareTo(new BigDecimal JavaDoc("0")) != 0) {
229                     ret.showError("non-zero literal rounds to 0");
230                 }
231             } else if (value == Double.POSITIVE_INFINITY ||
232                        value == Double.NEGATIVE_INFINITY)
233             {
234                 ret.showError("floating point literal rounds to IEEE infinity");
235             }
236         } catch (NumberFormatException JavaDoc nfe) {
237             source.showError("illegal floating point literal format");
238             ret = new DoubleLiteralExpr(source,0.0);
239         }
240         return ret;
241     }
242 }
243
244 class CharacterLiteral extends Literal {
245     public char value;
246     public CharacterLiteral(char value) {
247         super("\""+value+"\"", CHARACTER_LITERAL);
248         this.value = value;
249     }
250
251     public static void quoteCharacter(char ch, StringBuffer JavaDoc buf) {
252         //if (ch >= 32 && ch <= 127) {
253
// buf.append(ch); return;
254
//}
255

256         switch (ch) {
257         case '\b': buf.append("\\b"); return;
258         case '\t': buf.append("\\t"); return;
259         case '\n': buf.append("\\n"); return;
260         case '\f': buf.append("\\f"); return;
261         case '\r': buf.append("\\r"); return;
262         case '\"': buf.append("\\\""); return;
263         case '\'': buf.append("\\\'"); return;
264         case '\\': buf.append("\\\\"); return;
265         }
266
267         buf.append(ch);
268
269         //buf.append("\\u");
270
//String hex = Integer.toHexString(ch);
271
//for(int i=0; i<(4-hex.length()); i++) buf.append('0');
272
//buf.append(hex);
273
}
274
275     public String JavaDoc toJavaSource() {
276         StringBuffer JavaDoc buf = new StringBuffer JavaDoc("'");
277         quoteCharacter(value, buf);
278         buf.append("'");
279
280         //System.out.println("char: "+buf+", "+(int)value);
281

282         return buf.toString();
283     }
284
285     public Expr getExpr(SourceLocation source) {
286         return new IntLiteralExpr(source,source.getCompiler().getTypeManager().charType, toJavaSource(), (int)value);
287     }
288 }
289
290 class StringLiteral extends Literal {
291     public String JavaDoc value;
292     public StringLiteral(String JavaDoc value) {
293         super(value, STRING_LITERAL);
294         this.value = value;
295     }
296
297     public String JavaDoc toString() {
298         return "StringLiteral("+value+")";
299     }
300
301     public Expr getExpr(SourceLocation source) {
302         return new StringLiteralExpr(source,value);
303     }
304 }
305
306
307 public class JavaTokenizer extends CompilerObject implements JavaConstants {
308     void cleanup(boolean fully) {
309         specialToken = null;
310         text = null;
311     }
312
313     char[] text;
314     int index = 0;
315
316     int startIndex = 0;
317     //int startLine = 0;
318
//int startLineStart = 0;
319

320     //int line = 0;
321
//int lineStart = 0;
322
public Token specialToken = null;
323
324     protected final void newLine() {
325         //line++;
326
//lineStart = index;
327
}
328
329     String JavaDoc getImage() {
330         return new String JavaDoc(text, startIndex, index-startIndex);
331     }
332
333     String JavaDoc getImage(int startOffset, int endOffset) {
334         return new String JavaDoc(text, startIndex+startOffset,
335                           index-(startIndex+startOffset)+endOffset);
336     }
337
338     public final Token getNextToken() {
339         return nextToken();
340     }
341
342
343     public Token nextToken() {
344         Token token;
345
346         try {
347             token = internalNextToken();
348         } catch (ArrayIndexOutOfBoundsException JavaDoc e) {
349             token = new Token(EOF, "EOF", false);
350         } catch (ParseException pe) {
351             throw pe;
352         } catch (RuntimeException JavaDoc re) {
353             System.err.println("rt error at: "+index+", "+sourceInfo.getLine(index));
354             throw re;
355         } catch (Error JavaDoc er) {
356             System.err.println("error at: "+index+", "+sourceInfo.getLine(index));
357             throw er;
358         }
359
360         token.startPosition = startIndex;
361         token.endPosition = index;
362         token.specialToken = specialToken;
363         specialToken = null;
364
365         startIndex = index;
366         //startLineStart = lineStart;
367
//startLine = line;
368
return token;
369     }
370
371     private final char peekChar() {
372         return peekChar(0);
373     }
374
375     private final char peekChar(int offset) {
376         //if ((index + offset) >= nChars) throw new ArrayIndexOutOfBoundsException();
377
return text[index+offset];
378     }
379
380     private final char eatChar() {
381         //if (index >= nChars) throw new ArrayIndexOutOfBoundsException();
382
return text[index++];
383     }
384
385     private final void skipChars(int n) {
386         index += n;
387     }
388
389     private final void skipChar() {
390         index++;
391     }
392
393
394     public Token internalNextToken() {
395         for (;;) {
396             //System.out.println(""+index+": "+text[index]);
397
switch(peekChar()) {
398             // line terminators (\r is handled previously)
399
case '\n':
400                 eatChar();
401                 startIndex++;
402                 //newLine();
403
break;
404
405             // white space
406
case ' ':
407             case '\t':
408             case '\f':
409                 eatChar();
410                 startIndex++;
411                 break;
412
413             // comments
414
case '/':
415             {
416                 char peekChar = peekChar(1);
417                 Comment comment;
418                 if (peekChar == '/') {
419                     comment = getSingleLineComment();
420                 } else if (peekChar == '*') {
421                     try {
422                         if (peekChar(2) == '*') {
423                             comment = getDocumentationComment();
424                         } else {
425                             comment = getTraditionalComment();
426                         }
427                     } catch (ArrayIndexOutOfBoundsException JavaDoc e) {
428                         throw new ParseException(startIndex, sourceInfo,
429                                                  "unclosed comment");
430                     }
431                 } else {
432                     return getToken();
433                 }
434                 if (comment != null) {
435                     comment.specialToken = specialToken;
436                     comment.startPosition = startIndex;
437                     comment.endPosition = index;
438                     specialToken = comment;
439                 }
440
441                 startIndex = index;
442                 break;
443             }
444
445             // numeric literals
446
case '0': case '1': case '2': case '3': case '4':
447             case '5': case '6': case '7': case '8': case '9':
448                 return getNumericLiteral();
449
450             case '.':
451             {
452                 char peekChar = peekChar(1);
453                 if (peekChar >= '0' && peekChar <= '9') {
454                     return getNumericLiteral();
455                 } else {
456                     return getToken();
457                 }
458             }
459
460             // string literals
461
case '\"':
462                 return getStringLiteral();
463
464             case '\'':
465                 return getCharacterLiteral();
466
467             default:
468
469                 // lump everything else together
470
// separators, operators, boolean literals, null, keywords, identifiers
471
return getToken();
472             }
473         }
474     }
475
476     public Token scanTillMatched(int kind) {
477         int depth = 1; // assume left brace was already eated
478
for (;;) {
479             switch(eatChar()) {
480             // line terminators (\r is handled previously)
481
// and white space
482
case '\n':
483             case ' ':
484             case '\t':
485             case '\f':
486                 continue;
487
488             // comments
489
case '/':
490             {
491                 char peekChar = peekChar();
492                 if (peekChar == '/') {
493                     while (eatChar() != '\n')
494                         ;
495                 } else if (peekChar == '*') {
496                         // skip the initial /*
497
skipChar();
498                     // read until */ is found
499
while (true) {
500                         if (eatChar() == '*' && peekChar() == '/')
501                             break;
502                     }
503                     skipChar();
504                 }
505                 continue;
506             }
507
508             // string literals
509
case '\"':
510                 skipStringLiteral();
511                 continue;
512
513             case '\'':
514                 skipCharacterLiteral();
515                 continue;
516
517             default:
518                 continue;
519
520             // Now, the most intresting port - braces and nested depth
521
case '{':
522                 if (kind == RBRACE)
523                     depth++;
524                 continue;
525             case '}':
526                 if (kind == RBRACE) {
527                     depth--;
528                     if (depth == 0) {
529                         Token token = new Token(RBRACE, "}", false);
530                         token.startPosition = index;
531                         token.endPosition = index;
532                         token.specialToken = null;
533                         specialToken = null;
534                         startIndex = index;
535                         return token;
536                     }
537                 }
538                 continue;
539             }
540         }
541     }
542
543     private Comment getSingleLineComment() {
544         // skip the initial //
545
skipChars(2); //index += 2;
546

547         // read to end of line
548
while (eatChar() != '\n') {}
549
550         //newLine();
551

552         Comment ret = new Comment(getImage());
553         ret.kind = SINGLE_LINE_COMMENT;
554         return ret;
555     }
556
557     private Comment getTraditionalComment() {
558         // skip the initial /* or /** if documentation comment
559
skipChars(2); //index += 2;
560

561         // read until */ is found
562
while (true) {
563             char ch = eatChar();
564             if (ch == '*' && peekChar() == '/') break;
565             // newLine();
566
}
567         eatChar(); //index++;
568

569         Comment ret = new Comment(getImage());
570         ret.kind = MULTI_LINE_COMMENT;
571         return ret;
572     }
573
574     private Comment getDocumentationComment() {
575         if (peekChar(3) == '/') {
576             skipChars(4);
577             Comment ret = new Comment("");
578             ret.kind = FORMAL_COMMENT;
579             return ret;
580         } else {
581             skipChars(1);
582             Comment ret = getTraditionalComment();
583             ret.kind = FORMAL_COMMENT;
584             return ret;
585         }
586     }
587
588     private ParseException tokenError() {
589         return tokenError("illegal character");
590     }
591
592     private ParseException tokenError(String JavaDoc message) {
593         return new ParseException(index, sourceInfo, message);
594     }
595
596     private Token getNumericLiteral() {
597         char ch = eatChar(); //text[index++];
598
int radix = 10;
599         int startOffset = 0;
600
601         if (ch == '0') {
602             char peekChar = peekChar(); //text[index];
603
if (peekChar == 'x' || peekChar == 'X') {
604                 eatChar(); //index += 1;
605
ch = eatChar(); //text[index++];
606
radix = 16;
607                 startOffset = 2;
608             } else if (peekChar >= '0' && peekChar <= '7') {
609                 radix = 8;
610                 startOffset = 1;
611             } else {
612                 startOffset=0;
613             }
614         }
615
616         // we know that it's either decimal or float literal
617
// read through part that matches a decimal literal
618
while (true) {
619             if (Character.digit(ch, radix) == -1) break;
620             ch = eatChar(); //text[index++];
621
}
622
623         // now decide if this was a decimal or a float literal
624
switch(ch) {
625         case 'l': case 'L': return new LongLiteral(getImage(startOffset, -1), radix);
626         case 'f': case 'F':
627         case 'd': case 'D':
628         case 'e': case 'E': case '.':
629             if (radix == 16) {
630                 throw tokenError();
631             }
632             radix = 10;
633             skipChars(-1); //index--;
634
return finishFloatLiteral();
635         }
636         skipChars(-1); //index--;
637
return new IntegerLiteral(getImage(startOffset, 0), radix);
638     }
639
640     private Token finishFloatLiteral() {
641         boolean seenDot = false;
642         boolean seenExponent = false;
643         loop: while (true) {
644             switch(eatChar()) {
645             case 'f': case 'F':
646                 return new FloatLiteral(getImage()); //0, -1));
647
case 'd': case 'D':
648                 return new DoubleLiteral(getImage()); //0, -1));
649
case 'e': case 'E':
650                 if (seenExponent) {
651                     skipChars(-1); //index--;
652
break loop;
653                 }
654                 seenExponent = true;
655                 if (peekChar() == '+') skipChars(1); //index += 1;
656
if (peekChar() == '-') skipChars(1); //index += 1;
657
break;
658             case '.':
659                 if (seenDot || seenExponent) {
660                     skipChars(-1); //index--;
661
break loop;
662                 }
663                 seenDot = true;
664                 break;
665             case '0': case '1': case '2': case '3': case '4':
666             case '5': case '6': case '7': case '8': case '9':
667                 break;
668             default:
669                 skipChars(-1); //index--;
670
break loop;
671             }
672         }
673         return new DoubleLiteral(getImage());
674     }
675
676     private Token getStringLiteral() {
677         // skip the initial "
678
skipChars(1); //index += 1;
679
// build up the "real" value of the string in here
680
//??? could optimize by doing this more lazily
681
StringBuffer JavaDoc contents = new StringBuffer JavaDoc();
682
683         while (true) {
684             char ch = eatChar();
685             switch (ch) {
686             case '\"':
687                 return new StringLiteral(contents.toString());
688             case '\\':
689                 ch = processCharacterEscape();
690                 break;
691             case '\n':
692                 throw tokenError("newline not allowed in string literal");
693             }
694             contents.append(ch);
695         }
696     }
697
698     private void skipStringLiteral() {
699         // don't skip initial
700
while (true) {
701             char ch = eatChar();
702             switch (ch) {
703             case '\"':
704                 return;
705             case '\\':
706                 ch = processCharacterEscape();
707                 break;
708             case '\n':
709                 throw tokenError("newline not allowed in string literal");
710             }
711         }
712     }
713
714     private Token getCharacterLiteral() {
715         // skip the initial '
716
skipChars(1);
717         return new CharacterLiteral(getCharacterLiteralInternal());
718     }
719
720     private void skipCharacterLiteral() {
721         // don't skip initial
722
getCharacterLiteralInternal();
723     }
724
725     private char getCharacterLiteralInternal() {
726         char ch = eatChar();
727
728         switch (ch) {
729         case '\'':
730             throw tokenError("empty character literal");
731         case '\\':
732             ch = processCharacterEscape();
733             break;
734         case '\n':
735             throw tokenError("newline not allowed in character literal");
736         }
737
738         if (eatChar() != '\'') {
739             throw tokenError("character literal too long");
740         }
741         return ch;
742     }
743
744     private char processCharacterEscape() {
745         char ch = eatChar();
746         switch (ch) {
747         case 'b': return '\b';
748         case 't': return '\t';
749         case 'n': return '\n';
750         case 'f': return '\f';
751         case 'r': return '\r';
752         case '\"': return '\"';
753         case '\'': return '\'';
754         case '\\': return '\\';
755         }
756         if (ch < '0' || ch > '7') {
757             throw tokenError("octal escape must use values 0-7");
758         }
759
760         // this giberrish handles octal escapes
761
char ch1 = peekChar();
762         if (ch1 >= '0' && ch1 <= '7') {
763             eatChar();
764             char ch2 = peekChar(); //text[index+1];
765
if (ch < '4' && (ch2 >= '0' && ch2 <= '7')) {
766                 eatChar(); //index++;
767
//if (ch > '3') throw tokenError("illegal value for octal escape");
768
return (char)((ch2-'0')+(ch1-'0')*8+(ch-'0')*64);
769             } else {
770                 return (char)((ch1-'0') + (ch-'0')*8);
771             }
772         } else {
773             return (char)(ch-'0');
774         }
775     }
776
777     // use a regex-style table here for extensibility
778
private Token getToken() {
779         // first follow the FSM
780
State state = beginState;
781
782         while (!state.isEndState()) {
783             state = state.forCharacter(eatChar());
784             if (state == null) return getIdentifier();
785         }
786         skipChars(-1); //index -= 1;
787

788         if (state.isKeyword() && Character.isJavaIdentifierPart(peekChar())) {
789             while(Character.isJavaIdentifierPart(eatChar())) { }
790             skipChars(-1); //index -= 1;
791
return new Token(IDENTIFIER, getImage(), true);
792         }
793
794         return ((EndState)state).getToken();
795     }
796
797     private Token getIdentifier() {
798         index = startIndex;
799
800         if (!Character.isJavaIdentifierStart(eatChar())) {
801             throw tokenError("identifier expected");
802         }
803
804         while(Character.isJavaIdentifierPart(eatChar())) { }
805         skipChars(-1);
806         return new Token(IDENTIFIER, getImage(), true);
807     }
808
809     //XXX awful hack
810
private static State beginState = new ArrayState();
811
812     static Map knownOperators = new HashMap();
813     private static final int OFFSET = 100;
814
815     public static int addOperator(String JavaDoc image) {
816         return addOperator(-1, image);
817     }
818
819     public static int addOperator(int kindV, String JavaDoc image) {
820         Integer JavaDoc kind = (Integer JavaDoc)knownOperators.get(image);
821         if (kind != null) return kind.intValue();
822
823         if (kindV == -1) kindV = knownOperators.size()+OFFSET;
824         kind = new Integer JavaDoc(kindV);
825         EndState endState = new EndState(kind.intValue(), image, isIdentifier(image));
826         knownOperators.put(image, kind);
827
828         beginState.addEndState(image, 0, endState);
829
830         return kind.intValue();
831     }
832
833     public static final boolean isIdentifier(String JavaDoc image) {
834         int n = image.length();
835         if (n == 0) return false;
836         if (!Character.isJavaIdentifierStart(image.charAt(0))) return false;
837         for(int i=1; i<n; i++) {
838             if (!Character.isJavaIdentifierPart(image.charAt(i))) return false;
839         }
840         return true;
841     }
842
843
844     private abstract static class State {
845         public State forCharacter(char ch) {
846             return null;
847         }
848
849         public boolean isEndState() {
850             return false;
851         }
852
853         public boolean isKeyword() {
854             return false;
855         }
856
857         public Object JavaDoc getData() {
858             return null;
859         }
860
861         public abstract State addEndState(String JavaDoc image, int index, EndState endState);
862     }
863
864     private static class EndState extends State {
865         private int kind;
866         private String JavaDoc image;
867         private boolean isIdentifier;
868
869         public EndState(int kind, String JavaDoc image, boolean isIdentifier) {
870             this.kind = kind;
871             this.image = image;
872             this.isIdentifier = isIdentifier;
873         }
874
875         public Token getToken() {
876             return new Token(kind, image, isIdentifier);
877         }
878
879         public State forCharacter(char ch) {
880             return null;
881         }
882
883         public boolean isEndState() {
884             return true;
885         }
886
887         public boolean isKeyword() {
888             return isIdentifier;
889         }
890
891         public State addEndState(String JavaDoc image, int index, EndState endState) {
892             if (image.length() >= index) {
893                 return endState;
894             }
895
896             ArrayState newState = new ArrayState();
897             newState.defaultState = this;
898             return newState.addEndState(image, index, endState);
899         }
900     }
901
902     private static class ArrayState extends State {
903         private static final int MAX_CHAR = 128;
904         private State[] states = new State[MAX_CHAR];
905         State defaultState = null;
906
907         public State addEndState(String JavaDoc image, int index, EndState endState) {
908             if (index >= image.length()) {
909                 defaultState = endState;
910                 return this;
911             }
912
913             char ch = image.charAt(index);
914
915             State newState = states[ch];
916             if (newState == null) {
917                 if (image.length() >= index) {
918                     newState = new ArrayState();
919                 } else {
920                     newState = endState;
921                 }
922             }
923             states[ch] = newState.addEndState(image, index+1, endState);
924
925             return this;
926         }
927
928         public State forCharacter(char ch) {
929             if (ch >= 0 && ch < MAX_CHAR) {
930                 State state = states[ch];
931                 if (state != null) return state;
932             }
933             return defaultState;
934         }
935     }
936
937
938     // test code
939
{
            // Initializer block: passes each operator, separator and reserved
            // word handled here to addOperator, as a (constant, image) pair.
            // The commented-out "int NAME = n;" lines name constants for which
            // this block makes no addOperator call.
940   //int SINGLE_LINE_COMMENT = 9;
941
//int FORMAL_COMMENT = 10;
942
//int MULTI_LINE_COMMENT = 11;
943
//int INTEGER_LITERAL = 13;
944
//int DECIMAL_LITERAL = 14;
945
//int HEX_LITERAL = 15;
946
//int OCTAL_LITERAL = 16;
947
//int FLOATING_POINT_LITERAL = 17;
948
//int EXPONENT = 18;
949
//int CHARACTER_LITERAL = 19;
950
//int STRING_LITERAL = 20;
951

            // Separators and symbolic operators. Images that are prefixes of
            // longer images (for example ">" before ">>" and ">>>") are
            // registered as separate entries.
952         addOperator(LPAREN, "(");
953         addOperator(RPAREN, ")");
954         addOperator(LBRACE, "{");
955         addOperator(RBRACE, "}");
956         addOperator(LBRACKET, "[");
957         addOperator(RBRACKET, "]");
958         addOperator(SEMICOLON, ";");
959         addOperator(COMMA, ",");
960         addOperator(DOT, ".");
961         addOperator(ASSIGN, "=");
962         addOperator(GT, ">");
963         addOperator(LT, "<");
964         addOperator(BANG, "!");
965         addOperator(TILDE, "~");
966         addOperator(HOOK, "?");
967         addOperator(COLON, ":");
968         addOperator(EQ, "==");
969         addOperator(LE, "<=");
970         addOperator(GE, ">=");
971         addOperator(NE, "!=");
972         addOperator(SC_OR, "||");
973         addOperator(SC_AND, "&&");
974         addOperator(INCR, "++");
975         addOperator(DECR, "--");
976         addOperator(PLUS, "+");
977         addOperator(MINUS, "-");
978         addOperator(STAR, "*");
979         addOperator(SLASH, "/");
980         addOperator(BIT_AND, "&");
981         addOperator(BIT_OR, "|");
982         addOperator(XOR, "^");
983         addOperator(REM, "%");
984         addOperator(LSHIFT, "<<");
985         addOperator(RSIGNEDSHIFT, ">>");
986         addOperator(RUNSIGNEDSHIFT, ">>>");
987         addOperator(PLUSASSIGN, "+=");
988         addOperator(MINUSASSIGN, "-=");
989         addOperator(STARASSIGN, "*=");
990         addOperator(SLASHASSIGN, "/=");
991         addOperator(ANDASSIGN, "&=");
992         addOperator(ORASSIGN, "|=");
993         addOperator(XORASSIGN, "^=");
994         addOperator(REMASSIGN, "%=");
995         addOperator(LSHIFTASSIGN, "<<=");
996         addOperator(RSIGNEDSHIFTASSIGN, ">>=");
997         addOperator(RUNSIGNEDSHIFTASSIGN, ">>>=");
            // Word-shaped entries go through the same addOperator call.
998         addOperator(INSTANCEOF, "instanceof");
999         addOperator(NEW, "new");
1000
            // Modifier words, plus the reserved word "const".
1001        addOperator(PUBLIC,"public");
1002        addOperator(PRIVATE,"private");
1003        addOperator(PROTECTED,"protected");
1004        addOperator(STATIC,"static");
1005        addOperator(FINAL,"final");
1006        addOperator(SYNCHRONIZED,"synchronized");
1007        addOperator(VOLATILE,"volatile");
1008        addOperator(TRANSIENT,"transient");
1009        addOperator(NATIVE,"native");
1010        addOperator(ABSTRACT,"abstract");
1011        addOperator(STRICT,"strictfp");
1012        addOperator(CONST,"const");
1013
            // Type declaration words.
1014        addOperator(CLASS,"class");
1015        addOperator(INTERFACE,"interface");
1016
            // Exception handling words.
1017        addOperator(THROWS,"throws");
1018        addOperator(TRY,"try");
1019        addOperator(CATCH,"catch");
1020        addOperator(FINALLY,"finally");
1021
            // Control-flow words, plus the reserved word "goto".
1022        addOperator(IF,"if");
1023        addOperator(WHILE,"while");
1024        addOperator(DO,"do");
1025        addOperator(FOR,"for");
1026        addOperator(SWITCH,"switch");
1027        addOperator(CASE,"case");
1028        addOperator(DEFAULT,"default");
1029        addOperator(BREAK,"break");
1030        addOperator(CONTINUE,"continue");
1031        addOperator(RETURN,"return");
1032        addOperator(THROW,"throw");
1033        addOperator(GOTO,"goto");
1034
            // The primitive type names, "this" and "super" are commented out
            // below, so this block does not register them. The commented-out
            // CLASS line repeats the live registration made above.
1035        //addOperator(BOOLEAN,"boolean");
1036
//addOperator(BYTE,"byte");
1037
//addOperator(CHAR,"char");
1038
//addOperator(SHORT,"short");
1039
//addOperator(INT,"int");
1040
//addOperator(LONG,"long");
1041
//addOperator(FLOAT,"float");
1042
//addOperator(DOUBLE,"double");
1043
//addOperator(VOID,"void");
1044

1045        //addOperator(THIS,"this");
1046
//addOperator(SUPER,"super");
1047
//addOperator(CLASS,"class");
1048
//int AT = 69;
1049
//int IDENTIFIER = 70;
1050
//int LETTER = 71;
1051
//int DIGIT = 72;
1052

1053    }
1054
// NOTE(review): nothing in the visible part of this file reads
// FIRST_LINE_GUESS or LINE_GUESS_MULTIPLE; presumably sizing hints for a
// line table -- confirm before relying on them.
1055    private static final int FIRST_LINE_GUESS = 500;
1056    private static final int LINE_GUESS_MULTIPLE = 4;
// Set by setSourceInfo to the length of the text it installs.
1057    int nChars;
1058    
// Number of chars readCharArray requests from the reader per read call.
1059    private static final int BUF_SIZE = 4096;
1060
1061    /**
1062     * Original @author <a HREF="mailto:nakamura@mamezou.com">NAKAMURA Tadashi</a>
1063     * Modified and integrated by Jim Hugunin
1064     */

1065    char[] readCharArray(File file) throws IOException {
1066        FileInputStream fstream = new FileInputStream(file);
1067        Reader reader;
1068        if (getOptions().encoding == null) {
1069            reader = new InputStreamReader(new BufferedInputStream(fstream));
1070        } else {
1071            reader = new InputStreamReader(new BufferedInputStream(fstream),
1072                                           getOptions().encoding);
1073        }
1074        reader = new BufferedReader(reader);
1075        
1076        try {
1077            int length;
1078            final char[] buf = new char[BUF_SIZE];
1079            CharArrayWriter caw = new CharArrayWriter();
1080            
1081            while ((length = reader.read(buf, 0, BUF_SIZE)) != -1) {
1082                caw.write(buf, 0, length);
1083            }
1084            caw.write('\n'); // adding a newline to make things easier
1085
return caw.toCharArray();
1086        } finally {
1087            fstream.close();
1088        }
1089    }
1090    
1091    private char[] processUnicodeEscapes(char[] text) {
1092        CharArrayWriter writer = null;
1093        final int N = text.length;
1094        outer: for(int i=0; i<N; i++) {
1095            char ch = text[i];
1096            
1097            if (ch == '\\') {
1098                boolean oddSlashes = true;
1099                int startIndex = i;
1100                while (++i < N) {
1101                    ch = text[i];
1102                    if (ch == '\\') {
1103                        oddSlashes = !oddSlashes;
1104                    } else if (ch == 'u' && oddSlashes) {
1105                        // we now know we need to handle unicode escapes
1106
if (writer == null) {
1107                            writer = new CharArrayWriter();
1108                            writer.write(text, 0, i-1);
1109                        }
1110                        // handle multiple u's
1111
while (text[++i] == 'u') {}
1112                        i -= 1;
1113
1114                        if (i+4 > N) {
1115                            throw new ParseException(i, null,
1116                            "invalid unicode escape");
1117                        }
1118                        int v = Character.digit(text[++i], 16);
1119                        v = v*16 + Character.digit(text[++i], 16);
1120                        v = v*16 + Character.digit(text[++i], 16);
1121                        v = v*16 + Character.digit(text[++i], 16);
1122                        ch = (char)v;
1123                        writer.write(ch);
1124                        continue outer;
1125                    } else {
1126                        break;
1127                    }
1128                }
1129                if (writer != null) writer.write(text, startIndex, i+1-startIndex);
1130            } else {
1131                if (writer != null) writer.write(ch);
1132            }
1133        }
1134        if (writer != null) return writer.toCharArray();
1135        else return text;
1136    }
1137        
1138    public void setSourceInfo(SourceInfo si) throws IOException {
1139        char[] text = readCharArray(si.getFile());
1140        text = processUnicodeEscapes(text);
1141        si.setText(text);
1142
1143        sourceInfo = si;
1144        this.text = text;
1145        this.startIndex = this.index = 0;
1146        this.nChars = text.length;
1147    }
1148
1149    public void setOffset(int offset) {
1150        this.startIndex = this.index = offset;
1151    }
1152
// Assigned by setSourceInfo; starts out null.
1153    public SourceInfo sourceInfo = null;
1154
1155
/**
 * Creates a tokenizer, handing {@code compiler} to the superclass
 * constructor. No input is attached here; that is done by
 * {@code setSourceInfo}.
 *
 * @param compiler the compiler passed on to the superclass
 */
public JavaTokenizer(JavaCompiler compiler) {
    super(compiler);
}
1157
1158/*
1159    public static void main(String[] args) throws IOException {
1160        long t0 = System.currentTimeMillis();
1161        JavaTokenizer tokenizer = new JavaTokenizer(new File(args[0]));
1162
1163        Token tok;
1164        while ((tok = tokenizer.nextToken()) != null) {
1165            System.out.println(tok.startPosition+":"+tok.endPosition+" "+tok);
1166        }
1167        long t1 = System.currentTimeMillis();
1168        System.err.println(((t1-t0)/1000.0)+" seconds.");
1169    }
1170*/

1171}
1172
1173
Popular Tags