KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > edu > umd > cs > findbugs > Tokenizer


/*
 * FindBugs - Find bugs in Java programs
 * Copyright (C) 2003,2004 University of Maryland
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */


package edu.umd.cs.findbugs;

import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;
import java.util.BitSet;

27 /**
28  * A simple tokenizer for Java source text.
29  * This is not intended to be a compliant lexer;
30  * instead, it is for quick and dirty scanning.
31  *
32  * @author David Hovemeyer
33  * @see Token
34  */

35 public class Tokenizer {
36     private static final BitSet JavaDoc whiteSpace = new BitSet JavaDoc();
37
38     static {
39         whiteSpace.set(' ');
40         whiteSpace.set('\t');
41         whiteSpace.set('\r');
42         whiteSpace.set('\f');
43     }
44
45     private static final BitSet JavaDoc single = new BitSet JavaDoc();
46
47     static {
48         single.set('!');
49         single.set('%');
50         single.set('^');
51         single.set('&');
52         single.set('*');
53         single.set('(');
54         single.set(')');
55         single.set('-');
56         single.set('+');
57         single.set('=');
58         single.set('[');
59         single.set(']');
60         single.set('{');
61         single.set('}');
62         single.set('|');
63         single.set(':');
64         single.set(';');
65         single.set(',');
66         single.set('.');
67         single.set('<');
68         single.set('>');
69         single.set('?');
70         single.set('~');
71     }
72
73     private PushbackReader JavaDoc reader;
74
75     /**
76      * Constructor.
77      *
78      * @param reader the Reader for the Java source text
79      */

80     public Tokenizer(Reader JavaDoc reader) {
81         this.reader = new PushbackReader JavaDoc(reader);
82     }
83
84     /**
85      * Get the next Token in the stream.
86      *
87      * @return the Token
88      */

89     public Token next() throws IOException JavaDoc {
90         skipWhitespace();
91         int c = reader.read();
92         if (c < 0)
93             return new Token(Token.EOF);
94         else if (c == '\n')
95             return new Token(Token.EOL);
96         else if (c == '\'' || c == '"')
97             return munchString(c);
98         else if (c == '/')
99             return maybeComment();
100         else if (single.get(c))
101             return new Token(Token.SINGLE, String.valueOf((char) c));
102         else {
103             reader.unread(c);
104             return parseWord();
105         }
106     }
107
108     private void skipWhitespace() throws IOException JavaDoc {
109         for (; ;) {
110             int c = reader.read();
111             if (c < 0) break;
112             if (!whiteSpace.get(c)) {
113                 reader.unread(c);
114                 break;
115             }
116         }
117     }
118
119     private Token munchString(int delimiter) throws IOException JavaDoc {
120         final int SCAN = 0;
121         final int ESCAPE = 1;
122         final int DONE = 2;
123
124         StringBuffer JavaDoc result = new StringBuffer JavaDoc();
125         result.append((char) delimiter);
126         int state = SCAN;
127
128             while (state != DONE) {
129                 int c = reader.read();
130                 if (c < 0)
131                     break;
132                 result.append((char) c);
133                 switch (state) {
134                 case SCAN:
135                     if (c == delimiter)
136                         state = DONE;
137                     else if (c == '\\')
138                         state = ESCAPE;
139                     break;
140                 case ESCAPE:
141                     state = SCAN;
142                     break;
143                 }
144             }
145         return new Token(Token.STRING, result.toString());
146     }
147
148     private Token maybeComment() throws IOException JavaDoc {
149         int c = reader.read();
150         if (c == '/') {
151             // Single line comment
152
StringBuffer JavaDoc result = new StringBuffer JavaDoc();
153             result.append("//");
154             for (; ;) {
155                 c = reader.read();
156                 if (c < 0)
157                     break;
158                 else if (c == '\n') {
159                     reader.unread(c);
160                     break;
161                 }
162                 result.append((char) c);
163             }
164             return new Token(Token.COMMENT, result.toString());
165         } else if (c == '*') {
166             // C-style multiline comment
167
StringBuffer JavaDoc result = new StringBuffer JavaDoc();
168             result.append("/*");
169             final int SCAN = 0;
170             final int STAR = 1;
171             final int DONE = 2;
172             int state = SCAN;
173             while (state != DONE) {
174                 c = reader.read();
175                 if (c < 0)
176                     state = DONE;
177                 else
178                     result.append((char) c);
179                 switch (state) {
180                 case SCAN:
181                     if (c == '*')
182                         state = STAR;
183                     break;
184                 case STAR:
185                     if (c == '/')
186                         state = DONE;
187                     else if (c != '*')
188                         state = SCAN;
189                     break;
190                 case DONE:
191                     break;
192                 }
193             }
194             return new Token(Token.COMMENT, result.toString());
195         } else {
196             if (c >= 0)
197                 reader.unread(c);
198             return new Token(Token.SINGLE, "/");
199         }
200     }
201
202     private Token parseWord() throws IOException JavaDoc {
203         StringBuffer JavaDoc result = new StringBuffer JavaDoc();
204         for (; ;) {
205             int c = reader.read();
206             if (c < 0)
207                 break;
208             if (whiteSpace.get(c) || c == '\n' || single.get(c)) {
209                 reader.unread(c);
210                 break;
211             }
212             result.append((char) c);
213         }
214         return new Token(Token.WORD, result.toString());
215     }
216 }

// vim:ts=4

Popular Tags