KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > antlr > CharScanner


1 package antlr;
2
3 /* ANTLR Translator Generator
4  * Project led by Terence Parr at http://www.jGuru.com
5  * Software rights: http://www.antlr.org/RIGHTS.html
6  *
7  * $Id: //depot/code/org.antlr/main/main/antlr/CharScanner.java#9 $
8  */

9
10 import java.util.Hashtable JavaDoc;
11
12 import antlr.collections.impl.BitSet;
13
14 import java.io.IOException JavaDoc;
15
16 public abstract class CharScanner implements TokenStream {
17     static final char NO_CHAR = 0;
18     public static final char EOF_CHAR = (char)-1;
19     protected ANTLRStringBuffer text; // text of current token
20

21     protected boolean saveConsumedInput = true; // does consume() save characters?
22
protected Class JavaDoc tokenObjectClass; // what kind of tokens to create?
23
protected boolean caseSensitive = true;
24     protected boolean caseSensitiveLiterals = true;
25     protected Hashtable JavaDoc literals; // set by subclass
26

27     /** Tab chars are handled by tab() according to this value; override
28      * method to do anything weird with tabs.
29      */

30     protected int tabsize = 8;
31
32     protected Token _returnToken = null; // used to return tokens w/o using return val.
33

34     // Hash string used so we don't new one every time to check literals table
35
protected ANTLRHashString hashString;
36
37     protected LexerSharedInputState inputState;
38
39     /** Used during filter mode to indicate that path is desired.
40      * A subsequent scan error will report an error as usual if
41      * acceptPath=true;
42      */

43     protected boolean commitToPath = false;
44
45     /** Used to keep track of indentdepth for traceIn/Out */
46     protected int traceDepth = 0;
47
48     public CharScanner() {
49         text = new ANTLRStringBuffer();
50         hashString = new ANTLRHashString(this);
51         setTokenObjectClass("antlr.CommonToken");
52     }
53
54     public CharScanner(InputBuffer cb) { // SAS: use generic buffer
55
this();
56         inputState = new LexerSharedInputState(cb);
57     }
58
59     public CharScanner(LexerSharedInputState sharedState) {
60         this();
61         inputState = sharedState;
62     }
63
64     public void append(char c) {
65         if (saveConsumedInput) {
66             text.append(c);
67         }
68     }
69
70     public void append(String JavaDoc s) {
71         if (saveConsumedInput) {
72             text.append(s);
73         }
74     }
75
76     public void commit() {
77         inputState.input.commit();
78     }
79
80     public void consume() throws CharStreamException {
81         if (inputState.guessing == 0) {
82             char c = LA(1);
83             if (caseSensitive) {
84                 append(c);
85             }
86             else {
87                 // use input.LA(), not LA(), to get original case
88
// CharScanner.LA() would toLower it.
89
append(inputState.input.LA(1));
90             }
91             if (c == '\t') {
92                 tab();
93             }
94             else {
95                 inputState.column++;
96             }
97         }
98         inputState.input.consume();
99     }
100
101     /** Consume chars until one matches the given char */
102     public void consumeUntil(int c) throws CharStreamException {
103         while (LA(1) != EOF_CHAR && LA(1) != c) {
104             consume();
105         }
106     }
107
108     /** Consume chars until one matches the given set */
109     public void consumeUntil(BitSet set) throws CharStreamException {
110         while (LA(1) != EOF_CHAR && !set.member(LA(1))) {
111             consume();
112         }
113     }
114
115     public boolean getCaseSensitive() {
116         return caseSensitive;
117     }
118
119     public final boolean getCaseSensitiveLiterals() {
120         return caseSensitiveLiterals;
121     }
122
123     public int getColumn() {
124         return inputState.column;
125     }
126
127     public void setColumn(int c) {
128         inputState.column = c;
129     }
130
131     public boolean getCommitToPath() {
132         return commitToPath;
133     }
134
135     public String JavaDoc getFilename() {
136         return inputState.filename;
137     }
138
139     public InputBuffer getInputBuffer() {
140         return inputState.input;
141     }
142
143     public LexerSharedInputState getInputState() {
144         return inputState;
145     }
146
147     public void setInputState(LexerSharedInputState state) {
148         inputState = state;
149     }
150
151     public int getLine() {
152         return inputState.line;
153     }
154
155     /** return a copy of the current text buffer */
156     public String JavaDoc getText() {
157         return text.toString();
158     }
159
160     public Token getTokenObject() {
161         return _returnToken;
162     }
163
164     public char LA(int i) throws CharStreamException {
165         if (caseSensitive) {
166             return inputState.input.LA(i);
167         }
168         else {
169             return toLower(inputState.input.LA(i));
170         }
171     }
172
173     protected Token makeToken(int t) {
174         try {
175             Token tok = (Token)tokenObjectClass.newInstance();
176             tok.setType(t);
177             tok.setColumn(inputState.tokenStartColumn);
178             tok.setLine(inputState.tokenStartLine);
179             // tracking real start line now: tok.setLine(inputState.line);
180
return tok;
181         }
182         catch (InstantiationException JavaDoc ie) {
183             panic("can't instantiate token: " + tokenObjectClass);
184         }
185         catch (IllegalAccessException JavaDoc iae) {
186             panic("Token class is not accessible" + tokenObjectClass);
187         }
188         return Token.badToken;
189     }
190
191     public int mark() {
192         return inputState.input.mark();
193     }
194
195     public void match(char c) throws MismatchedCharException, CharStreamException {
196         if (LA(1) != c) {
197             throw new MismatchedCharException(LA(1), c, false, this);
198         }
199         consume();
200     }
201
202     public void match(BitSet b) throws MismatchedCharException, CharStreamException {
203         if (!b.member(LA(1))) {
204             throw new MismatchedCharException(LA(1), b, false, this);
205         }
206         else {
207             consume();
208         }
209     }
210
211     public void match(String JavaDoc s) throws MismatchedCharException, CharStreamException {
212         int len = s.length();
213         for (int i = 0; i < len; i++) {
214             if (LA(1) != s.charAt(i)) {
215                 throw new MismatchedCharException(LA(1), s.charAt(i), false, this);
216             }
217             consume();
218         }
219     }
220
221     public void matchNot(char c) throws MismatchedCharException, CharStreamException {
222         if (LA(1) == c) {
223             throw new MismatchedCharException(LA(1), c, true, this);
224         }
225         consume();
226     }
227
228     public void matchRange(char c1, char c2) throws MismatchedCharException, CharStreamException {
229         if (LA(1) < c1 || LA(1) > c2) throw new MismatchedCharException(LA(1), c1, c2, false, this);
230         consume();
231     }
232
233     public void newline() {
234         inputState.line++;
235         inputState.column = 1;
236     }
237
238     /** advance the current column number by an appropriate amount
239      * according to tab size. This method is called from consume().
240      */

241     public void tab() {
242         int c = getColumn();
243         int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1; // calculate tab stop
244
setColumn( nc );
245     }
246
247     public void setTabSize( int size ) {
248         tabsize = size;
249     }
250
251     public int getTabSize() {
252         return tabsize;
253     }
254
255     public void panic() {
256         System.err.println("CharScanner: panic");
257         System.exit(1);
258     }
259
260     public void panic(String JavaDoc s) {
261         System.err.println("CharScanner; panic: " + s);
262         System.exit(1);
263     }
264
265     /** Parser error-reporting function can be overridden in subclass */
266     public void reportError(RecognitionException ex) {
267         System.err.println(ex);
268     }
269
270     /** Parser error-reporting function can be overridden in subclass */
271     public void reportError(String JavaDoc s) {
272         if (getFilename() == null) {
273             System.err.println("error: " + s);
274         }
275         else {
276             System.err.println(getFilename() + ": error: " + s);
277         }
278     }
279
280     /** Parser warning-reporting function can be overridden in subclass */
281     public void reportWarning(String JavaDoc s) {
282         if (getFilename() == null) {
283             System.err.println("warning: " + s);
284         }
285         else {
286             System.err.println(getFilename() + ": warning: " + s);
287         }
288     }
289
290     public void resetText() {
291         text.setLength(0);
292         inputState.tokenStartColumn = inputState.column;
293         inputState.tokenStartLine = inputState.line;
294     }
295
296     public void rewind(int pos) {
297         inputState.input.rewind(pos);
298         setColumn(inputState.tokenStartColumn);
299     }
300
301     public void setCaseSensitive(boolean t) {
302         caseSensitive = t;
303     }
304
305     public void setCommitToPath(boolean commit) {
306         commitToPath = commit;
307     }
308
309     public void setFilename(String JavaDoc f) {
310         inputState.filename = f;
311     }
312
313     public void setLine(int line) {
314         inputState.line = line;
315     }
316
317     public void setText(String JavaDoc s) {
318         resetText();
319         text.append(s);
320     }
321
322     public void setTokenObjectClass(String JavaDoc cl) {
323         try {
324             tokenObjectClass = Class.forName(cl);
325         }
326         catch (ClassNotFoundException JavaDoc ce) {
327             panic("ClassNotFoundException: " + cl);
328         }
329     }
330
331     // Test the token text against the literals table
332
// Override this method to perform a different literals test
333
public int testLiteralsTable(int ttype) {
334         hashString.setBuffer(text.getBuffer(), text.length());
335         Integer JavaDoc literalsIndex = (Integer JavaDoc)literals.get(hashString);
336         if (literalsIndex != null) {
337             ttype = literalsIndex.intValue();
338         }
339         return ttype;
340     }
341
342     /** Test the text passed in against the literals table
343      * Override this method to perform a different literals test
344      * This is used primarily when you want to test a portion of
345      * a token.
346      */

347     public int testLiteralsTable(String JavaDoc text, int ttype) {
348         ANTLRHashString s = new ANTLRHashString(text, this);
349         Integer JavaDoc literalsIndex = (Integer JavaDoc)literals.get(s);
350         if (literalsIndex != null) {
351             ttype = literalsIndex.intValue();
352         }
353         return ttype;
354     }
355
356     // Override this method to get more specific case handling
357
public char toLower(char c) {
358         return Character.toLowerCase(c);
359     }
360
361     public void traceIndent() {
362         for (int i = 0; i < traceDepth; i++)
363             System.out.print(" ");
364     }
365
366     public void traceIn(String JavaDoc rname) throws CharStreamException {
367         traceDepth += 1;
368         traceIndent();
369         System.out.println("> lexer " + rname + "; c==" + LA(1));
370     }
371
372     public void traceOut(String JavaDoc rname) throws CharStreamException {
373         traceIndent();
374         System.out.println("< lexer " + rname + "; c==" + LA(1));
375         traceDepth -= 1;
376     }
377
378     /** This method is called by YourLexer.nextToken() when the lexer has
379      * hit EOF condition. EOF is NOT a character.
380      * This method is not called if EOF is reached during
381      * syntactic predicate evaluation or during evaluation
382      * of normal lexical rules, which presumably would be
383      * an IOException. This traps the "normal" EOF condition.
384      *
385      * uponEOF() is called after the complete evaluation of
386      * the previous token and only if your parser asks
387      * for another token beyond that last non-EOF token.
388      *
389      * You might want to throw token or char stream exceptions
390      * like: "Heh, premature eof" or a retry stream exception
391      * ("I found the end of this file, go back to referencing file").
392      */

393     public void uponEOF() throws TokenStreamException, CharStreamException {
394     }
395 }
396
Popular Tags