KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > netbeans > modules > lexer > demo > antlr > CalcLexer


1 /*
2  * The contents of this file are subject to the terms of the Common Development
3  * and Distribution License (the License). You may not use this file except in
4  * compliance with the License.
5  *
6  * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7  * or http://www.netbeans.org/cddl.txt.
8  *
9  * When distributing Covered Code, include this CDDL Header Notice in each file
10  * and include the License file at http://www.netbeans.org/cddl.txt.
11  * If applicable, add the following below the CDDL Header, with the fields
12  * enclosed by brackets [] replaced by your own identifying information:
13  * "Portions Copyrighted [year] [name of copyright owner]"
14  *
15  * The Original Software is NetBeans. The Initial Developer of the Original
16  * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
17  * Microsystems, Inc. All Rights Reserved.
18  */

19
20 package org.netbeans.modules.lexer.demo.antlr;
21
22 import antlr.LexerSharedInputState;
23 import antlr.CharStreamException;
24 import antlr.TokenStreamException;
25 import org.netbeans.api.lexer.Language;
26 import org.netbeans.api.lexer.Lexer;
27 import org.netbeans.api.lexer.LexerInput;
28 import org.netbeans.api.lexer.Token;
29 import org.netbeans.api.lexer.TokenId;
30 import org.netbeans.spi.lexer.AbstractLexer;
31 import org.netbeans.spi.lexer.antlr.AntlrToken;
32 import org.netbeans.spi.lexer.util.IntegerCache;
33 import org.netbeans.spi.lexer.util.LexerInputReader;
34
35 /**
36  * Wrapper for antlr generated {@link antlr.CharScanner}.
37  * <BR>Please read <A HREF="http://lexer.netbeans.org/doc/antlr.html">
38  * to get additional information related to this source.
39  *
40  * <P>Most of the tokens
41  * returned from the scanner are just accepted and passed on
42  * but e.g. error tokens are created by assembling one or more scanner tokens into one
43  * extended error token. That's done because it's nicer to produce
44  * just one error token than multiple successive error tokens.
45  *
46  * @author Miloslav Metelka
47  * @version 1.00
48  */

49
50 final class CalcLexer extends AbstractLexer {
51     
52     private static final CalcLanguage language = CalcLanguage.get();
53     
54     private CalcScanner scanner;
55     
56     private LexerInput lexerInput;
57     
58     public CalcLexer() {
59         this.scanner = new CalcScanner((LexerSharedInputState)null);
60     }
61     
62     /*
63      * Default implementation returns null already.
64      *
65     protected Object getLexerState() {
66         return null;
67     }
68      */

69
70     public void restart(LexerInput input, Object JavaDoc state) {
71         super.restart(input, state);
72
73         this.lexerInput = input;
74
75         // Assign a new input state to the scanner for the given lexer inputs
76
LexerSharedInputState inputState = null;
77         if (lexerInput != null) {
78             inputState = new LexerSharedInputState(new LexerInputReader(lexerInput));
79         }
80         scanner.setInputState(inputState);
81         if (inputState != null) {
82             scanner.resetText();
83         }
84         
85         // state argument ignored - should always be null
86
}
87
88     protected final LexerInput getLexerInput() { // this method is necessary for AbstractLexer
89
return lexerInput;
90     }
91
92     protected final Language getLanguage() { // this method is necessary for AbstractLexer
93
return language;
94     }
95
96     /**
97      * Fetch next token from underlying antlr scanner.
98      * <BR>The intId of the token that was found can be set
99      * into the given tokenData parameter
100      * by <CODE>TokenData.setTokenIntId()</CODE> in case there was
101      * a valid token found.
102      * <P>Token length of the fetched token can be set into tokenData
103      * by <CODE>TokenData.setTokenLength()</CODE>.
104      * If the token intId or length is not assigned in <CODE>fetchToken()</CODE>
105      * it must be assigned later during either
106      * {@link #ordinaryToken(OrdinaryTokenData)}
107      * or {@link #extendedToken(ExtendedTokenData)} depending
108      * which of these two gets called.
109      * @param tokenData mutable info about the token being fetched.
110      * @return true if a valid token was found or false
111      * if there are no more tokens on the input (in which case a call
112      * to <CODE>TokenData.setTokenIntId()</CODE> is not necessary).
113      */

114     protected boolean fetchToken(TokenData tokenData) {
115         try {
116             antlr.Token antlrToken = scanner.nextToken();
117             if (antlrToken != null) {
118                 int intId = antlrToken.getType();
119                 if (intId == CalcScannerTokenTypes.EOF) {
120                     return false;
121                 }
122                 tokenData.setTokenIntId(antlrToken.getType());
123                 
124                 int len;
125                 if (antlrToken instanceof AntlrToken) {
126                     len = ((AntlrToken)antlrToken).getLength();
127                 } else {
128                     String JavaDoc text = antlrToken.getText();
129                     len = text.length();
130                 }
131
132                 tokenData.setTokenLength(len);
133                 
134             } else { // antlrToken is null
135
return false; // no more tokens from scanner
136
}
137                 
138         } catch (TokenStreamException e) {
139             /* Input that could not be recognized by antlr.
140              * According to the Calc grammar this should
141              * only occur if there are incomplete
142              * multi-line-comment
143              * at the end of the input
144              * or a generic error caused by characters
145              * not conforming to the grammar.
146              */

147             boolean useScannerTextTokenLength = true;
148             
149             // check for incomplete token - use the state variable
150
int incompleteIntId;
151             int state = scanner.getState();
152             switch (state) {
153                 case 0:
154                     incompleteIntId = CalcLanguage.ERROR_INT;
155                     break;
156                     
157                 case CalcScannerTokenTypes.INCOMPLETE_ML_COMMENT:
158                     // the following construction in Calc.g causes to get here
159
//
160
// ML_COMMENT : INCOMPLETE_ML_COMMENT { state = CalcScannerTokenTypes.INCOMPLETE_ML_COMMENT; }
161
// ( { LA(2) != '/' }? '*'
162
// | ~('*')
163
// )*
164
// "*/" { state = 0; }
165
// ;
166
//
167
incompleteIntId = state;
168
169                     // The scanner would not include
170
// the last char when adding non-star character to the end of input
171
// ending by "/**"
172
// when useScannerTextTokenLength is left to be true
173
// Therefore lexerInput-based tokenLength is used instead.
174
useScannerTextTokenLength = false;
175                     break;
176                     
177                 default:
178                     throw new IllegalStateException JavaDoc(); // unhandled case
179

180             }
181             scanner.resetState();
182
183             tokenData.setTokenIntId(incompleteIntId);
184             
185             int scannerTextTokenLength = scanner.getText().length();
186             int tokenLength = useScannerTextTokenLength
187                     ? scannerTextTokenLength
188                     : tokenData.getDefaultTokenLength();
189                     
190             // Sync scanner with lexerInput if necessary
191
if (scannerTextTokenLength > tokenLength) { // Should not happen
192
throw new IllegalStateException JavaDoc("Internal lexer error"); // NOI18N
193
}
194             while (scannerTextTokenLength < tokenLength) {
195                 scannerConsumeChar();
196                 scannerTextTokenLength++;
197             }
198
199             // Make sure that token contains at least one char
200
tokenLength = increaseTokenLengthIfEmpty(tokenLength);
201             tokenData.setTokenLength(tokenLength);
202
203             scanner.resetText();
204         }
205         
206         return true;
207     }
208     
209     private int increaseTokenLengthIfEmpty(int tokenLength) {
210         if (tokenLength == 0) { // single char unaccepted by scanner
211
scannerConsumeChar();
212             tokenLength++;
213         }
214         return tokenLength;
215     }
216     
217     private void scannerConsumeChar() {
218         try {
219             scanner.consume();
220         } catch (CharStreamException e) {
221             throw new IllegalStateException JavaDoc();
222         }
223     }
224
225     /**
226      * Called after a token was successfully fetched
227      * by {@link #fetchToken(TokenData)} to possibly
228      * start an extended token mode
229      * by {@link OrdinaryTokenData#startExtendedToken()}
230      * <P>When extended token mode is started
231      * the {@link #extendedToken(ExtendedTokenData, boolean)}
232      * is called after each future {@link #fetchToken(TokenData) instead
233      * of <CODE>ordinaryToken()</CODE> (that would be called
234      * in non-extended mode by default).
235      * @param tokenData mutable info holding information
236      * about previously fetched token.
237      * @see OrdinaryTokenData
238      */

239     protected void ordinaryToken(OrdinaryTokenData tokenData) {
240         
241         /*
242          * Now possibly update the tokenIntId for tokens
243          * that do not have direct counterparts in the language
244          * and start extended tokens for errors
245          * and multi-line-comments.
246          */

247         int tokenIntId = tokenData.getTokenIntId();
248         switch (tokenIntId) { // check for types that start extended token
249
case CalcLanguage.ERROR_INT:
250                 // All errors are attempted to be concatenated together
251
tokenData.startExtendedToken();
252                 break;
253
254         }
255         
256     }
257     
258     /**
259      * Called in extended token mode after a token was successfully fetched
260      * by {@link #fetchToken(TokenData)} to possibly update
261      * the extended token identification or finish
262      * the extended token being put together.
263      *
264      * <P>Please note that the <CODE>extendedToken()</CODE> is not called
265      * after extended token mode gets started
266      * by <CODE>OrdinaryTokenData.startExtendedToken()</CODE>
267      * in <CODE>ordinaryToken()</CODE> until another <CODE>fetchToken()</CODE>
268      * is done. The sequence is:<pre>
269      * fetchToken()
270      * ordinaryToken() -> possibly startExtendedToken()
271      * fetchToken()
272      * extendedToken()
273      * fetchToken()
274      * extendedToken()
275      * fetchToken()
276      * extendedToken() -> possibly finishExtendedToken(true)
277      * fetchToken()
278      * ordinaryToken()
279      * fetchToken()
280      * ordinaryToken()
281      * ...
282      * </pre>
283      *
284      * @param tokenData mutable compound info about the token
285      * that was previously fetched and about the extended token
286      * that is being put together.
287      * @param fetchedTokenExists true if the last fetched token
288      * was valid i.e. the <CODE>fetchToken()</CODE> returned true.
289      * False if there are no more tokens to fetch from the input.
290      * <BR>If the parameter is false then this method
291      * must mandatorily finish the extended token
292      * by calling <CODE>finishExtendedToken()</CODE>.
293      * @see ExtendedTokenData
294      */

295     protected void extendedToken(ExtendedTokenData tokenData,
296     boolean fetchedTokenExists) {
297         
298         int extendedTokenIntId = tokenData.getExtendedTokenIntId();
299         int tokenIntId = tokenData.getTokenIntId(); // fetched token id
300
switch (extendedTokenIntId) {
301             case CalcLanguage.ERROR_INT:
302                 if (!fetchedTokenExists
303                     || tokenIntId != CalcLanguage.ERROR_INT
304                 ) {
305                     /* The fetched token is not the error token
306                      * or there are no more tokens on the input.
307                      * Finish the extended token and exclude
308                      * the current token from it.
309                      */

310                     tokenData.finishExtendedToken(false);
311                 }
312                 break;
313
314             default: // there should be no other extended tokens supported
315
throw new IllegalStateException JavaDoc("Unsupported extended token");
316
317         }
318         
319     }
320     
321     public String JavaDoc toString() {
322         String JavaDoc scannerText = scanner.getText();
323         return super.toString() + ", scannerText=\"" + scannerText
324             + "\";length=" + scannerText.length();
325     }
326
327 }
328
Popular Tags