KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > netbeans > modules > lexer > demo > javacc > CalcLexer


1 /*
2  * The contents of this file are subject to the terms of the Common Development
3  * and Distribution License (the License). You may not use this file except in
4  * compliance with the License.
5  *
6  * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7  * or http://www.netbeans.org/cddl.txt.
8  *
9  * When distributing Covered Code, include this CDDL Header Notice in each file
10  * and include the License file at http://www.netbeans.org/cddl.txt.
11  * If applicable, add the following below the CDDL Header, with the fields
12  * enclosed by brackets [] replaced by your own identifying information:
13  * "Portions Copyrighted [year] [name of copyright owner]"
14  *
15  * The Original Software is NetBeans. The Initial Developer of the Original
16  * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
17  * Microsystems, Inc. All Rights Reserved.
18  */

19
20 package org.netbeans.modules.lexer.demo.javacc;
21
22 import org.netbeans.api.lexer.Language;
23 import org.netbeans.api.lexer.Lexer;
24 import org.netbeans.api.lexer.LexerInput;
25 import org.netbeans.api.lexer.TokenId;
26 import org.netbeans.api.lexer.Token;
27 import org.netbeans.spi.lexer.AbstractLexer;
28 import org.netbeans.spi.lexer.javacc.LexerInputCharStream;
29 import org.netbeans.spi.lexer.javacc.TokenMgrError;
30 import org.netbeans.spi.lexer.util.IntegerCache;
31
32 /**
33  * Wrapper around javacc's generated token manager.
34  * <BR>Please read <A HREF="http://lexer.netbeans.org/doc/javacc.html">
35  * to get additional information related to this source.
36  *
37  * <P>Most of the tokens
38  * returned from the token manager are just accepted and passed on
39  * but several token types are created by assembling several tokens into one
40  * extended token.
41  * <br>For example block comment is assembled by first recognizing initial
42  * slash-star as a tokenmanager's token and then recognizing the rest of the comment.
43  * <BR>Error tokens are recognized as single characters by tokenmanagers
44  * and assembled together by CalcLexer so that they form just one extended
45  * token rather than many single-char successive error tokens.
46  *
47  * @author Miloslav Metelka
48  * @version 1.00
49  */

50
51 final class CalcLexer extends AbstractLexer {
52
53     private static final CalcLanguage language = CalcLanguage.get();
54     
55     private static final Integer JavaDoc[] integerStates = IntegerCache.getTable(CalcLanguage.MAX_STATE);
56     
57     private CalcTokenManager tokenManager;
58     
59     private LexerInputCharStream charStream;
60
61     private LexerInput lexerInput;
62     
63     public CalcLexer() {
64         this.charStream = new LexerInputCharStream();
65         this.tokenManager = new CalcTokenManager(charStream);
66     }
67     
68     protected Object JavaDoc getLexerState() {
69         int s = tokenManager.curLexState;
70         // Default state is returned as null others like Integer instances
71
return (s == tokenManager.defaultLexState)
72             ? null
73             : integerStates[s];
74             
75         /* BTW in this particular case (with this Calc grammar
76          * and this CalcLexer) there could be just
77          * return null;
78          * Although there are some extra tokenmanager's states
79          * (e.g. when tokenmanager recognizes slash-star
80          * in the block-comment and goes into non-default internal state)
81          * all those subtokens (after which
82          * the tokenmanager goes into non-deafult state)
83          * are immediately merged with the subtoken(s)
84          * that follow them and after these subtokens the tokenmanager
85          * is always in default state again. As the lexer framework
86          * only asks the lexer for its state at token boundaries
87          * (not on tokenmanager's subtoken boundaries) it would be fine
88          * to return null.
89          */

90     }
91
92     public void restart(LexerInput input, Object JavaDoc state) {
93         super.restart(input, state);
94
95         this.lexerInput = input;
96         /* It's necessary to update the lexerInput
97          * in the charStream that the tokenManager uses.
98          * The LexerInputCharStream is a wrapper
99          * around lexerInput to look like CharStream.
100          */

101         charStream.setLexerInput(lexerInput);
102
103         // Reinit the tokenManager so that it acts like a fresh instance
104
tokenManager.ReInit(charStream,
105             (state != null) // see getLexerState() for info about which states can be returned
106
? ((Integer JavaDoc)state).intValue() // non-default state
107
: tokenManager.defaultLexState // default state
108
);
109     }
110     
111     protected final LexerInput getLexerInput() { // this method is necessary for AbstractLexer
112
return lexerInput;
113     }
114     
115     protected final Language getLanguage() { // this method is necessary for AbstractLexer
116
return language;
117     }
118
119
120     /**
121      * Fetch next token from underlying javacc tokenmanager.
122      * <BR>The intId of the token that was found can be set
123      * into the given tokenData parameter
124      * by <CODE>TokenData.setTokenIntId()</CODE> in case there was
125      * a valid token found.
126      * <P>Token length of the fetched token can be set into tokenData
127      * by <CODE>TokenData.setTokenLength()</CODE>.
128      * If the token intId or length is not assigned in <CODE>fetchToken()</CODE>
129      * it must be assigned later during either
130      * {@link #ordinaryToken(OrdinaryTokenData)}
131      * or {@link #extendedToken(ExtendedTokenData)} depending
132      * which of these two gets called.
133      * @param tokenData mutable info about the token being fetched.
134      * @return true if a valid token was found or false
135      * if there are no more tokens on the input (in which case a call
136      * to <CODE>TokenData.setTokenIntId()</CODE> is not necessary).
137      */

138     protected boolean fetchToken(TokenData tokenData) {
139         try {
140             // Get javacc token from tokenmanager
141
org.netbeans.spi.lexer.javacc.Token javaccToken = tokenManager.getNextToken();
142             if (javaccToken != null) {
143                 int tokenKind = javaccToken.kind;
144                 tokenData.setTokenIntId(tokenKind);
145                 tokenData.setTokenLength(tokenData.getDefaultTokenLength());
146                 return (tokenKind != CalcConstants.EOF); // EOF presents no characters
147

148             } else { // javaccToken is null
149
return false; // no more tokens from tokenManager
150
}
151                 
152         } catch (TokenMgrError e) {
153             if (e.getErrorCode() == TokenMgrError.LEXICAL_ERROR) {
154                 if (tokenData.inExtendedToken()) {
155                     switch (tokenData.getExtendedTokenIntId()) {
156                         case CalcConstants.INCOMPLETE_ML_COMMENT:
157                             // This should only happen at the end of input
158
tokenData.setTokenIntId(CalcConstants.EOF);
159                             // Lookahead will be non-zero (for no chars the lexical
160
// error would not be thrown)
161
tokenData.setTokenLength(tokenData.getTextLookahead());
162                             return true; // there are chars -> valid token exists
163

164                     }
165                 }
166                 
167                 // Fallback for other ERRORS
168
// System.out.println("msg=" + e.getMessage());
169
throw new IllegalStateException JavaDoc("Internal lexer error");
170                 
171             } else { // non-lexical type of error
172
throw e;
173             }
174         }
175     }
176
177
178     /**
179      * Called after a token was successfully fetched
180      * by {@link #fetchToken(TokenData)} to possibly
181      * start an extended token mode
182      * by {@link OrdinaryTokenData#startExtendedToken()}
183      * <P>When extended token mode is started
184      * the {@link #extendedToken(ExtendedTokenData, boolean)}
185      * is called after each future {@link #fetchToken(TokenData) instead
186      * of <CODE>ordinaryToken()</CODE> (that would be called
187      * in non-extended mode by default).
188      * @param tokenData mutable info holding information
189      * about previously fetched token.
190      * @see OrdinaryTokenData
191      */

192     protected void ordinaryToken(OrdinaryTokenData tokenData) {
193
194         /*
195          * Start extended tokens for errors
196          * and multi-line-comments.
197          */

198         int tokenIntId = tokenData.getTokenIntId();
199         switch (tokenIntId) { // check for types that start extended token
200
case CalcConstants.ERROR:
201                 /* All errors which are recognized as single chars
202                  * by tokenManager will be concatenated together.
203                  */

204                 tokenData.startExtendedToken();
205                 break;
206
207             case CalcConstants.INCOMPLETE_ML_COMMENT: // "/*" was found by tokenManager
208
/* Multi-line-comment token is recognized by first matching "/*"
209                  * by tokenManager. TokenManager then goes into an extra state
210                  * in which it recognizes all the chars up to star-slash including.
211                  * The recognized token forms the rest of the multi-line-comment
212                  * token then. Both tokens from tokenManager are concatenated
213                  * into a single extended token and returned from nextToken()
214                  * implementation in <CODE>AbstractLexer</CODE>.
215                  * Here the extended token is started. The rest of the matching
216                  * is in extendedToken().
217                  * Here it's possible that the tokenManager throws
218                  * lexical error if it finds end-of-input before
219                  * matching the closing star-slash.
220                  */

221                 tokenData.startExtendedToken();
222                 break;
223         }
224         
225     }
226     
227     /**
228      * Called in extended token mode after a token was successfully fetched
229      * by {@link #fetchToken(TokenData)} to possibly update
230      * the extended token identification or finish
231      * the extended token being put together.
232      *
233      * <P>Please note that the <CODE>extendedToken()</CODE> is not called
234      * after extended token mode gets started
235      * by <CODE>OrdinaryTokenData.startExtendedToken()</CODE>
236      * in <CODE>ordinaryToken()</CODE> until another <CODE>fetchToken()</CODE>
237      * is done. The sequence is:<pre>
238      * fetchToken()
239      * ordinaryToken() -> possibly startExtendedToken()
240      * fetchToken()
241      * extendedToken()
242      * fetchToken()
243      * extendedToken()
244      * fetchToken()
245      * extendedToken() -> possibly finishExtendedToken(true)
246      * fetchToken()
247      * ordinaryToken()
248      * fetchToken()
249      * ordinaryToken()
250      * ...
251      * </pre>
252      *
253      * @param tokenData mutable compound info about the token
254      * that was previously fetched and about the extended token
255      * that is being put together.
256      * @param fetchedTokenExists true if the last fetched token
257      * was valid i.e. the <CODE>fetchToken()</CODE> returned true.
258      * False if there are no more tokens to fetch from the input.
259      * <BR>If the parameter is false then this method
260      * must mandatorily finish the extended token
261      * by calling <CODE>finishExtendedToken()</CODE>.
262      * @see ExtendedTokenData
263      */

264     protected void extendedToken(ExtendedTokenData tokenData,
265     boolean fetchedTokenExists) {
266         
267         int extendedTokenIntId = tokenData.getExtendedTokenIntId();
268         int tokenIntId = tokenData.getTokenIntId(); // fetched token id
269

270         switch (extendedTokenIntId) {
271             case CalcConstants.ERROR:
272                 if (!fetchedTokenExists
273                     || tokenIntId != CalcConstants.ERROR
274                 ) {
275                     /* The fetched token is not the error token
276                      * or there are no more tokens on the input.
277                      * Finish the extended token and exclude
278                      * the current token from it.
279                      */

280                     tokenData.finishExtendedToken(false);
281                 }
282                 break;
283
284             case CalcConstants.INCOMPLETE_ML_COMMENT:
285                 /* Three possibilities exist:
286                  * 1) fetchedTokenExists == true && tokenIntId == CalcConstants.ML_COMMENT
287                  * Lexer recognized end of the multi-line comment token
288                  * and returned CalcConstants.ML_COMMENT.
289                  *
290                  * In this case we change the extended token
291                  * to be CalcConstants.ML_COMMENT.
292                  *
293                  * 2) fetchedTokenExists == true && tokenIntId == CalcConstants.EOF
294                  * There was some additional text after "/*" but EOF was reached
295                  * before matching the closing star-slash and therefore
296                  * the token manager has thrown a lexical error wchich was catched in
297                  * the fetchToken() and reported as an artificial CalcConstants.EOF token.
298                  *
299                  * In this case we leave the extended token
300                  * to be CalcConstants.INCOMPLETE_ML_COMMENT.
301                  *
302                  * 3) fetchedTokenExists == false
303                  * There was just "/*" and no more characters after it (EOF was reached).
304                  *
305                  * In this case we leave the extended token
306                  * to be CalcConstants.INCOMPLETE_ML_COMMENT.
307                  */

308
309                 if (fetchedTokenExists && tokenIntId == CalcConstants.ML_COMMENT) { // Token exists
310
tokenData.updateExtendedTokenIntId(tokenIntId);
311                 }
312                 tokenData.finishExtendedToken(fetchedTokenExists);
313                 break;
314
315             default: // there should be no other extended tokens supported
316
throw new IllegalStateException JavaDoc("Unsupported extended token");
317
318         }
319         
320     }
321
322
323 }
324
Popular Tags