KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > fri > patterns > interpreter > parsergenerator > lexer > LexerImpl


1 package fri.patterns.interpreter.parsergenerator.lexer;
2
3 import java.util.*;
4 import java.io.*;
5 import fri.patterns.interpreter.parsergenerator.Lexer;
6 import fri.patterns.interpreter.parsergenerator.Token;
7
8 /**
9     This Lexer must be created using LexerBuilder. It knows token and ignored terminals.
10     To get this Lexer working the <i>setTerminals()</i> call must be called at least once.
11     When using the Lexer standalone, the client must do this, else the Parser will
12     call that method.
13     <p>
14     This lexer can be reused, but it can not be loaded with other syntaxes after it
15     has been built for one.
16     
17     @author (c) 2002, Fritz Ritzberger
18 */

19
20 public class LexerImpl implements
21     Lexer,
22     StrategyFactoryMethod,
23     Serializable
24 {
25     protected Strategy strategy;
26     private List ignoredSymbols;
27     private Map charConsumers;
28     private transient InputText input;
29     private List listeners;
30     private transient boolean debug;
31         
32     /**
33         Creates a Lexer from token- and ignored symbols, and a map of character consumers (built by LexerBuilder).
34         @param ignoredSymbols list of Strings containing ignored symbols to scan. These are NOT enclosed in `backquotes` like tokens.
35         @param charConsumers map with key = nonterminal and value = Consumer.
36     */

37     public LexerImpl(List ignoredSymbols, Map charConsumers) {
38         setConsumers(ignoredSymbols, charConsumers);
39     }
40     
41     /** Do-nothing constructor for subclasses (currently unused). */
42     protected LexerImpl() {
43     }
44
45     /** Implements Lexer. Adds the passed token listener to listener list. */
46     public void addTokenListener(Lexer.TokenListener tokenListener) {
47         if (listeners == null)
48             listeners = new ArrayList(1);
49         listeners.add(tokenListener);
50     }
51     /** Implements Lexer. Removes the passed token listener from listener list. */
52     public void removeTokenListener(Lexer.TokenListener tokenListener) {
53         if (listeners != null)
54             listeners.remove(tokenListener);
55     }
56     
57
58     private void setConsumers(List ignoredSymbols, Map charConsumers) {
59         this.charConsumers = charConsumers; // store for check at setTerminals()
60
this.ignoredSymbols = ignoredSymbols; // need to know which token should be ignored
61

62         for (int i = 0; ignoredSymbols != null && i < ignoredSymbols.size(); i++) { // ignored symbols will not be passed by the parser
63
String JavaDoc sym = (String JavaDoc) ignoredSymbols.get(i);
64             Consumer cc = (Consumer) charConsumers.get(sym);
65             ensureStrategy().addIgnoringConsumer(sym, cc);
66         }
67         
68         // propagate this LexerImpl as StrategyFactoryMethod to ConsumerAlternatives
69
for (Iterator it = charConsumers.entrySet().iterator(); it.hasNext(); ) {
70             Consumer c = (Consumer) ((Map.Entry) it.next()).getValue();
71             if (c instanceof ConsumerAlternatives) {
72                 ((ConsumerAlternatives) c).setStrategyFactoryMethod(this);
73             }
74         }
75     }
76
77     private Strategy ensureStrategy() {
78         if (strategy == null)
79             strategy = newStrategy();
80         return strategy;
81     }
82     
83     /** Implements StrategyFactoryMethod. To be overridden to create a derived Strategy implementation. */
84     public Strategy newStrategy() {
85         return new Strategy();
86     }
87     
88     /**
89         When false, the sort order (significance) of scan items without fixed start character decide what token is returned.
90         When true (default), the scan item (without fixed start character) that scnas longest wins.
91     */

92     public void setCompeteForLongestInput(boolean competeForLongestInput) {
93         ensureStrategy().setCompeteForLongestInput(competeForLongestInput);
94     }
95     
96
97     // implementing Lexer
98

99     /**
100         Implements Lexer: set the input to be scanned. If text is InputStream, no Reader
101         will be used (characters will not be converted).
102         @param text text to scan, as String, StringBuffer, File, InputStream, Reader.
103     */

104     public void setInput(Object JavaDoc text)
105         throws IOException
106     {
107         input = new InputText(text);
108     }
109
110     /**
111         Implements Lexer: Parser call to pass all tokens symbols (all enclosed in `backquote`) and literals ("xyz").
112         @param terminals List of String containing "literals" and `lexertokens`.
113     */

114     public void setTerminals(List terminals) {
115         for (int i = 0; i < terminals.size(); i++) {
116             String JavaDoc symbol = (String JavaDoc) terminals.get(i);
117             
118             // check if it is a terminal as this is a public call
119
if (symbol.length() <= 2 || Token.isTerminal(symbol) == false)
120                 throw new IllegalArgumentException JavaDoc("Terminals must be enclosed within quotes: "+symbol);
121             
122             String JavaDoc text = symbol.substring(1, symbol.length() - 1); // remove quotes
123

124             if (ensureStrategy().hasTerminal(symbol) == false) { // could have been called for second time
125
if (symbol.charAt(0) == Token.COMMAND_QUOTE) { // is a scan terminal covered by a Consumer
126
Consumer cc = (Consumer) charConsumers.get(text);
127                     if (cc == null)
128                         throw new IllegalArgumentException JavaDoc("Lexer token is not among character consumers: "+text);
129                     else
130                         ensureStrategy().addTokenConsumer(symbol, cc);
131                 }
132                 else {
133                     ensureStrategy().addTokenConsumer(symbol, new Consumer(text));
134                 }
135             }
136         } // end for
137

138         if (debug)
139             System.err.println("StrategyList is:\n"+strategy);
140     }
141
142
143     /** Implements Lexer: Does nothing as no states are stored. This Lexer can not be loaded with new syntaxes. */
144     public void clear() {
145     }
146
147
148
149     /**
150         This is an optional functionality of Lexer. It is <b>NOT</b> called by the Parser.
151         It can be used for heuristic reading from an input (not knowing if there is more input
152         after the token was read).
153         <p />
154         The passed LexerSemantic will receive every matched rule (top-down) together with
155         its ResultTree. See <i>lex()</i> for details.
156         
157         @param lexerSemantic the LexerSemantic to be called with every evaluated Rule and its lexing ResultTree.
158         @return a Token with a terminal symbol and its instance text, or a Token with null symbol for error.
159     */

160     public Token getNextToken(LexerSemantic lexerSemantic)
161         throws IOException
162     {
163         return getNextToken(lexerSemantic, null);
164     }
165     
166     /**
167         Implements Lexer: returns the next token from input, or EPSILON when no more input.
168         This is called by the Parser to get the next syntax token from input.
169         When returned <i>token.symbol</i> is null, no input could be recognized (ERROR).
170         @param expectedTokenSymbols contains the expected String token symbols (in keys),
171                 can be null when no Parser drives this Lexer.
172         @return a Token with a terminal symbol and its instance text, or a Token with null symbol for error.
173     */

174     public Token getNextToken(Map expectedTokenSymbols)
175         throws IOException
176     {
177         return getNextToken(null, expectedTokenSymbols);
178     }
179     
180     private Token getNextToken(LexerSemantic lexerSemantic, Map expectedTokenSymbols)
181         throws IOException
182     {
183         if (input == null)
184             throw new IllegalStateException JavaDoc("Lexer has no input, call setInput(...).");
185
186         Token.Address start = new Token.Address(input.getScanLine(), input.getScanColumn(), input.getScanOffset());
187         int c = input.peek(); // read lookahead
188
if (c == Input.EOF)
189             return createToken(Token.EPSILON, null, new Token.Range(start, start));
190
191         // not EOF, there must be a lexer item or error
192
Strategy.Item item = getNextLexerItem(expectedTokenSymbols, c);
193
194         if (item != null) { // successful scan
195
if (ignoredSymbols != null && ignoredSymbols.indexOf(item.getSymbol()) >= 0) {
196                 if (listeners != null && listeners.size() > 0) // creating a token takes time, do it only when listeners are present
197
fireTokenReceived(createToken(item.getTokenIdentifier(), item.getResultTree(), lexerSemantic), true);
198                 return getNextToken(expectedTokenSymbols);
199             }
200             else {
201                 Token token = createToken(item.getTokenIdentifier(), item.getResultTree(), lexerSemantic);
202                 fireTokenReceived(token, false);
203                 return token;
204             }
205         }
206
207         // error state, return an error Token with null symbol
208
Token.Address end = new Token.Address(input.getReadLine(), input.getReadColumn(), input.getScanOffset());
209         return createToken(null, input.getUnreadText(), new Token.Range(start, end));
210     }
211     
212     // strategic scan of next item
213
private Strategy.Item getNextLexerItem(Map expectedTokenSymbols, int lookahead)
214         throws IOException
215     {
216         if (strategy == null)
217             throw new IllegalStateException JavaDoc("Lexer has no terminals, call setTerminals(syntaxSeparation.getTokenSymbols()).");
218
219         Strategy.Item item = strategy.consume(input, lookahead, expectedTokenSymbols);
220         
221         if (item != null)
222             input.resolveBuffer(); // forget old contents
223

224         return item;
225     }
226     
227     // calls the token listeners with scanned token
228
private void fireTokenReceived(Token token, boolean ignored) {
229         for (int i = 0; listeners != null && i < listeners.size(); i++)
230             ((Lexer.TokenListener) listeners.get(i)).tokenReceived(token, ignored);
231     }
232
233     /** Token factory method. Can be overridden to access the lexing ResultTree. Delegates to createToken(tokenIdentifier, text, range). */
234     protected Token createToken(String JavaDoc tokenIdentifier, ResultTree result, LexerSemantic lexerSemantic) {
235         if (lexerSemantic != null)
236             loopResultTree(result, lexerSemantic);
237         return createToken(tokenIdentifier, result.toString(), result.getRange()); // toString() takes time as it builds the token text
238
}
239     
240     /** Token factory method. Can be overridden to convert token.text to some Java object. */
241     protected Token createToken(String JavaDoc tokenIdentifier, String JavaDoc text, Token.Range range) {
242         return new Token(tokenIdentifier, text, range);
243     }
244
245
246     /**
247         This is an optional functionality of Lexer. It is <b>NOT</b> called by the Parser.
248         It can be used to run a standalone Lexer with a LexerSemantic, processing a ready-scanned
249         syntax tree. Other than with Parser Semantic no value stack is available for LexerSemantic,
250         and all input will have been read when LexerSemantic is called with the built syntax tree.
251         <p />
252         The passed LexerSemantic will receive every matched rule (top-down) together with
253         its results ResultTree, containing the range within input.
254         ResultTree can be converted to text by calling <i>resultTree.toString()</i>.
255         <p />
256         This method evaluates the input using end-of-input like a parser, that means it returns
257         false if the input was either syntactically incorrect or EOF was not received when all rules
258         have been evaluated.
259         <p />
260         <b>MIND:</b> This method does not call any TokenListener, as the LexerSemantic is expected to
261             dispatch results!
262         
263         @param lexerSemantic the LexerSemantic to be called with every evaluated Rule and its lexing ResultTree.
264         @return true when lexer succeeded (input was syntactically ok), else false.
265     */

266     public boolean lex(LexerSemantic lexerSemantic)
267         throws IOException
268     {
269         int c = input.peek();
270         boolean eof = (c == Input.EOF);
271         boolean error = eof;
272         
273         if (error == false) {
274             Strategy.Item item = getNextLexerItem(null, c);
275             error = (item == null || item.getTokenIdentifier() == null);
276
277             if (error == false && lexerSemantic != null)
278                 loopResultTree(item.getResultTree(), lexerSemantic);
279
280             c = input.peek();
281             eof = (c == Input.EOF);
282             error = (eof == false);
283         }
284         
285         if (error) {
286             dump(System.err);
287             System.err.println("Could not process character '"+(char)c+"' (int "+c+"), at line/column "+input.getScanLine()+"/"+input.getScanColumn()+", at offset "+input.getScanOffset());
288         }
289
290         return error == false;
291     }
292
293     /**
294         After top-down lexing this method is called to dispatch all results. Can be overridden to change dispatch logic.
295         This method calls itself recursively with all result tree children. Nonterminals starting with "_" are ignored
296         by default, as this marks artificial rules.
297         @param result lexer result, returns text on getText().
298         @param semantic semantic that dispatches the lexer results.
299         @return a Token with the range and return of the Semantic call for this Rule/ResultTree.
300     */

301     protected void loopResultTree(ResultTree result, LexerSemantic lexerSemantic) {
302         Set wantedNonterminals = lexerSemantic.getWantedNonterminals();
303         Set ignoredNonterminals = lexerSemantic.getIgnoredNonterminals();
304         String JavaDoc nonterminal = result.getRule().getNonterminal();
305         
306         if (nonterminal.startsWith(Token.ARTIFICIAL_NONTERMINAL_START_CHARACTER) == false &&
307                 (wantedNonterminals == null || wantedNonterminals.contains(nonterminal)) &&
308                 (ignoredNonterminals == null || ignoredNonterminals.contains(nonterminal) == false))
309         {
310             lexerSemantic.ruleEvaluated(result.getRule(), result);
311         }
312         
313         for (int i = 0; i < result.getChildCount(); i++) {
314             Object JavaDoc child = result.getChild(i);
315             if (child instanceof ResultTree)
316                 loopResultTree((ResultTree) child, lexerSemantic);
317         }
318     }
319
320
321
322     // debug methods
323

324     /** Implements Lexer: Set debug on to output information about scanned tokens. */
325     public void setDebug(boolean debug) {
326         this.debug = debug;
327     }
328
329     /** Returns the current line, as far as read. */
330     public String JavaDoc getLineText() {
331         return input.getLine();
332     }
333     
334     /** Returns the number of the current line, 1-n. */
335     public int getLine() {
336         return input.getReadLine();
337     }
338
339     /** Returns the position within the current line, 0-n. */
340     public int getColumn() {
341         return input.getReadColumn();
342     }
343     
344     /** Returns the offset read so far from input. This is an absolute offset, including newlines. */
345     public int getOffset() {
346         return input.getScanOffset();
347     }
348     
349
350     /** Outputs current and previous line, with line numbers. Call this on ERROR. */
351     public void dump(PrintStream out) {
352         int lineNr = input.getReadLine();
353         String JavaDoc line = getLineText();
354         
355         if (lineNr > 1) {
356             String JavaDoc prevLine = input.getPreviousLine();
357             out.print((lineNr - 1)+":\t");
358             out.println(prevLine);
359         }
360         
361         out.print(lineNr+":\t");
362         out.println(line);
363
364         int nrLen = Integer.toString(lineNr).length();
365         for (int i = 0; i < nrLen; i++)
366             out.print(" ");
367
368         out.print("\t");
369
370         int errPos = input.getReadColumn();
371
372         for (int i = 0; i < errPos && i < line.length(); i++)
373             if (line.charAt(i) == '\t')
374                 out.print("\t");
375             else
376                 out.print(" ");
377
378         out.println("^");
379     }
380
381 }
382
Popular Tags