KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sun > java_cup > internal > lexer


package com.sun.java_cup.internal;

import com.sun.java_cup.internal.runtime.Symbol;
import java.util.Hashtable;
5
6 /** This class implements a small scanner (aka lexical analyzer or lexer) for
7  * the JavaCup specification. This scanner reads characters from standard
8  * input (System.in) and returns integers corresponding to the terminal
9  * number of the next Symbol. Once end of input is reached the EOF Symbol is
10  * returned on every subsequent call.<p>
11  * Symbols currently returned include: <pre>
12  * Symbol Constant Returned Symbol Constant Returned
13  * ------ ----------------- ------ -----------------
14  * "package" PACKAGE "import" IMPORT
15  * "code" CODE "action" ACTION
16  * "parser" PARSER "terminal" TERMINAL
17  * "non" NON "init" INIT
18  * "scan" SCAN "with" WITH
19  * "start" START "precedence" PRECEDENCE
20  * "left" LEFT "right" RIGHT
21  * "nonassoc" NONASSOC "%prec PRECENT_PREC
22  * [ LBRACK ] RBRACK
23  * ; SEMI
24  * , COMMA * STAR
25  * . DOT : COLON
26  * ::= COLON_COLON_EQUALS | BAR
27  * identifier ID {:...:} CODE_STRING
28  * "nonterminal" NONTERMINAL
29  * </pre>
30  * All symbol constants are defined in sym.java which is generated by
31  * JavaCup from parser.cup.<p>
32  *
33  * In addition to the scanner proper (called first via init() then with
34  * next_token() to get each Symbol) this class provides simple error and
35  * warning routines and keeps a count of errors and warnings that is
36  * publicly accessible.<p>
37  *
38  * This class is "static" (i.e., it has only static members and methods).
39  *
40  * @version last updated: 7/3/96
41  * @author Frank Flannery
42  */

43 public class lexer {
44
45   /*-----------------------------------------------------------*/
46   /*--- Constructor(s) ----------------------------------------*/
47   /*-----------------------------------------------------------*/
48
49   /** The only constructor is private, so no instances can be created. */
50   private lexer() { }
51
52   /*-----------------------------------------------------------*/
53   /*--- Static (Class) Variables ------------------------------*/
54   /*-----------------------------------------------------------*/
55
56   /** First character of lookahead. */
57   protected static int next_char;
58
59   /** Second character of lookahead. */
60   protected static int next_char2;
61
62   /** Second character of lookahead. */
63   protected static int next_char3;
64
65   /** Second character of lookahead. */
66   protected static int next_char4;
67
68   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
69
70   /** EOF constant. */
71   protected static final int EOF_CHAR = -1;
72
73   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
74
75   /** Table of keywords. Keywords are initially treated as identifiers.
76    * Just before they are returned we look them up in this table to see if
77    * they match one of the keywords. The string of the name is the key here,
78    * which indexes Integer objects holding the symbol number.
79    */

80   protected static Hashtable JavaDoc keywords = new Hashtable JavaDoc(23);
81
82   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
83
84   /** Table of single character symbols. For ease of implementation, we
85    * store all unambiguous single character Symbols in this table of Integer
86    * objects keyed by Integer objects with the numerical value of the
87    * appropriate char (currently Character objects have a bug which precludes
88    * their use in tables).
89    */

90   protected static Hashtable JavaDoc char_symbols = new Hashtable JavaDoc(11);
91
92   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
93
94   /** Current line number for use in error messages. */
95   protected static int current_line = 1;
96
97   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
98
99   /** Character position in current line. */
100   protected static int current_position = 1;
101
102   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
103
104   /** Character position in current line. */
105   protected static int absolute_position = 1;
106
107   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
108
109   /** Count of total errors detected so far. */
110   public static int error_count = 0;
111
112   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
113
114   /** Count of warnings issued so far */
115   public static int warning_count = 0;
116
117   /*-----------------------------------------------------------*/
118   /*--- Static Methods ----------------------------------------*/
119   /*-----------------------------------------------------------*/
120
121   /** Initialize the scanner. This sets up the keywords and char_symbols
122     * tables and reads the first two characters of lookahead.
123     */

124   public static void init() throws java.io.IOException JavaDoc
125     {
126       /* set up the keyword table */
127       keywords.put("package", new Integer JavaDoc(sym.PACKAGE));
128       keywords.put("import", new Integer JavaDoc(sym.IMPORT));
129       keywords.put("code", new Integer JavaDoc(sym.CODE));
130       keywords.put("action", new Integer JavaDoc(sym.ACTION));
131       keywords.put("parser", new Integer JavaDoc(sym.PARSER));
132       keywords.put("terminal", new Integer JavaDoc(sym.TERMINAL));
133       keywords.put("non", new Integer JavaDoc(sym.NON));
134       keywords.put("nonterminal",new Integer JavaDoc(sym.NONTERMINAL));// [CSA]
135
keywords.put("init", new Integer JavaDoc(sym.INIT));
136       keywords.put("scan", new Integer JavaDoc(sym.SCAN));
137       keywords.put("with", new Integer JavaDoc(sym.WITH));
138       keywords.put("start", new Integer JavaDoc(sym.START));
139       keywords.put("precedence", new Integer JavaDoc(sym.PRECEDENCE));
140       keywords.put("left", new Integer JavaDoc(sym.LEFT));
141       keywords.put("right", new Integer JavaDoc(sym.RIGHT));
142       keywords.put("nonassoc", new Integer JavaDoc(sym.NONASSOC));
143
144       /* set up the table of single character symbols */
145       char_symbols.put(new Integer JavaDoc(';'), new Integer JavaDoc(sym.SEMI));
146       char_symbols.put(new Integer JavaDoc(','), new Integer JavaDoc(sym.COMMA));
147       char_symbols.put(new Integer JavaDoc('*'), new Integer JavaDoc(sym.STAR));
148       char_symbols.put(new Integer JavaDoc('.'), new Integer JavaDoc(sym.DOT));
149       char_symbols.put(new Integer JavaDoc('|'), new Integer JavaDoc(sym.BAR));
150       char_symbols.put(new Integer JavaDoc('['), new Integer JavaDoc(sym.LBRACK));
151       char_symbols.put(new Integer JavaDoc(']'), new Integer JavaDoc(sym.RBRACK));
152
153       /* read two characters of lookahead */
154       next_char = System.in.read();
155       if (next_char == EOF_CHAR) {
156     next_char2 = EOF_CHAR;
157         next_char3 = EOF_CHAR;
158         next_char4 = EOF_CHAR;
159       } else {
160     next_char2 = System.in.read();
161     if (next_char2 == EOF_CHAR) {
162       next_char3 = EOF_CHAR;
163       next_char4 = EOF_CHAR;
164     } else {
165       next_char3 = System.in.read();
166       if (next_char3 == EOF_CHAR) {
167         next_char4 = EOF_CHAR;
168       } else {
169         next_char4 = System.in.read();
170       }
171     }
172       }
173     }
174
175   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
176
177   /** Advance the scanner one character in the input stream. This moves
178    * next_char2 to next_char and then reads a new next_char2.
179    */

180   protected static void advance() throws java.io.IOException JavaDoc
181     {
182       int old_char;
183
184       old_char = next_char;
185       next_char = next_char2;
186       if (next_char == EOF_CHAR) {
187     next_char2 = EOF_CHAR;
188         next_char3 = EOF_CHAR;
189     next_char4 = EOF_CHAR;
190       } else {
191     next_char2 = next_char3;
192     if (next_char2 == EOF_CHAR) {
193       next_char3 = EOF_CHAR;
194       next_char4 = EOF_CHAR;
195     } else {
196       next_char3 = next_char4;
197       if (next_char3 == EOF_CHAR) {
198         next_char4 = EOF_CHAR;
199       } else {
200         next_char4 = System.in.read();
201       }
202     }
203       }
204
205       /* count this */
206       absolute_position++;
207       current_position++;
208       if (old_char == '\n')
209     {
210       current_line++;
211       current_position = 1;
212     }
213     }
214
215   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
216
217   /** Emit an error message. The message will be marked with both the
218    * current line number and the position in the line. Error messages
219    * are printed on standard error (System.err).
220    * @param message the message to print.
221    */

222   public static void emit_error(String JavaDoc message)
223     {
224       System.err.println("Error at " + current_line + "(" + current_position +
225              "): " + message);
226       error_count++;
227     }
228
229   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
230
231   /** Emit a warning message. The message will be marked with both the
232    * current line number and the position in the line. Messages are
233    * printed on standard error (System.err).
234    * @param message the message to print.
235    */

236   public static void emit_warn(String JavaDoc message)
237     {
238       System.err.println("Warning at " + current_line + "(" + current_position +
239              "): " + message);
240       warning_count++;
241     }
242
243   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
244
245   /** Determine if a character is ok to start an id.
246    * @param ch the character in question.
247    */

248   protected static boolean id_start_char(int ch)
249     {
250       /* allow for % in identifiers. a hack to allow my
251      %prec in. Should eventually make lex spec for this
252      frankf */

253       return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
254          (ch == '_');
255
256       // later need to deal with non-8-bit chars here
257
}
258
259   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
260
261   /** Determine if a character is ok for the middle of an id.
262    * @param ch the character in question.
263    */

264   protected static boolean id_char(int ch)
265     {
266       return id_start_char(ch) || (ch >= '0' && ch <= '9');
267     }
268
269   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
270
271   /** Try to look up a single character symbol, returns -1 for not found.
272    * @param ch the character in question.
273    */

274   protected static int find_single_char(int ch)
275     {
276       Integer JavaDoc result;
277
278       result = (Integer JavaDoc)char_symbols.get(new Integer JavaDoc((char)ch));
279       if (result == null)
280     return -1;
281       else
282     return result.intValue();
283     }
284
285   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
286
287   /** Handle swallowing up a comment. Both old style C and new style C++
288    * comments are handled.
289    */

290   protected static void swallow_comment() throws java.io.IOException JavaDoc
291     {
292       /* next_char == '/' at this point */
293
294       /* is it a traditional comment */
295       if (next_char2 == '*')
296     {
297       /* swallow the opener */
298       advance(); advance();
299
300       /* swallow the comment until end of comment or EOF */
301       for (;;)
302         {
303           /* if its EOF we have an error */
304           if (next_char == EOF_CHAR)
305         {
306           emit_error("Specification file ends inside a comment");
307           return;
308         }
309
310           /* if we can see the closer we are done */
311           if (next_char == '*' && next_char2 == '/')
312         {
313           advance();
314           advance();
315           return;
316         }
317
318           /* otherwise swallow char and move on */
319           advance();
320         }
321     }
322
323       /* is its a new style comment */
324       if (next_char2 == '/')
325     {
326       /* swallow the opener */
327       advance(); advance();
328
329       /* swallow to '\n', '\f', or EOF */
330       while (next_char != '\n' && next_char != '\f' && next_char!=EOF_CHAR)
331         advance();
332
333       return;
334
335     }
336
337       /* shouldn't get here, but... if we get here we have an error */
338       emit_error("Malformed comment in specification -- ignored");
339       advance();
340     }
341
342   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
343
344   /** Swallow up a code string. Code strings begin with "{:" and include
345       all characters up to the first occurrence of ":}" (there is no way to
346       include ":}" inside a code string). The routine returns a String
347       object suitable for return by the scanner.
348    */

349   protected static Symbol do_code_string() throws java.io.IOException JavaDoc
350     {
351       StringBuffer JavaDoc result = new StringBuffer JavaDoc();
352
353       /* at this point we have lookahead of "{:" -- swallow that */
354       advance(); advance();
355
356       /* save chars until we see ":}" */
357       while (!(next_char == ':' && next_char2 == '}'))
358     {
359       /* if we have run off the end issue a message and break out of loop */
360       if (next_char == EOF_CHAR)
361         {
362           emit_error("Specification file ends inside a code string");
363           break;
364         }
365
366       /* otherwise record the char and move on */
367       result.append(new Character JavaDoc((char)next_char));
368       advance();
369     }
370
371       /* advance past the closer and build a return Symbol */
372       advance(); advance();
373       return new Symbol(sym.CODE_STRING, result.toString());
374     }
375
376   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
377
378   /** Process an identifier. Identifiers begin with a letter, underscore,
379    * or dollar sign, which is followed by zero or more letters, numbers,
380    * underscores or dollar signs. This routine returns a String suitable
381    * for return by the scanner.
382    */

383   protected static Symbol do_id() throws java.io.IOException JavaDoc
384     {
385       StringBuffer JavaDoc result = new StringBuffer JavaDoc();
386       String JavaDoc result_str;
387       Integer JavaDoc keyword_num;
388       char buffer[] = new char[1];
389
390       /* next_char holds first character of id */
391       buffer[0] = (char)next_char;
392       result.append(buffer,0,1);
393       advance();
394
395       /* collect up characters while they fit in id */
396       while(id_char(next_char))
397     {
398           buffer[0] = (char)next_char;
399       result.append(buffer,0,1);
400       advance();
401     }
402
403       /* extract a string and try to look it up as a keyword */
404       result_str = result.toString();
405       keyword_num = (Integer JavaDoc)keywords.get(result_str);
406
407       /* if we found something, return that keyword */
408       if (keyword_num != null)
409     return new Symbol(keyword_num.intValue());
410
411       /* otherwise build and return an id Symbol with an attached string */
412       return new Symbol(sym.ID, result_str);
413     }
414
415   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
416
417   /** Return one Symbol. This is the main external interface to the scanner.
418    * It consumes sufficient characters to determine the next input Symbol
419    * and returns it. To help with debugging, this routine actually calls
420    * real_next_token() which does the work. If you need to debug the
421    * parser, this can be changed to call debug_next_token() which prints
422    * a debugging message before returning the Symbol.
423    */

424   public static Symbol next_token() throws java.io.IOException JavaDoc
425     {
426       return real_next_token();
427     }
428
429   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
430
431   /** Debugging version of next_token(). This routine calls the real scanning
432    * routine, prints a message on System.out indicating what the Symbol is,
433    * then returns it.
434    */

435   public static Symbol debug_next_token() throws java.io.IOException JavaDoc
436     {
437       Symbol result = real_next_token();
438       System.out.println("# next_Symbol() => " + result.sym);
439       return result;
440     }
441
442   /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
443
444   /** The actual routine to return one Symbol. This is normally called from
445    * next_token(), but for debugging purposes can be called indirectly from
446    * debug_next_token().
447    */

448   protected static Symbol real_next_token() throws java.io.IOException JavaDoc
449     {
450       int sym_num;
451
452       for (;;)
453     {
454       /* look for white space */
455       if (next_char == ' ' || next_char == '\t' || next_char == '\n' ||
456           next_char == '\f' || next_char == '\r')
457         {
458           /* advance past it and try the next character */
459           advance();
460           continue;
461         }
462
463       /* look for a single character symbol */
464       sym_num = find_single_char(next_char);
465       if (sym_num != -1)
466         {
467           /* found one -- advance past it and return a Symbol for it */
468           advance();
469           return new Symbol(sym_num);
470         }
471
472       /* look for : or ::= */
473       if (next_char == ':')
474         {
475           /* if we don't have a second ':' return COLON */
476           if (next_char2 != ':')
477         {
478           advance();
479           return new Symbol(sym.COLON);
480         }
481
482           /* move forward and look for the '=' */
483           advance();
484           if (next_char2 == '=')
485         {
486           advance(); advance();
487           return new Symbol(sym.COLON_COLON_EQUALS);
488         }
489           else
490         {
491           /* return just the colon (already consumed) */
492           return new Symbol(sym.COLON);
493         }
494         }
495
496       /* find a "%prec" string and return it. otherwise, a '%' was found,
497          which has no right being in the specification otherwise */

498       if (next_char == '%') {
499         advance();
500         if ((next_char == 'p') && (next_char2 == 'r') && (next_char3 == 'e') &&
501         (next_char4 == 'c')) {
502           advance();
503           advance();
504           advance();
505           advance();
506           return new Symbol(sym.PERCENT_PREC);
507         } else {
508           emit_error("Found extraneous percent sign");
509         }
510       }
511
512       /* look for a comment */
513       if (next_char == '/' && (next_char2 == '*' || next_char2 == '/'))
514         {
515           /* swallow then continue the scan */
516           swallow_comment();
517           continue;
518         }
519
520       /* look for start of code string */
521       if (next_char == '{' && next_char2 == ':')
522         return do_code_string();
523
524       /* look for an id or keyword */
525       if (id_start_char(next_char)) return do_id();
526
527       /* look for EOF */
528       if (next_char == EOF_CHAR) return new Symbol(sym.EOF);
529
530       /* if we get here, we have an unrecognized character */
531       emit_warn("Unrecognized character '" +
532         new Character JavaDoc((char)next_char) + "'(" + next_char +
533         ") -- ignored");
534
535       /* advance past it */
536       advance();
537     }
538     }
539
540   /*-----------------------------------------------------------*/
541 }
542
543
Popular Tags