KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > jasmin > Scanner


1 /**
2     Modifications Copyright (C) 1999 Raja Vallee-Rai (rvalleerai@sable.mcgill.ca)
3     All rights reserved.
4    
5     Changes:
6         - Added \\ to the list of possible escape characters for Strings.
7         - March 15, 1999: $ no longer triggers variable substitution
8 */

9
10 /* --- Copyright Jonathan Meyer 1996. All rights reserved. -----------------
11  > File: jasmin/src/jasmin/Scanner.java
12  > Purpose: Tokenizer for Jasmin
13  > Author: Jonathan Meyer, 10 July 1996
14  */

15
16 /* Scanner.java - class for tokenizing Jasmin files. This is rather
17  * cheap and cheerful.
18 */

19
20 package jasmin;
21
22 import jas.*;
23 import java_cup.runtime.*;
24 import java.util.*;
25 import java.io.InputStream JavaDoc;
26
27 class Scanner implements java_cup.runtime.Scanner {
28     InputStream JavaDoc inp;
29
30     // single lookahead character
31
int next_char;
32
33     // temporary buffer
34
char chars[];
35     char secondChars[];
36     char[] unicodeBuffer;
37         
38     // true if we have not yet emitted a SEP ('\n') token. This is a bit
39
// of a hack so to strip out multiple newlines at the start of the file
40
// and replace them with a single SEP token. (for some reason I can't
41
// write the CUP grammar to accept multiple newlines at the start of the
42
// file)
43
boolean is_first_sep;
44
45     // Whitespace characters
46
static final String JavaDoc WHITESPACE = " \n\t\r";
47
48     // Separator characters
49
static final String JavaDoc SEPARATORS = WHITESPACE + ":=";
50
51
52     // used for error reporting to print out where an error is on the line
53
public int line_num, char_num, token_line_num;
54     public StringBuffer JavaDoc line;
55
56     // used by the .set directive to define new variables.
57
public Hashtable dict = new Hashtable();
58
59     //
60
// returns true if a character code is a whitespace character
61
//
62
protected static boolean whitespace(int c) {
63         return (WHITESPACE.indexOf(c) != -1);
64     }
65
66     //
67
// returns true if a character code is a separator character
68
//
69
protected static boolean separator(int c) {
70         return (SEPARATORS.indexOf(c) != -1);
71     }
72
73
74     //
75
// Advanced the input by one character
76
//
77
protected void advance() throws java.io.IOException JavaDoc
78     {
79         next_char = inp.read();
80         if (next_char == '\n') {
81             // a new line
82
line_num++;
83             char_num = 0;
84             line.setLength(0);
85         } else {
86             line.append((char)next_char);
87             char_num++;
88         }
89     }
90
91     //
92
// initialize the scanner
93
//
94
final static int BIGNUM=65000;
95     public Scanner(InputStream JavaDoc i) throws java.io.IOException JavaDoc
96     {
97     inp = i;
98         line_num = 1;
99         char_num = 0;
100         line = new StringBuffer JavaDoc();
101         chars = new char[BIGNUM];
102         secondChars = new char[BIGNUM];
103         unicodeBuffer = new char[4];
104         is_first_sep = true;
105         advance();
106     }
107
108     int readOctal(int firstChar) throws java.io.IOException JavaDoc {
109         int d1, d2, d3;
110         d1 = firstChar;
111         advance();
112         d2 = next_char;
113         advance();
114         d3 = next_char;
115         return ((d1-'0')&7) * 64 + ((d2-'0')&7) * 8 + ((d3-'0')&7);
116     }
117
118     //
119
// recognize and return the next complete symbol
120
//
121
public Symbol next_token()
122                 throws java.io.IOException JavaDoc, jasError
123     {
124
125         token_line_num = line_num;
126
127         for (;;) {
128             switch (next_char) {
129
130             case ';':
131                 // a comment
132
do { advance(); } while (next_char != '\n');
133
134             case '\n':
135                 // return single SEP token (skip multiple newlines
136
// interspersed with whitespace or comments)
137
for (;;) {
138                     do { advance(); } while (whitespace(next_char));
139                     if (next_char == ';') {
140                         do { advance(); } while (next_char != '\n');
141                     } else {
142                         break;
143                     }
144                 }
145                 if (is_first_sep) {
146                     return next_token();
147                 }
148                 token_line_num = line_num;
149                 return new Symbol(sym.SEP);
150
151             case '0': case '1': case '2': case '3': case '4':
152             case '5': case '6': case '7': case '8': case '9':
153             case '-': case '+':
154             case '.': // a number
155
{
156                     int pos = 0;
157
158                     // record that we have found first item
159
is_first_sep = false;
160
161                     chars[0] = (char)next_char;
162                     pos++;
163                     for (;;) {
164                         advance();
165                         if (separator(next_char)) {
166                             break;
167                         }
168             try {
169               chars[pos] = (char)next_char;
170             } catch (ArrayIndexOutOfBoundsException JavaDoc abe) {
171               char[] tmparray = new char[chars.length*2];
172               System.arraycopy(chars, 0,
173                        tmparray, 0,
174                        chars.length);
175               chars = tmparray;
176               chars[pos] = (char)next_char;
177             }
178                         pos++;
179                     }
180                     String JavaDoc str = new String JavaDoc(chars, 0, pos);
181                     Symbol tok;
182
183                     if(str.equals("+DoubleInfinity"))
184                         return new Symbol(sym.Num, new Double JavaDoc(1.0/0.0));
185                     
186                     if(str.equals("+DoubleNaN"))
187                         return new Symbol(sym.Num, new Double JavaDoc(0.0d/0.0));
188                     
189                     if(str.equals("+FloatNaN"))
190                         return new Symbol(sym.Num, new Float JavaDoc(0.0f/0.0));
191                         
192                     if(str.equals("-DoubleInfinity"))
193                         return new Symbol(sym.Num, new Double JavaDoc(-1.0/0.0));
194                     
195                     if(str.equals("+FloatInfinity"))
196                         return new Symbol(sym.Num, new Float JavaDoc(1.0f/0.0f));
197                         
198                     if(str.equals("-FloatInfinity"))
199                         return new Symbol(sym.Num, new Float JavaDoc(-1.0f/0.0f));
200                     
201                      
202                             
203                     // This catches directives like ".method"
204
if ((tok = ReservedWords.get(str)) != null) {
205                         return tok;
206                     }
207
208                     Number JavaDoc num;
209                     try {
210                         num = ScannerUtils.convertNumber(str);
211                     } catch (NumberFormatException JavaDoc e) {
212                         if (chars[0] == '.') {
213                             throw new jasError("Unknown directive or badly formed number.");
214                         } else {
215                             throw new jasError("Badly formatted number");
216                         }
217                     }
218
219                     if (num instanceof Integer JavaDoc) {
220                         return new Symbol(sym.Int, new Integer JavaDoc(num.intValue()));
221                     } else {
222                         return new Symbol(sym.Num, num);
223                     }
224                 }
225
226             case '"': // quoted strings
227
{
228                     int pos = 0;
229
230                     is_first_sep = false;
231
232                     for (;;) {
233                         advance();
234                         if (next_char == '\\') {
235                             advance();
236                             switch (next_char) {
237                             case 'n': next_char = '\n'; break;
238                             case 'r': next_char = '\r'; break;
239                             case 't': next_char = '\t'; break;
240                             case 'f': next_char = '\f'; break;
241                             case 'b': next_char = '\b'; break;
242                             case 'u':
243                             {
244                                 advance();
245                                 unicodeBuffer[0] = (char) next_char;
246                                 advance();
247                                 unicodeBuffer[1] = (char) next_char;
248                                 advance();
249                                 unicodeBuffer[2] = (char) next_char;
250                                 advance();
251                                 unicodeBuffer[3] = (char) next_char;
252                                 
253                                 // System.out.println(unicodeBuffer[0] + ":" + unicodeBuffer[1] + ":" + unicodeBuffer[2] + ":" + unicodeBuffer[3] + ":");
254

255                                 next_char = (char) Integer.parseInt(new String JavaDoc(unicodeBuffer, 0, 4), 16);
256                                 // System.out.println("value: " + next_char);
257
break;
258                             }
259                             case '"': next_char = '"'; break;
260                             case '\'': next_char = '\''; break;
261                             case '\\': next_char = '\\'; break;
262                             
263                             case '0': case '1': case '2': case '3': case '4':
264                             case '5': case '6': case '7':
265                 next_char = readOctal(next_char);
266                 break;
267                             default:
268                 throw new jasError("Bad backslash escape sequence");
269                             }
270                         } else if (next_char == '"') {
271                             break;
272                         }
273             
274             try {
275               chars[pos] = (char)next_char;
276             } catch (ArrayIndexOutOfBoundsException JavaDoc abe) {
277               char[] tmparray = new char[chars.length*2];
278               System.arraycopy(chars, 0,
279                        tmparray, 0,
280                        chars.length);
281               chars = tmparray;
282               chars[pos] = (char)next_char;
283             }
284                         pos++;
285                     }
286                     advance(); // skip close quote
287
return new Symbol(sym.Str, new String JavaDoc(chars, 0, pos));
288                 }
289
290             case ' ':
291             case '\t':
292             case '\r': // whitespace
293
advance();
294                 break;
295
296             case '=': // EQUALS token
297
advance();
298                 is_first_sep = false;
299                 return new Symbol(sym.EQ);
300
301             case ':': // COLON token
302
advance();
303                 is_first_sep = false;
304                 return new Symbol(sym.COLON);
305
306             case -1: // EOF token
307
is_first_sep = false;
308                 char_num = -1;
309                 line.setLength(0);
310                 return new Symbol(sym.EOF);
311
312             default:
313                 {
314                     // read up until a separatorcharacter
315

316                     int pos = 0;
317                     int secondPos = 0;
318                     chars[0] = (char)next_char;
319                     is_first_sep = false;
320
321                     pos++;
322                     for (;;) {
323                         advance();
324                         if (separator(next_char)) {
325                             break;
326                         }
327             try {
328               chars[pos] = (char)next_char;
329             } catch (ArrayIndexOutOfBoundsException JavaDoc abe) {
330               char[] tmparray = new char[chars.length*2];
331               System.arraycopy(chars, 0,
332                        tmparray, 0,
333                        chars.length);
334               chars = tmparray;
335               chars[pos] = (char)next_char;
336             }
337                         pos++;
338                     }
339
340                     secondPos = 0;
341                     
342                     // Parse all the unicode escape sequences
343
for(int i = 0; i < pos; i++)
344                         {
345               if(chars[i] == '\\' && (i + 5) < pos &&
346                  chars[i+1] == 'u') {
347                 int intValue =
348                   Integer.parseInt(new String JavaDoc(chars, i+2, 4), 16);
349                           
350                 try {
351                   secondChars[secondPos] = (char) intValue;
352                 } catch (ArrayIndexOutOfBoundsException JavaDoc abe) {
353                   char[] tmparray =
354                 new char[secondChars.length*2];
355                   System.arraycopy(secondChars, 0,
356                            tmparray, 0,
357                            secondChars.length);
358                   secondChars = tmparray;
359                   secondChars[secondPos] = (char)intValue;
360                 }
361                 secondPos++;
362
363                 i += 5;
364               } else {
365                 try {
366                   secondChars[secondPos] = chars[i];
367                 } catch (ArrayIndexOutOfBoundsException JavaDoc abe) {
368                   char[] tmparray =
369                 new char[secondChars.length*2];
370                   System.arraycopy(secondChars, 0,
371                            tmparray, 0,
372                            secondChars.length);
373                   secondChars = tmparray;
374                   secondChars[secondPos] = chars[i];
375                 }
376                 secondPos++;
377               }
378                         }
379                         
380                     // convert the byte array into a String
381
String JavaDoc str = new String JavaDoc(secondChars, 0, secondPos);
382
383                     Symbol tok;
384                     if ((tok = ReservedWords.get(str)) != null) {
385                         // Jasmin keyword or directive
386
return tok;
387                     } else if (InsnInfo.contains(str)) {
388                         // its a JVM instruction
389
return new Symbol(sym.Insn, str);
390                     } /*else if (str.charAt(0) == '$') {
391                         // Perform variable substitution
392                         Object v;
393                         if ((v = dict.get(str.substring(1))) != null) {
394                             return ((Symbol)v);
395                         }
396                     } */
else {
397                         // Unrecognized string token (e.g. a classname)
398
return new Symbol(sym.Word, str);
399                     }
400
401                 } /* default */
402             } /* switch */
403         } /* for */
404     }
405
406 };
407
408 /* --- Revision History ---------------------------------------------------
409 --- Jonathan Meyer, Feb 8 1997
410     Converted to be non-static
411 --- Jonathan Meyer, Oct 30 1996
412     Added support for more \ escapes in quoted strings (including octals).
413 --- Jonathan Meyer, Oct 1 1996
414     Added .interface and .implements
415 --- Jonathan Meyer, July 25 1996
416     changed IN to IS. Added token_line_num, which is the line number of the
417     last token returned by next_token().
418 --- Jonathan Meyer, July 24 1996 added mods to recognize '\r' as whitespace.
419 */

420
Popular Tags