1 package gnu.ecmascript; 2 import gnu.mapping.*; 3 import gnu.expr.*; 4 import gnu.lists.Sequence; 5 import gnu.text.Char; 6 7 10 11 public class Lexer extends gnu.text.Lexer 12 { 13 private boolean prevWasCR = false; 14 15 public Lexer (InPort port) 16 { 17 super(port); 18 } 19 20 public final static Char lparenToken = Char.make('('); 21 public final static Char rparenToken = Char.make(')'); 22 public final static Char lbraceToken = Char.make('{'); 23 public final static Char rbraceToken = Char.make('}'); 24 public final static Char lbracketToken = Char.make('['); 25 public final static Char rbracketToken = Char.make(']'); 26 public final static Char dotToken = Char.make('.'); 27 public final static Char condToken = Char.make('?'); 28 public final static Char commaToken = Char.make(','); 29 public final static Char colonToken = Char.make(':'); 30 public final static Char equalToken = Char.make('='); 31 public final static Char tildeToken = Char.make('~'); 32 public final static Char notToken = Char.make('!'); 33 public final static Char semicolonToken = Char.make(';'); 34 public final static Object eolToken = Char.make('\n'); 35 public final static Object eofToken = Sequence.eofValue; 36 public final static Reserved elseToken 37 = new Reserved("else", Reserved.ELSE_TOKEN); 38 public final static Reserved newToken 39 = new Reserved("new", Reserved.NEW_TOKEN); 40 41 static java.util.Hashtable reserved; 42 static synchronized void initReserved() 43 { 44 if (reserved == null) 45 { 46 reserved = new java.util.Hashtable (20); 47 reserved.put("null", new QuoteExp(null)); 48 reserved.put("true", new QuoteExp(java.lang.Boolean.TRUE)); 49 reserved.put("false", new QuoteExp(java.lang.Boolean.FALSE)); 50 51 reserved.put("var", new Reserved("var", Reserved.VAR_TOKEN)); 52 reserved.put("if", new Reserved("if", Reserved.IF_TOKEN)); 53 reserved.put("while", new Reserved("while", Reserved.WHILE_TOKEN)); 54 reserved.put("for", new Reserved("for", Reserved.FOR_TOKEN)); 55 reserved.put("continue", 56 new Reserved("continue", Reserved.CONTINUE_TOKEN)); 57 reserved.put("break", new Reserved("break", Reserved.BREAK_TOKEN)); 58 reserved.put("return", new Reserved("return", Reserved.RETURN_TOKEN)); 59 reserved.put("with", new Reserved("with", Reserved.WITH_TOKEN)); 60 reserved.put("function", 61 new Reserved("function", Reserved.FUNCTION_TOKEN)); 62 reserved.put("this", new Reserved("this", Reserved.THIS_TOKEN)); 63 reserved.put("else", elseToken); 64 reserved.put("new", newToken); 65 } 66 } 67 public static Object checkReserved(String name) 68 { 69 if (reserved == null) 70 initReserved(); 71 return reserved.get(name); 72 } 73 74 public Double getNumericLiteral (int c) 75 throws java.io.IOException 76 { 77 int radix = 10; 78 if (c == '0') 79 { 80 c = read(); 81 if (c == 'x' || c == 'X') 82 { 83 radix = 16; 84 c = read(); 85 } 86 else if (c == '.' || c == 'e' || c == 'E') ; 87 else 88 radix = 8; 89 } 90 int i = port.pos; 91 if (c >= 0) 92 i--; port.pos = i; 94 long ival = Lexer.readDigitsInBuffer(port, radix); 95 boolean digit_seen = port.pos > i; 96 if (digit_seen && port.pos < port.limit) 97 { 98 c = port.buffer[port.pos]; 99 if (! Character.isLetterOrDigit((char) c) && c != '.') 100 { 101 double dval; 102 if (ival >= 0) 103 dval = (double) ival; 104 else dval = gnu.math.IntNum.valueOf(port.buffer, i, port.pos - i, 106 radix, false).doubleValue(); 107 return new Double (dval); 108 } 109 } 110 if (radix != 10) 111 error("invalid character in non-decimal number"); 112 StringBuffer str = new StringBuffer (20); 113 if (digit_seen) 114 str.append(port.buffer, i, port.pos - i); 115 116 117 int point_loc = -1; 118 int exp = 0; 119 boolean exp_seen = false; 120 for (;;) 121 { 122 c = port.read (); 123 if (Character.digit ((char)c, radix) >= 0) 124 { 125 digit_seen = true; 126 str.append ((char) c); 127 continue; 128 } 129 switch (c) 130 { 131 case '.': 132 if (point_loc >= 0) 133 error("duplicate '.' in number"); 134 else 135 { 136 point_loc = str.length (); 137 str.append ('.'); 138 } 139 continue; 140 case 'e': case 'E': 141 int next; 142 if (radix != 10 || !((next = port.peek ()) == '+' || next == '-' 143 || Character.digit ((char)next, 10) >= 0)) 144 break; 145 if (!digit_seen) 146 error("mantissa with no digits"); 147 exp = readOptionalExponent(); 148 exp_seen = true; 149 c = read(); 150 break; 151 } 152 break; 153 } 154 155 if (c >= 0) 156 port.unread(); 157 158 if (exp != 0) 159 { 160 str.append('e'); 161 str.append(exp); 162 } 163 return new Double (str.toString ()); 164 } 165 166 public String getStringLiteral (char quote) 167 throws java.io.IOException , gnu.text.SyntaxException 168 { 169 int i = port.pos; 170 int start = i; 171 int limit = port.limit; 172 char[] buffer = port.buffer; 173 char c; 174 for ( ; i < limit; i++) 175 { 176 c = buffer[i]; 177 if (c == quote) 178 { 179 port.pos = i+1; 180 return new String (buffer, start, i - start); 181 } 182 if (c == '\\' || c == '\n' || c == '\r') 183 break; 184 } 185 port.pos = i; 186 StringBuffer sbuf = new StringBuffer (); 187 sbuf.append(buffer, start, i - start); 188 for (;;) 189 { 190 int ch = port.read(); 191 if (ch == quote) 192 return sbuf.toString(); 193 if (ch < 0) 194 eofError("unterminated string literal"); 195 if (ch == '\n' || ch == '\r') 196 fatal("string literal not terminated before end of line"); 197 if (ch == '\\') 198 { 199 ch = port.read(); 200 int val; 201 switch (ch) 202 { 203 case -1: 204 eofError("eof following '\\' in string"); 205 case '\n': case '\r': 206 fatal("line terminator following '\\' in string"); 207 case '\'': case '\"': case '\\': 208 break; 209 case 'b': ch = '\b'; break; 210 case 't': ch = '\t'; break; 211 case 'n': ch = '\n'; break; 212 case 'f': ch = '\f'; break; 213 case 'r': ch = '\r'; break; 214 case 'x': case 'u': 215 val = 0; 216 for (i = ch == 'x' ? 2 : 4; --i >= 0; ) 217 { 218 int d = port.read(); 219 if (d < 0) 220 eofError("eof following '\\" 221 +((char)ch)+"' in string"); 222 d = Character.forDigit((char) d, 16); 223 if (d < 0) 224 { 225 error("invalid char following '\\" 226 +((char)ch)+"' in string"); 227 val = '?'; 228 break; } 229 val = 16 * val + d; 230 } 231 ch = val; 232 break; 233 default: 234 if (ch < '0' || ch > '7') 235 break; 236 val = 0; 237 for (i = 3; --i >= 0; ) 238 { 239 int d = port.read(); 240 if (d < 0) 241 eofError("eof in octal escape in string literal"); 242 d = Character.forDigit((char) d, 8); 243 if (d < 0) 244 { 245 port.unread_quick(); 246 break; 247 } 248 val = 8 * val + d; 249 } 250 ch = val; 251 break; 252 253 } 254 } 255 sbuf.append((char) ch); 256 } 257 } 258 259 public String getIdentifier (int ch) 260 throws java.io.IOException 261 { 262 int i = port.pos; 263 int start = i - 1; 264 int limit = port.limit; 265 char[] buffer = port.buffer; 266 while (i < limit && Character.isJavaIdentifierPart(buffer[i])) 267 i++; 268 port.pos = i; 269 if (i < limit) 270 return new String (buffer, start, i - start); 271 StringBuffer sbuf = new StringBuffer (); 272 sbuf.append(buffer, start, i - start); 273 for (;;) 274 { 275 ch = port.read(); 276 if (ch < 0) 277 break; 278 if (Character.isJavaIdentifierPart((char) ch)) 279 sbuf.append((char) ch); 280 else 281 { 282 port.unread_quick(); 283 break; 284 } 285 } 286 return sbuf.toString(); 287 } 288 289 290 public Object maybeAssignment(Object token) 291 throws java.io.IOException , gnu.text.SyntaxException 292 { 293 int ch = read(); 294 if (ch == '=') 295 { 296 error("assignment operation not implemented"); 297 } 299 if (ch >= 0) 300 port.unread_quick(); 301 return token; 302 } 303 304 321 322 public Object getToken() 323 throws java.io.IOException , gnu.text.SyntaxException 324 { 325 int ch = read(); 326 for (;;) 327 { 328 if (ch < 0) 329 return eofToken; 330 if (! Character.isWhitespace((char) ch)) 331 break; 332 if (ch == '\r') 333 { 334 prevWasCR = true; 335 return eolToken; 336 } 337 if (ch == '\n' && ! prevWasCR) 338 return eolToken; 339 prevWasCR = false; 340 ch = read(); 341 } 342 343 switch (ch) 344 { 345 case '.': 346 ch = port.peek(); 347 if (ch >= '0' && ch <= '9') 348 return new QuoteExp(getNumericLiteral('.')); 349 return dotToken; 350 case '0': case '1': case '2': case '3': case '4': 351 case '5': case '6': case '7': case '8': case '9': 352 return new QuoteExp(getNumericLiteral(ch)); 353 case '\'': case '\"': 354 return new QuoteExp(getStringLiteral((char) ch)); 355 case '(': return lparenToken; 356 case ')': return rparenToken; 357 case '[': return lbracketToken; 358 case ']': return rbracketToken; 359 case '{': return lbraceToken; 360 case '}': return rbraceToken; 361 case '?': return condToken; 362 case ':': return colonToken; 363 case ';': return semicolonToken; 364 case ',': return commaToken; 365 case '=': 366 if (port.peek() == '=') 367 { 368 port.skip_quick(); 369 return Reserved.opEqual; 370 } 371 return equalToken; 372 case '!': 373 if (port.peek() == '=') 374 { 375 port.skip_quick(); 376 return Reserved.opNotEqual; 377 } 378 return notToken; 379 case '~': 380 return tildeToken; 381 case '*': return maybeAssignment(Reserved.opTimes); 382 case '/': return maybeAssignment(Reserved.opDivide); 383 case '^': return maybeAssignment(Reserved.opBitXor); 384 case '%': return maybeAssignment(Reserved.opRemainder); 385 case '+': 386 if (port.peek() == '+') 387 { 388 port.skip_quick(); 389 return maybeAssignment(Reserved.opPlusPlus); 390 } 391 return maybeAssignment(Reserved.opPlus); 392 case '-': 393 if (port.peek() == '-') 394 { 395 port.skip_quick(); 396 return maybeAssignment(Reserved.opMinusMinus); 397 } 398 return maybeAssignment(Reserved.opMinus); 399 case '&': 400 if (port.peek() == '&') 401 { 402 port.skip_quick(); 403 return maybeAssignment(Reserved.opBoolAnd); 404 } 405 return maybeAssignment(Reserved.opBitAnd); 406 case '|': 407 if (port.peek() == '|') 408 { 409 port.skip_quick(); 410 return maybeAssignment(Reserved.opBoolOr); 411 } 412 return maybeAssignment(Reserved.opBitOr); 413 case '>': 414 ch = port.peek(); 415 switch (ch) 416 { 417 case '>': 418 port.skip_quick(); 419 if (port.peek() == '>') 420 { 421 port.skip_quick(); 422 return maybeAssignment(Reserved.opRshiftUnsigned); 423 } 424 return maybeAssignment(Reserved.opRshiftSigned); 425 case '=': 426 port.skip_quick(); 427 return Reserved.opGreaterEqual; 428 default: 429 return Reserved.opGreater; 430 } 431 case '<': 432 ch = port.peek(); 433 switch (ch) 434 { 435 case '<': 436 port.skip_quick(); 437 return maybeAssignment(Reserved.opLshift); 438 case '=': 439 port.skip_quick(); 440 return Reserved.opLessEqual; 441 default: 442 return Reserved.opLess; 443 } 444 } 445 if (Character.isJavaIdentifierStart((char) ch)) 446 { 447 String word = getIdentifier(ch).intern(); 448 Object token = checkReserved(word); 449 if (token != null) 450 return token; 451 return word; 452 } 453 return Char.make((char) ch); 454 } 455 456 public static Object getToken(InPort inp) 457 throws java.io.IOException , gnu.text.SyntaxException 458 { 459 return new Lexer(inp).getToken(); 460 } 461 462 public static void main(String [] args) 463 { 464 InPort inp = InPort.inDefault(); 465 Lexer reader = new Lexer(inp); 466 for (;;) 467 { 468 try 469 { 470 Object token = reader.getToken(); 471 OutPort out = OutPort.outDefault(); 472 out.print("token:"); 473 out.print(token); 474 out.println(" [class:"+token.getClass()+"]"); 475 if (token == Sequence.eofValue) 476 break; 477 } 478 catch (Exception ex) 479 { 480 System.err.println("caught exception:"+ex); 481 return; 482 } 483 } 484 } 485 } 486 | Popular Tags |