Lexer


1   /*
2    * Copyright (c) 1998-2006 Caucho Technology -- all rights reserved
3    *
4    * This file is part of Resin(R) Open Source
5    *
6    * Each copy or derived work must preserve the copyright notice and this
7    * notice unmodified.
8    *
9    * Resin Open Source is free software; you can redistribute it and/or modify
10   * it under the terms of the GNU General Public License as published by
11   * the Free Software Foundation; either version 2 of the License, or
12   * (at your option) any later version.
13   *
14   * Resin Open Source is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
17   * of NON-INFRINGEMENT.  See the GNU General Public License for more
18   * details.
19   *
20   * You should have received a copy of the GNU General Public License
21   * along with Resin Open Source; if not, write to the
22   *   Free SoftwareFoundation, Inc.
23   *   59 Temple Place, Suite 330
24   *   Boston, MA 02111-1307  USA
25   *
26   * @author Scott Ferguson
27   */
28  
29  package com.caucho.es.parser;
30  
31  import com.caucho.es.*;
32  import com.caucho.java.LineMap;
33  import com.caucho.util.CharBuffer;
34  import com.caucho.util.L10N;
35  import com.caucho.vfs.ReadStream;
36  
37  import java.io.CharConversionException  ;
38  import java.io.IOException  ;
39  import java.util.ArrayList  ;
40  import java.util.HashMap  ;
41  import java.util.regex.Pattern  ;
42  
43  /**
44   * JavaScript lexer.
45   */
46  class Lexer {
  // Localizer used for the lexer's error messages.
  private static final L10N L = new L10N(Lexer.class);

  // Sentinel lexeme codes.  START marks "no lexeme buffered": peek() and
  // next() call lex() when the current lexeme is START.
  final static int ERROR = -3;
  final static int START = -2;
  final static int EOF = -1;

  // Codes from 256 upward are symbolic lexemes; single-character tokens
  // such as ')' or ';' are returned by lex() as their own character code.
  final static int RESERVED = 256;
  final static int LITERAL = RESERVED + 1;
  final static int REGEXP = LITERAL + 1;
  final static int IDENTIFIER = REGEXP + 1;
  final static int THIS = IDENTIFIER + 1;

  // Sharp variables: lex() returns HASH_DEF for "#<digits>=" and HASH_REF
  // for "#<digits>#".
  final static int HASH_DEF = THIS + 1;
  final static int HASH_REF = HASH_DEF + 1;

  // Operator classes used as the lexeme of entries in the operator table.
  final static int BIN_OP = HASH_REF + 1;
  final static int UNARY_OP = BIN_OP + 1;
  final static int BANDU_OP = UNARY_OP + 1;

  // Operator codes for multi-character operators.
  final static int RSHIFT = BANDU_OP + 1;
  final static int URSHIFT = RSHIFT + 1;
  final static int LSHIFT = URSHIFT + 1;
  final static int BITAND = LSHIFT + 1;
  final static int BITOR = BITAND + 1;

  final static int GEQ = BITOR + 1;
  final static int LEQ = GEQ + 1;
  final static int EQ = LEQ + 1;
  final static int NEQ = EQ + 1;

  final static int STRICT_EQ = NEQ + 1;
  final static int STRICT_NEQ = STRICT_EQ + 1;

  final static int AND = STRICT_NEQ + 1;
  final static int OR = AND + 1;

  final static int ASSIGN_OP = OR + 1;

  final static int PREFIX = ASSIGN_OP + 1;
  final static int POSTFIX = PREFIX + 1;
  final static int DELETE = POSTFIX + 1;
  final static int VOID = DELETE + 1;
  final static int TYPEOF = VOID + 1;

  // Keyword lexemes; the keyword strings are registered in the reserved
  // table by the constructor.
  final static int IF = TYPEOF + 1;
  final static int ELSE = IF + 1;

  final static int SWITCH = ELSE + 1;
  final static int CASE = SWITCH + 1;
  final static int DEFAULT = CASE + 1;

  final static int WHILE = DEFAULT + 1;
  final static int DO = WHILE + 1;
  final static int FOR = DO + 1;
  final static int IN = FOR + 1;
  final static int BREAK = IN + 1;
  final static int CONTINUE = BREAK + 1;

  final static int FUNCTION = CONTINUE + 1;
  // CONSTRUCTOR shares FUNCTION's value; the "constructor" keyword itself
  // is not registered in the reserved table.
  final static int CONSTRUCTOR = FUNCTION;
  final static int RETURN = CONSTRUCTOR + 1;

  final static int NEW = RETURN + 1;
  final static int VAR = NEW + 1;
  final static int WITH = VAR + 1;

  final static int NULL = WITH + 1;
  final static int UNDEFINED = NULL + 1;
  final static int TRUE = UNDEFINED + 1;
  final static int FALSE = TRUE + 1;
  final static int EVAL = FALSE + 1;

  final static int CLASS = EVAL + 1;
  final static int EXTENDS = CLASS + 1;

  final static int SYNCHRONIZED = EXTENDS + 1;

  final static int TRY = SYNCHRONIZED + 1;
  final static int CATCH = TRY + 1;
  final static int FINALLY = CATCH + 1;
  final static int THROW = FINALLY + 1;

  final static int IMPORT = THROW + 1;
  final static int STATIC = IMPORT + 1;

  // Highest symbolic lexeme code defined above.
  final static int LAST_LEXEME = STATIC;
133 
  // Operator table: CharBuffer operator text -> Op.  Filled by the first
  // constructor call (see opsPut).
  static HashMap   ops;
  // Reserved-word table: CharBuffer keyword -> Integer lexeme code
  // (see resPut).
  static HashMap   reserved;

  Global resin;
  // Input stream the lexer was constructed with.
  ReadStream is;
  // Both start at -1; pushMacro() treats peek >= 0 as "a character is
  // pending".  Presumably maintained by read()/ungetc(), which are outside
  // this view -- TODO confirm.
  int peek = -1;
  int peek2 = -1;

  // Macros suspended by pushMacro() while a newer macro is being read.
  ArrayList   macros = new ArrayList  ();

  // Current macro text, the read index into it, and the line number that
  // was current when the macro was pushed (all set in pushMacro()).
  CharBuffer macroText;
  int macroIndex;
  int macroOldLine;

  // Flags passed to Pattern.compile() when lex() validates a regexp
  // literal; presumably set by readRegexpFlags() -- TODO confirm.
  int _flags;

  int state;
  int lbrace;
  int stringClose;
  boolean isRegexp;

  // Optional map used by getLine()/getFilename() to translate positions
  // back to the original source; may be null.
  LineMap lineMap;
  String   filename;
  String   lastFilename;
  String   beginFilename;

  // last*: position recorded for the previous lexeme; begin*: position at
  // which the scan of the current lexeme started; line/lineCh: current
  // position.  newline() increments line and resets lineCh to 0.
  int lastLine;
  int beginLine;
  int beginLineCh;
  int line;
  int lineCh;

  Op op;
  // Buffered lexeme (START when none is buffered) and the lexeme most
  // recently handed out by peek()/next().
  int lexeme;
  int lastLexeme;
  // Scratch buffer lexString() fills with the decoded literal text.
  CharBuffer text;
  // Text of the current line; cleared by newline(), read by getToken().
  CharBuffer lineText = new CharBuffer();
  // Set once lex() has read end of input.
  boolean isEof = false;
  ESId id;
  // Value of the last LITERAL (or REGEXP source) lexeme.
  ESBase literal;
  // Number of the last sharp variable (#n= / #n#) scanned by lex().
  int intValue;
  // True if lex() crossed a line feed while scanning the current lexeme.
  boolean hasLf;
  // True when a '/' at the current position starts a regexp literal
  // rather than a division operator.
  boolean regexpOk;
  String   writeln;

  // Scratch buffer, used by lex() to collect "#pragma" names.
  CharBuffer temp = new CharBuffer();
180 
181   Lexer(ReadStream is, String   filename, int line, LineMap lineMap)
182   {
183     this.filename = filename;
184     this.line = line;
185     this.lastFilename = filename;
186     this.lastLine = line;
187     this.lineMap = lineMap;
188     this.is = is;
189     peek = -1;
190     peek2 = -1;
191     text = new CharBuffer();
192     lexeme = START;
193     lastLexeme = START;
194     regexpOk = true;
195     macroText = null;
196     macroIndex = 0;
197 
198     // Initialize the operator table
199     if (ops == null) {
200       ops = new HashMap  ();
201       opsPut(".", '.', '.', Parser.PREC_DOT, false);
202       opsPut("++", '+', POSTFIX, Parser.PREC_DOT, false);
203       opsPut("--", '-', POSTFIX, Parser.PREC_DOT, false);
204 
205       opsPut("@", '@', '@', Parser.PREC_DOT, false);
206       
207       opsPut("~", '~', UNARY_OP, Parser.PREC_UMINUS, false);
208       opsPut("!", '!', UNARY_OP, Parser.PREC_UMINUS, false);
209 
210       opsPut("*", '*', BIN_OP, Parser.PREC_TIMES, false);
211       opsPut("/", '/', BIN_OP, Parser.PREC_TIMES, false);
212       opsPut("%", '%', BIN_OP, Parser.PREC_TIMES, false);
213 
214       opsPut("+", '+', BANDU_OP, Parser.PREC_PLUS, false);
215       opsPut("-", '-', BANDU_OP, Parser.PREC_PLUS, false);
216 
217       opsPut(">>", RSHIFT, BIN_OP, Parser.PREC_SHIFT, false);
218       opsPut(">>>", URSHIFT, BIN_OP, Parser.PREC_SHIFT, false);
219       opsPut("<<", LSHIFT, BIN_OP, Parser.PREC_SHIFT, false);
220 
221       opsPut(">", '>', BIN_OP, Parser.PREC_CMP, false);
222       opsPut(">=", GEQ, BIN_OP, Parser.PREC_CMP, false);
223       opsPut("<", '<', BIN_OP, Parser.PREC_CMP, false);
224       opsPut("<=", LEQ, BIN_OP, Parser.PREC_CMP, false);
225       opsPut("==", EQ, BIN_OP, Parser.PREC_CMP, false);
226       opsPut("!=", NEQ, BIN_OP, Parser.PREC_CMP, false);
227       opsPut("===", STRICT_EQ, BIN_OP, Parser.PREC_CMP, false);
228       opsPut("!==", STRICT_NEQ, BIN_OP, Parser.PREC_CMP, false);
229 
230       opsPut("&", '&', BIN_OP, Parser.PREC_BITAND, false);
231       opsPut("^", '^', BIN_OP, Parser.PREC_BITXOR, false);
232       opsPut("|", '|', BIN_OP, Parser.PREC_BITOR, false);
233 
234       opsPut("&&", AND, BIN_OP, Parser.PREC_AND, false);
235       opsPut("||", OR, BIN_OP, Parser.PREC_OR, false);
236 
237       opsPut("?", '?', '?', Parser.PREC_COND, false);
238 
239       opsPut("=", '=', '=', Parser.PREC_ASSIGN, true);
240       opsPut("*=", '*', '=', Parser.PREC_ASSIGN, true);
241       opsPut("/=", '/', '=', Parser.PREC_ASSIGN, true);
242       opsPut("%=", '%', '=', Parser.PREC_ASSIGN, true);
243       opsPut("+=", '+', '=', Parser.PREC_ASSIGN, true);
244       opsPut("-=", '-', '=', Parser.PREC_ASSIGN, true);
245       opsPut(">>=", RSHIFT, '=', Parser.PREC_ASSIGN, true);
246       opsPut(">>>=", URSHIFT, '=', Parser.PREC_ASSIGN, true);
247       opsPut("<<=", LSHIFT, '=', Parser.PREC_ASSIGN, true);
248       opsPut("&=", '&', '=', Parser.PREC_ASSIGN, true);
249       opsPut("^=", '^', '=', Parser.PREC_ASSIGN, true);
250       opsPut("|=", '|', '=', Parser.PREC_ASSIGN, true);
251 
252       opsPut(",", ',', ',', Parser.PREC_COMMA, false);
253 
254       reserved = new HashMap  ();
255       resPut("new", NEW);
256       resPut("var", VAR);
257       resPut("delete", DELETE);
258       resPut("void", VOID);
259       resPut("typeof", TYPEOF);
260 
261       resPut("if", IF);
262       resPut("else", ELSE);
263       resPut("switch", SWITCH);
264       resPut("case", CASE);
265       resPut("default", DEFAULT);
266 
267       resPut("while", WHILE);
268       resPut("do", DO);
269       resPut("for", FOR);
270       resPut("in", IN);
271       resPut("break", BREAK);
272       resPut("continue", CONTINUE);
273 
274       resPut("null", NULL);
275       resPut("undefined", UNDEFINED);
276       resPut("true", TRUE);
277       resPut("false", FALSE);
278       resPut("this", THIS);
279       resPut("eval", EVAL);
280 
281       resPut("function", FUNCTION);
282       //resPut("constructor", CONSTRUCTOR);
283       resPut("return", RETURN);
284 
285       resPut("with", WITH);
286 
287       resPut("class", CLASS);
288       resPut("extends", EXTENDS);
289 
290       resPut("synchronized", SYNCHRONIZED);
291 
292       resPut("try", TRY);
293       resPut("catch", CATCH);
294       resPut("finally", FINALLY);
295       resPut("throw", THROW);
296 
297       resPut("import", IMPORT);
298       resPut("static", STATIC);
299       
300       resPut("const", RESERVED);
301       resPut("debugger", RESERVED);
302       resPut("enum", RESERVED);
303       resPut("export", RESERVED);
304       resPut("super", RESERVED);
305 /*
306       resPut("boolean", RESERVED);
307       resPut("byte", RESERVED);
308       resPut("char", RESERVED);
309       resPut("double", RESERVED);
310       resPut("float", RESERVED);
311       resPut("int", RESERVED);
312       resPut("long", RESERVED);
313       resPut("short", RESERVED);
314 */    
315       resPut("public", RESERVED);
316       resPut("private", RESERVED);
317       resPut("protected", RESERVED);
318       resPut("throws", RESERVED);
319     }
320   }
321   
322   Lexer(ReadStream is, String   filename, int line)
323   {
324     this(is, filename, line, null);
325   }
326   
327   Lexer(ReadStream is, LineMap lineMap)
328   {
329     this(is, null, 1, lineMap);
330   }
331 
332   void setLineMap(LineMap lineMap)
333   {
334     this.lineMap = lineMap;
335   }
336 
337   private void opsPut(String   name, int code, int lex, int prec, boolean flag)
338   {
339     ops.put(new CharBuffer(name), new Op(code, lex, prec, flag));
340   }
341 
342   private void resPut(String   name, int code)
343   {
344     reserved.put(new CharBuffer(name), new Integer  (code));
345   }
346 
347   int peek() throws ESParseException
348   {
349     try {
350       if (lexeme == START) {
351     lexeme = lex();
352       }
353       
354       lastLexeme = lexeme;
355 
356       return lexeme;
357     } catch (ESParseException e) {
358       throw e;
359     } catch (Exception   e) {
360       e.printStackTrace();
361       throw error(e.toString());
362     }
363   }
364 
365   int next() throws ESParseException
366   {
367     try {
368       int value = lexeme;
369 
370       if (value == START) {
371     value = lex();
372       }
373 
374       lastLexeme = value;
375       lexeme = START;
376 
377       lastFilename = beginFilename;
378       lastLine = beginLine;
379 
380       return value;
381     } catch (ESParseException e) {
382       throw e;
383     } catch (Exception   e) {
384       e.printStackTrace();
385       throw error(e == null ? "" : e.toString());
386     }
387   }
388 
389   int prev()
390   {
391     if (lastLexeme == START)
392       throw new RuntimeException  ();
393 
394     lexeme = lastLexeme;
395 
396     lastLexeme = START;
397 
398     return lexeme;
399   }
400 
401   int last()
402   {
403     if (lastLexeme == START)
404       throw new RuntimeException  ();
405 
406     return lastLexeme;
407   }
408 
409   private int peekCh() throws ESParseException
410   {
411     try {
412       int ch = read();
413       ungetc(ch);
414       return (ch);
415     } catch (Exception   e) {
416       return -1;
417     }
418   }
419 
420   /**
421    * Returns the next lexeme
422    */
423   private int lex() throws ESParseException
424   {
425     lastFilename = beginFilename;
426     lastLine = beginLine;
427     
428     hasLf = false;
429 
430     while (true) {
431       beginFilename = filename;
432       beginLine = line;
433       beginLineCh = lineCh;
434 
435       int ch = read();
436 
437       switch (ch) {
438       case -1:
439     isEof = true;
440     return EOF;
441 
442       case ' ': case '\t': case '\f': case 0x0b: /* vertical tab */
443     break;
444     
445       case '\n': 
446     newline();
447     hasLf = true;
448     break;
449 
450       case '+': case '-': case '*': case '!': case ',': case '^':
451       case '<': case '>': case '&': case '|': case '=': case '~':
452       case '?':
453     regexpOk = true; // exception ++/--
454     return lexOp(ch);
455 
456       case ')': case ']': 
457     regexpOk = false;
458     return ch;
459 
460       case ':': case ';': case '(': 
461       case '[': case '{': case '}':
462     regexpOk = true;
463     return ch;
464 
465       case '.':
466     {
467       int ch2 = read();
468 
469       if (ch2 >= '0' && ch2 <= '9') {
470         regexpOk = false;
471         return lexFloat(0, ch2);
472       }
473       else {
474         regexpOk = true;
475         ungetc(ch2);
476         return lexOp(ch);
477       }
478     }
479 
480       case '/':
481     {
482       int ch2 = read();
483 
484       if (ch2 == '/') {
485         for (ch2 = read(); 
486          ch2 > 0 && ch2 != '\n';
487          ch2 = read()) {
488         }
489 
490         ungetc(ch2);
491         break;
492       }
493       else if (ch2 == '*') {
494         boolean seenStar = false;
495         for (ch2 = read(); 
496          ch2 > 0 && (! seenStar || ch2 != '/');
497          ch2 = read()) {
498           if (ch2 == '/') {
499         ch2 = read();
500         if (ch2 == '*')
501           throw error(L.l("comments can't nest"));
502           }
503 
504           seenStar = ch2 == '*';
505 
506           if (ch2 == '\n') {
507         newline();
508         hasLf = true;
509               }
510         }
511         break;
512       }
513       else if (regexpOk) {
514         regexpOk = false;
515 
516         ungetc(ch2);
517         lexString('/', null, true, false);
518 
519         readRegexpFlags();
520         try {
521           Pattern   regexp = Pattern.compile(literal.toString(), _flags);
522           // checking for errors
523         } catch (Exception   e) {
524           // e.printStackTrace();
525           throw error(String.valueOf(e));
526         }
527 
528         return REGEXP;
529       } else {
530         ungetc(ch2);
531         return lexOp(ch);
532       }
533     }
534 
535       case '0': case '1': case '2': case '3': case '4': 
536       case '5': case '6': case '7': case '8': case '9':
537     regexpOk = false;
538     return lexNumber(ch);
539 
540       case '"': case '\'':
541     regexpOk = false;
542     return lexString((char) ch, null, false, false);
543 
544       case '@':
545     {
546       int ch2 = read();
547 
548       switch (ch2) {
549       case '"':
550         CharBuffer macro = new CharBuffer();
551         macro.append('(');
552         interpolate(macro, '"', null, "\"", "\"", false, false);
553         macro.append(')');
554         pushMacro(macro);
555         break;
556 
557       case '\'':
558         macro = new CharBuffer();
559         macro.append('(');
560         interpolate(macro, '\'', null, "\'", "\'", false, false);
561         macro.append(')');
562         pushMacro(macro);
563         break;
564 
565       case '@':
566         if ((ch2 = read()) < 0)
567           throw error(L.l("unexpected end of file"));
568         switch (ch2) {
569         case '{': ch2 = '}'; break;
570         case '<': ch2 = '>'; break;
571         case '(': ch2 = ')'; break;
572         case '[': ch2 = ']'; break;
573         }
574 
575         return lexString((char) ch2, null, true, false);
576 
577       case '<':
578         if ((ch2 = read()) != '<')
579           throw error(L.l("illegal character at `@'"));
580         if (scanMultiline())
581           return LITERAL;
582         break;
583 
584       case '/':
585         macro = new CharBuffer();
586         macro.append("new RegExp(");
587         interpolate(macro, '/', null, "@@/", "/", true, false);
588         macro.append(",");
589         macro.append(readRegexpFlags());
590         macro.append(")");
591         pushMacro(macro);
592         break;
593 
594       default:
595             return lexOp('@');
596       }
597       break;
598     }
599 
600       case '%':
601     {
602       int ch2 = read();
603 
604       regexpOk = true;
605       ungetc(ch2);
606       return lexOp(ch);
607     }
608 
609       case '#':
610     {
611       int ch2 = read();
612       if (line == 1 && lineCh == 2 && ch2 == '!') {
613         for (; ch2 > 0 && ch2 != '\n'; ch2 = read()) {
614         }
615 
616         ungetc(ch2);
617         break;
618       }
619 
620       if (ch2 >= 'a' && ch2 <= 'z' || ch2 >= 'A' && ch2 <= 'Z') {
621         temp.clear();
622         for (; ch2 >= 'a' && ch2 <= 'z' || ch2 >= 'A' && ch2 <= 'Z';
623          ch2 = read()) {
624           temp.append((char) ch2);
625         }
626 
627         if (temp.toString().equals("line"))
628           scanLine(ch2);
629         else if (temp.toString().equals("file"))
630           scanFile(ch2);
631         else
632           throw error(L.l("expected pragma at `{0}'", temp));
633 
634         break;
635       }
636 
637       if (ch2 < '0' || ch2 > '9')
638         throw error(L.l("expected digit at {0}", badChar(ch2)));
639       intValue = 0;
640 
641       for (; ch2 >= '0' && ch2 <= '9'; ch2 = read())
642         intValue = 10 * intValue + ch2 - '0';
643 
644       if (ch2 == '=')
645         return HASH_DEF;
646       else if (ch2 == '#')
647         return HASH_REF;
648       else
649         throw error(L.l("expected sharp variable at {0}", badChar(ch)));
650     }
651 
652       default:
653     if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' ||
654         ch == '_' || ch == '$') { 
655       regexpOk = false;
656       return lexId(ch);
657     } else {
658       throw error(L.l("illegal character at {0}", badChar(ch)));
659     }
660       }
661     }
662   }
663 
664   /**
665    * Returns the text object for the lexeme.
666    */
667   CharBuffer getText() { return text; }
668 
669   boolean isEof() { return isEof; }
670 
671   /**
672    * Used for error messages.
673    */
674   String   getToken() 
675   {
676     return lineText.substring(beginLineCh, lineCh);
677   }
678   /**
679    * Returns the Id
680    */
681   ESId getId() { return id; }
682   /**
683    * Returns true if seen linefeed since the last.
684    */
685   boolean seenLineFeed() { return hasLf; }
686 
687   ESParseException error(String   text)
688   {
689     return new ESParseException(filename, beginLine, beginLineCh,
690                 line, lineCh, text);
691   }
692 
693   private String   hex(int value)
694   {
695     CharBuffer cb = new CharBuffer();
696 
697     for (int b = 3; b >= 0; b--) {
698       int v = (value >> (4 * b)) & 0xf;
699       if (v < 10)
700     cb.append((char) (v + '0'));
701       else
702     cb.append((char) (v - 10 + 'a'));
703     }
704 
705     return cb.toString();
706   }
707 
708   private String   badChar(int ch)
709   {
710     if (ch >= 0x20 && ch <= 0x7f)
711       return "`" + (char) ch + "'";
712     else if (ch == '\n')
713       return L.l("end of line");
714     else if (ch == -1)
715       return L.l("end of file");
716     else
717       return "`" + (char) ch + "' (\\u" + hex(ch) + ")";
718   }
719 
720   String   getFilename()
721   {
722     if (lineMap != null) {
723       LineMap.Line map = lineMap.getLine(line);
724       if (map != null)
725         return map.getSourceFilename();
726     }
727 
728     return filename;
729   }
730 
731   long getLastModified()
732   {
733     if (is.getPath() == null)
734       return 0;
735     else
736       return is.getPath().getLastModified();
737   }
738   
739   int getLine()
740   {
741     if (lineMap != null) {
742       LineMap.Line map = lineMap.getLine(line);
743       if (map != null) {
744         return map.getSourceLine(line);
745       }
746     }
747 
748     return line;
749   }
750 
751   String   getLastFilename()
752   {
753     if (lineMap != null) {
754       LineMap.Line map = lineMap.getLine(lastLine);
755       if (map != null)
756         return map.getSourceFilename();
757     }
758 
759     return lastFilename;
760   }
761   
762   int getLastLine()
763   {
764     if (lineMap != null) {
765       LineMap.Line map = lineMap.getLine(lastLine);
766       if (map != null) {
767         return map.getSourceLine(lastLine);
768       }
769     }
770 
771     return lastLine;
772   }
773   
774   private void pushMacro(CharBuffer cb)
775     throws ESParseException
776   {
777     if (peek >= 0)
778       cb.append((char) read()); // Because of peek
779     if (peek >= 0)
780       cb.append((char) read()); // Because of peek
781     if (macroText != null)
782       macros.add(new Macro(macroText, macroIndex, macroOldLine));
783     macroText = cb;
784     macroIndex = 0;
785     macroOldLine = line;
786   }
787 
788   /**
789    * Update variables to handle a newline.
790    */
791   private void newline()
792   {
793     line++;
794     lineCh = 0;
795     lineText.clear();
796   }
797 
798   /**
799    * Handles all the goodies for a floating point number after the
800    * dot or 'e'
801    */
802   private int lexFloat(double value, int ch) throws ESParseException
803   {
804     int expt = 0;
805 
806     for (; ch >= '0' && ch <= '9'; ch = read()) {
807       value = 10 * value + ch - '0';
808       expt--;
809     }
810 
811     if (ch == 'e' || ch == 'E') {
812       ch = read();
813 
814       int sign = 1;
815       if (ch == '-') {
816     sign = -1;
817     ch = read();
818       } else if (ch == '+') {
819     ch = read();
820       }
821 
822       if (ch < '0' || ch > '9')
823     throw error(L.l("expected exponent at {0}", badChar(ch)));
824 
825       int userExpt = 0;
826       for (; ch >= '0' && ch <= '9'; ch = read()) {
827     userExpt = 10 * userExpt + ch - '0';
828       }
829 
830       expt += sign * userExpt;
831     }
832     
833     ungetc(ch);
834     if (expt >= 0)
835       literal = ESNumber.create(value * Math.pow(10, expt));
836     else
837       literal = ESNumber.create(value / Math.pow(10, -expt));
838     return LITERAL;
839   }
840 
841   /**
842    * Lexeme for a number
843    */
844   private int lexNumber(int ch) throws ESParseException
845   {
846     int radix = 10;
847     double value = 0;
848     boolean hasChar = true;
849 
850     if (ch == '0') {
851       ch = read();
852       if (ch >= '0' && ch <= '9')
853     radix = 8;
854       else if (ch == 'x' || ch == 'X') {
855     hasChar = false;
856     radix = 16;
857     ch = read();
858       }
859     }
860 
861     for (; ch >= 0; ch = read()) {
862       if (ch >= '0' && ch <= '9') {
863     value = radix * value + ch - '0';
864     hasChar = true;
865     
866     if (radix == 8 && ch >= '8')
867       throw error(L.l("expected octal digit at {0}", badChar(ch)));
868       } else if (radix == 16 && ch >= 'a' && ch <= 'f') {
869     hasChar = true;
870     value = radix * value + ch - 'a' + 10;
871       }
872       else if (radix == 16 && ch >= 'A' && ch <= 'F') {
873     hasChar = true;
874     value = radix * value + ch - 'A' + 10;
875       }
876       else
877     break;
878     }
879 
880     if (! hasChar)
881       throw error(L.l("expected hex digit at {0}", badChar(ch)));
882 
883     if (radix == 10 && ch == '.') {
884       ch = read();
885       
886       if (ch >= '0' && ch <= '9')
887     return lexFloat(value, ch);
888       else {
889     ungetc(ch);
890     literal = ESNumber.create(value);
891     return LITERAL;
892       }
893     } else if (radix == 10 && (ch == 'e' || ch == 'E'))
894       return lexFloat(value, ch);
895     else {
896       ungetc(ch);
897       literal = ESNumber.create(value);
898       return LITERAL;
899     }
900   }
901 
902   /**
903    * Returns the number for a hex digit.
904    */
905   private int hexDigit(int ch) throws ESParseException
906   {
907     if (ch >= '0' && ch <= '9')
908       return ch - '0';
909     else if (ch >= 'a' && ch <= 'f')
910       return ch - 'a' + 10;
911     else if (ch >= 'A' && ch <= 'F')
912       return ch - 'A' + 10;
913     else
914       throw error(L.l("expected hex digit at {0}", badChar(ch)));
915   }
916 
917   /**
918    * Lexeme for a string.
919    */
920   private int lexString(char endCh,
921             String   endTail,
922             boolean isRegexp, 
923             boolean isMultiline)
924     throws ESParseException
925   {
926     text.setLength(0);
927     
928     int ch = read();
929     for (; ch >= 0; ch = read()) {
930       if (ch == '\n') {
931         if (isMultiline) {
932         }
933     else if (isRegexp)
934       throw error(L.l("unexpected end of line in regular expression"));
935         else
936       throw error(L.l("unexpected end of line in string"));
937     newline();
938       }
939 
940       if (ch != endCh) {
941       }
942       else if (endTail == null) {
943     literal = ESString.create(text.toString());
944     return LITERAL;
945       }
946       else if (! text.endsWith(endTail)) {
947       }
948       else if (text.length() == endTail.length()) {
949     literal = ESString.create("");
950     return LITERAL;
951       }
952       else {
953         char tailCh = text.charAt(text.length() - endTail.length() - 1);
954 
955         if (tailCh == '\n') {
956           text.setLength(text.length() - endTail.length() - 1);
957           literal = ESString.create(text.toString());
958           return LITERAL;
959         }
960       }
961 
962       if (ch == '\\') {
963     ch = read();
964     switch (ch) {
965     case -1:
966           if (isRegexp)
967             throw error(L.l("unexpected end of file in regular expression"));
968           else
969             throw error(L.l("unexpected end of file in string"));
970 
971     case '\n':
972           if (isRegexp)
973             throw error(L.l("unexpected end of line in regular expression"));
974           else
975             throw error(L.l("unexpected end of line in string"));
976 
977     case 'b':
978       if (isRegexp)
979         text.append("\\b");
980       else
981         text.append('\b');
982       break;
983 
984     case 'e':
985       text.append((char) 0x1b);
986       break;
987 
988     case 'f':
989       text.append('\f');
990       break;
991 
992     case 'n':
993       text.append('\n');
994       break;
995 
996     case 'r':
997       text.append('\r');
998       break;
999 
1000    case 't':
1001      text.append('\t');
1002      break;
1003
1004    case 'v':
1005      text.append((char) 0xb);
1006      break;
1007
1008    case 'c':
1009      {
1010        ch = read();
1011        if (ch >= 'a' && ch <= 'z')
1012          text.append((char) (ch - 'a' + 1));
1013        else if (ch >= 'A' && ch <= 'Z')
1014          text.append((char) (ch - 'A' + 1));
1015        else if (ch - '@' >= 0 && ch - '@' < ' ')
1016          text.append((char) (ch - '@'));
1017        else
1018          throw error(L.l("expected control character at {0}",
1019                              badChar(ch)));
1020      }
1021      break;
1022
1023    case 'o':
1024      {
1025        int value = 0;
1026        while ((ch = read()) >= '0' && ch <= '8') {
1027          value = 8 * value + ch - '0';
1028        }
1029        ungetc(ch);
1030        text.append((char) value);
1031      }
1032      break;
1033
1034    case 'x':
1035      {
1036        int value = 16 * hexDigit(read());
1037        value += hexDigit(read());
1038        text.append((char) value);
1039      }
1040      break;
1041
1042    case 'u':
1043      {
1044        int value = 4096 * hexDigit(read());
1045        value += 256 * hexDigit(read());
1046        value += 16 * hexDigit(read());
1047        value += hexDigit(read());
1048        text.append((char) value);
1049      }
1050      break;
1051
1052    case '0': case '1': case '2': case '3':
1053    case '4': case '5': case '6': case '7':
1054      {
1055        int value = ch - '0';
1056
1057        if (ch != '0' && isRegexp) {
1058          text.append('\\');
1059          text.append((char) ch);
1060          break;
1061        }
1062
1063        if ((ch = read()) >= '0' && ch <= '7') {
1064          value = 8 * value + ch - '0';
1065
1066          if (value >= 040) {
1067          }
1068          else if ((ch = read()) >= '0' && ch <= '7')
1069        value = 8 * value + ch - '0';
1070          else
1071        ungetc(ch);
1072        } else
1073          ungetc(ch);
1074        text.append((char) value);
1075      }
1076      break;
1077
1078    default:
1079      if (isRegexp)
1080        text.append('\\');
1081      text.append((char) ch);
1082      break;
1083    }
1084      } else {
1085    text.append((char) ch);
1086      }
1087    }
1088
1089    if (ch != -1) {
1090    }
1091    else if (isRegexp)
1092      throw error(L.l("unexpected end of file in regular expression"));
1093    else
1094      throw error(L.l("unexpected end of file in string"));
1095
1096    literal = ESString.create(text.toString());
1097
1098    return LITERAL;
1099  }
1100
1101  private void scanMacroStatement(CharBuffer macro, int end,
1102                  boolean isRegexp, boolean multiline)
1103   throws ESParseException
1104  {
1105    int ch;
1106
1107    while ((ch = read()) >= 0 && ch != end) {
1108      macro.append((char) ch);
1109
1110      switch (ch) {
1111      case '\\':
1112    ch = read();
1113    macro.append((char) ch);
1114    break;
1115
1116      case '\'':
1117      case '"':
1118    int testch = ch;
1119          
1120    while ((ch = read()) >= 0) {
1121      if (ch == '\\') {
1122        macro.append((char) ch);
1123        ch = read();
1124      }
1125      else if (ch == testch) {
1126        macro.append((char) ch);
1127        break;
1128      } else if (ch == '\n') {
1129        if (! multiline)
1130          throw error("unexpected end of line in " +
1131              (isRegexp ? "regular expression" : "string"));
1132        newline();
1133      }
1134  
1135      macro.append((char) ch);
1136    }
1137    break;
1138
1139      case '(':
1140    scanMacroStatement(macro, ')', isRegexp, multiline);
1141    macro.append(')');
1142    break;
1143
1144      case '{':
1145    scanMacroStatement(macro, '}', isRegexp, multiline);
1146    macro.append('}');
1147    break;
1148
1149      case '\n':
1150    if (! multiline)
1151      throw error("unexpected end of line in " + 
1152              (isRegexp ? "regular expression" : "string"));
1153    newline();
1154    break;
1155
1156      default:
1157    break;
1158      }
1159    }
1160  }
1161
1162  private void interpolate(CharBuffer macro, int tail,
1163               String   matchText,
1164               String   beginStr, String   endStr,
1165               boolean isRegexp, boolean multiline)
1166    throws ESParseException
1167  {
1168    int ch = read();
1169    int ch1;
1170
1171    macro.append(beginStr);
1172    int start = macro.length();
1173  loop:
1174    for (; ch >= 0; ch = read()) {
1175      switch (ch) {
1176      case '\\':
1177    macro.append((char) ch);
1178    ch = read();
1179    if (ch != -1)
1180      macro.append((char) ch);
1181    break;
1182
1183      case '$':
1184    if ((ch = read()) == -1)
1185      break;
1186
1187    if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' ||
1188        ch == '_' || ch == '$') { 
1189      macro.append(endStr);
1190      macro.append("+(");
1191      macro.append((char) ch);
1192
1193      while ((ch = read()) >= 0 && 
1194         (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
1195         (ch >= '0' && ch <= '9') || ch == '_' || ch == '$') {
1196        macro.append((char) ch);
1197      }
1198      ungetc(ch);
1199      macro.append(")+");
1200      macro.append(beginStr);
1201    } else if (ch == '{') {
1202      macro.append(endStr);
1203      macro.append("+(");
1204      scanMacroStatement(macro, '}', isRegexp, multiline);
1205      macro.append(")+");
1206      macro.append(beginStr);
1207    } else if (ch == '(') {
1208      macro.append(endStr);
1209      macro.append("+(");
1210      scanMacroStatement(macro, ')', isRegexp, multiline);
1211      macro.append(")+");
1212      macro.append(beginStr);
1213    } else {
1214      ungetc(ch);
1215      macro.append('$');
1216    }
1217    break;
1218    
1219      default:
1220    if (ch == '\n') {
1221      newline();
1222      if (! multiline) 
1223        throw error("unexpected end of line in " +
1224            (isRegexp ? "regular expression" : "string"));
1225    }
1226
1227    if (ch != tail) {
1228    }
1229    else if (matchText == null) {
1230      break loop;
1231    }
1232    else if (! macro.endsWith(matchText)) {
1233    }
1234    else if (macro.length() - start == matchText.length()) {
1235      macro.setLength(start);
1236      break loop;
1237    }
1238    else if (macro.charAt(macro.length() - matchText.length() - 1) == '\n') {
1239      macro.setLength(macro.length() - matchText.length() - 1);
1240      break loop;
1241    }
1242
1243    macro.append((char) ch);
1244
1245    break;
1246      }
1247    }
1248
1249    macro.append(endStr);
1250  }
1251
1252  private boolean scanMultiline() throws ESParseException
1253  {
1254    int ch;
1255    CharBuffer end = new CharBuffer();
1256    boolean interpolate = true;
1257    boolean endNewline = true;
1258
1259    if ((ch = read()) >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' ||
1260    ch == '_' || ch == '$') {
1261      for (; ch >= 0 && ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' ||
1262         ch == '_' || ch == '$' || ch >= '0' && ch <= '9';
1263       ch = read()) {
1264    end.append((char) ch);
1265      }
1266    } else if (ch == '\'') {
1267      interpolate = false;
1268      for (ch = read();
1269           ch >= 0 && ch != '\'' && ch != '\n';
1270           ch = read()) {
1271    end.append((char) ch);
1272      }
1273
1274      if (ch != '\'')
1275    throw error(L.l("multiline escape error at {0}", badChar(ch)));
1276      ch = read();
1277    } else if (ch == '`') {
1278      interpolate = false;
1279      for (ch = read();
1280           ch >= 0 && ch != '`' && ch != '\n';
1281           ch = read()) {
1282    end.append((char) ch);
1283      }
1284
1285      if (ch != '`')
1286    throw error(L.l("multiline escape error at {0}", badChar(ch)));
1287      endNewline = false;
1288    } else if (ch == '\"') {
1289      for (ch = read();
1290           ch >= 0 && ch != '\"' && ch != '\n';
1291           ch = read()) {
1292    end.append((char) ch);
1293      }
1294
1295      if (ch != '\"')
1296    throw error(L.l("multiline escape error at {0}", badChar(ch)));
1297      ch = read();
1298    }
1299    
1300    int oldLine = line;
1301    CharBuffer lineTail = null;
1302
1303    if (endNewline) {
1304      lineTail = new CharBuffer();
1305      for (; ch >= 0 && ch != '\n'; ch = read()) {
1306    lineTail.append((char) ch);
1307      }
1308      if (ch == '\r') {
1309        lineTail.append((char) ch);
1310        ch = read();
1311      }
1312      if (ch == '\n') {
1313    newline();
1314    lineTail.append((char) ch);
1315      }
1316    }
1317
1318    CharBuffer macro = null;
1319    String   endString = end.toString();
1320    if (interpolate) {
1321      macro = new CharBuffer();
1322      macro.append('(');
1323      interpolate(macro, '\n', endString, "@<<`" + endString + "`", 
1324          "\n" + endString + '\n', false, true);
1325      macro.append("+'\\n')");
1326    } else {
1327      if (endNewline) {
1328    lexString('\n', endString, false, true);
1329    text.append('\n');
1330    literal = ESString.create(text);
1331      } else {
1332    lexString('\n', endString, false, true);
1333    line -= 2;
1334      }
1335    }
1336
1337    if (endNewline) {
1338      pushMacro(lineTail);
1339      line = oldLine;
1340    }
1341
1342    if (interpolate) {
1343      pushMacro(macro);
1344      line++;
1345      return false;
1346    } else
1347      return true;
1348  }
1349
1350  private int readRegexpFlags() throws ESParseException
1351  {
1352    int ch;
1353    while (true) {
1354      switch ((ch = read())) {
1355      case 'x':
1356    _flags |= Pattern.COMMENTS;
1357    break;
1358      case 'i':
1359    _flags |= Pattern.CASE_INSENSITIVE;
1360    break;
1361      case 'g':
1362    break;
1363      case 'm':
1364    _flags |= Pattern.MULTILINE;
1365    break;
1366      case 's':
1367    break;
1368      default:
1369    ungetc(ch);
1370    return _flags;
1371      }
1372    }
1373  }
1374
1375  /**
1376   * Lexeme for an Id.  Reserved words are looked up in a
1377   * HashMap.
1378   */
1379  private int lexId(int ch) throws ESParseException
1380  {
1381    text.setLength(0);
1382
1383    text.append((char) ch);
1384
1385    while (true) {
1386      ch = read();
1387
1388      if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' ||
1389      ch == '_' || ch == '$' || ch >= '0' && ch <= '9') { 
1390    text.append((char) ch);
1391      } else {
1392    ungetc(ch);
1393
1394    break;
1395      }
1396    }
1397
1398    Integer   value = (Integer  ) reserved.get(text);
1399 
1400    if (value == null) {
1401      id = ESId.intern(text.toString());
1402      return IDENTIFIER;
1403    }
1404    else {
1405      int intValue = value.intValue();
1406
1407      switch (intValue) {
1408      case NULL: 
1409    literal = ESBase.esNull;
1410    return LITERAL;
1411
1412      case UNDEFINED: 
1413    literal = ESBase.esUndefined;
1414    return LITERAL;
1415
1416      case FALSE: 
1417    literal = ESBoolean.create(false);
1418    return LITERAL;
1419
1420      case TRUE: 
1421    literal = ESBoolean.create(true);
1422    return LITERAL;
1423
1424      default: return value.intValue();
1425      }
1426    }
1427  }
1428
1429  /**
1430   * Lexeme for an operation
1431   */
1432  private int lexOp(int ch) throws ESParseException
1433  {
1434    text.setLength(0);
1435    text.append((char) ch);
1436
1437  loop:
1438    while ((ch = read()) >= 0) {
1439      switch (ch) {
1440      case '+': case '-': case '*': case '/': case '%': case '!':
1441      case '<': case '.': case '>': case '&': case '|': case '=':
1442      case '^': case '?':
1443    text.append((char) ch);
1444
1445    op = (Op) ops.get(text);
1446    if (op == null) {
1447      text.setLength(text.length() - 1);
1448      ungetc(ch);
1449      break loop;
1450    }
1451    break;
1452
1453      default:
1454    ungetc(ch);
1455    break loop;
1456      }
1457    }
1458
1459    op = (Op) ops.get(text);
1460
1461    // XXX: non-reachable
1462    if (op == null)
1463      throw error(L.l("expected operator at `{0}'", text.toString()));
1464
1465    return op.lexeme;
1466  }
1467
1468  /**
1469   * Return the operation for a lexeme.  Binary operations like '*' will
1470   * return BIN_OP as the lexeme.  Calling getOp() will get the actual
1471   * operation.
1472   */
1473  int getOp()
1474  {
1475    return op.op;
1476  }
1477
1478  int getPrecedence()
1479  {
1480    return op.precedence;
1481  }
1482
1483  boolean isRightAssoc()
1484  {
1485    return op.isRightAssoc;
1486  }
1487
1488  ESBase getLiteral()
1489  {
1490    return literal;
1491  }
1492
1493  int getFlags()
1494  {
1495    return _flags;
1496  }
1497
1498  private void scanLine(int ch) throws ESParseException
1499  {
1500    for (; ch == ' ' || ch == '\t'; ch = read()) {
1501    }
1502
1503    if (ch < '0' || ch > '9')
1504      throw error(L.l("expected digit at {0}", badChar(ch)));
1505
1506    line = 0;
1507    for (; ch >= '0' && ch <= '9'; ch = read())
1508      line = 10 * line + ch - '0';
1509
1510    for (; ch == ' ' || ch == '\t'; ch = read()) {
1511    }
1512
1513    if (ch != '#')
1514      throw error(L.l("expected `#' at {0}", badChar(ch)));
1515  }
1516
1517  private void scanFile(int ch) throws ESParseException
1518  {
1519    for (; ch == ' ' || ch == '\t'; ch = read()) {
1520    }
1521
1522    temp.clear();
1523    for (; ch >= 0 && ch != ' ' && ch != '\t' && ch != '#'; ch = read())
1524      temp.append((char) ch);
1525
1526    if (temp.length() == 0)
1527      throw error(L.l("expected filename at {0}", badChar(ch)));
1528    filename = temp.toString();
1529
1530    for (; ch == ' ' || ch == '\t'; ch = read()) {
1531    }
1532
1533    line = 0;
1534    for (; ch >= '0' && ch <= '9'; ch = read())
1535      line = 10 * line + ch - '0';
1536
1537    if (line == 0)
1538      line = 1;
1539
1540    for (; ch == ' ' || ch == '\t'; ch = read()) {
1541    }
1542
1543    if (ch != '#')
1544      throw error(L.l("expected `#' at {0}", badChar(ch)));
1545  }
1546
1547  /**
1548   * Reads the next character.
1549   */
1550  private int read() throws ESParseException
1551  {
1552    lineCh++;
1553    if (peek >= 0) {
1554      int ch = peek;
1555      peek = peek2;
1556      peek2 = -1;
1557      return ch;
1558    } 
1559
1560    while (macroText != null) {
1561      if (macroIndex < macroText.length()) {
1562    int ch = macroText.charAt(macroIndex++);
1563    lineText.append((char) ch);
1564    return ch;
1565      }
1566
1567      line = macroOldLine;
1568
1569      if (macros.size() == 0)
1570    macroText = null;
1571      else {
1572    Macro macro = (Macro) macros.remove(macros.size() - 1);
1573    macroText = macro.text;
1574    macroIndex = macro.index;
1575    macroOldLine = macro.oldLine;
1576      }
1577    }
1578
1579    try {
1580      int ch = is.readChar();
1581
1582      if (ch == '\r') {
1583        ch = is.readChar();
1584        if (ch != '\n') {
1585          if (ch == '\r')
1586            peek = '\n';
1587          else
1588            peek = ch;
1589        }
1590        ch = '\n';
1591      }
1592      lineText.append((char) ch);
1593
1594      return ch;
1595    } catch (CharConversionException   e1) {
1596      throw error(L.l("expected {0} encoded character", is.getEncoding()));
1597    } catch (IOException   e1) {
1598      throw new ESParseException(e1);
1599    }
1600  }
1601
1602  private void ungetc(int ch)
1603  {
1604    peek2 = peek;
1605    peek = ch;
1606    if (lineCh > 0)
1607      lineCh--;
1608
1609    /*
1610    if (ch == '\n')
1611      line--;
1612    */
1613  }
1614
1615  static class Op {
1616    int op;
1617    int lexeme;
1618    int precedence;
1619    boolean isRightAssoc;
1620
1621    Op(int op, int lexeme, int precedence, boolean isRightAssoc)
1622    {
1623      this.op = op;
1624      this.lexeme = lexeme;
1625      this.precedence = precedence;
1626      this.isRightAssoc = isRightAssoc;
1627    }
1628  };
1629
1630  class Macro {
1631    CharBuffer text;
1632    int index;
1633    int oldLine;
1634
1635    void clear()
1636    {
1637      text.clear();
1638      index = 0;
1639    }
1640    
1641    Macro(CharBuffer cb, int index, int oldLine)
1642    {
1643      this.text = cb;
1644      this.index = index;
1645      this.oldLine = oldLine;
1646    }
1647  }
1648}
1649
1650
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags