KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jruby > lexer > yacc > RubyYaccLexer


1 /***** BEGIN LICENSE BLOCK *****
2  * Version: CPL 1.0/GPL 2.0/LGPL 2.1
3  *
4  * The contents of this file are subject to the Common Public
5  * License Version 1.0 (the "License"); you may not use this file
6  * except in compliance with the License. You may obtain a copy of
7  * the License at http://www.eclipse.org/legal/cpl-v10.html
8  *
9  * Software distributed under the License is distributed on an "AS
10  * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
11  * implied. See the License for the specific language governing
12  * rights and limitations under the License.
13  *
14  * Copyright (C) 2002 Benoit Cerrina <b.cerrina@wanadoo.fr>
15  * Copyright (C) 2002-2004 Anders Bengtsson <ndrsbngtssn@yahoo.se>
16  * Copyright (C) 2002-2004 Jan Arne Petersen <jpetersen@uni-bonn.de>
17  * Copyright (C) 2004-2006 Thomas E Enebo <enebo@acm.org>
18  * Copyright (C) 2004 Stefan Matthias Aust <sma@3plus4.de>
19  * Copyright (C) 2004-2005 David Corbin <dcorbin@users.sourceforge.net>
20  * Copyright (C) 2005 Zach Dennis <zdennis@mktec.com>
21  * Copyright (C) 2006 Thomas Corbat <tcorbat@hsr.ch>
22  *
23  * Alternatively, the contents of this file may be used under the terms of
24  * either of the GNU General Public License Version 2 or later (the "GPL"),
25  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
26  * in which case the provisions of the GPL or the LGPL are applicable instead
27  * of those above. If you wish to allow use of your version of this file only
28  * under the terms of either the GPL or the LGPL, and not to allow others to
29  * use your version of this file under the terms of the CPL, indicate your
30  * decision by deleting the provisions above and replace them with the notice
31  * and other provisions required by the GPL or the LGPL. If you do not delete
32  * the provisions above, a recipient may use your version of this file under
33  * the terms of any one of the CPL, the GPL or the LGPL.
34  ***** END LICENSE BLOCK *****/

35 package org.jruby.lexer.yacc;
36
37 import java.io.IOException JavaDoc;
38
39 import java.math.BigInteger JavaDoc;
40
41 import org.jruby.ast.BackRefNode;
42 import org.jruby.ast.BignumNode;
43 import org.jruby.ast.CommentNode;
44 import org.jruby.ast.FixnumNode;
45 import org.jruby.ast.FloatNode;
46 import org.jruby.ast.NthRefNode;
47 import org.jruby.common.IRubyWarnings;
48 import org.jruby.parser.BlockStaticScope;
49 import org.jruby.parser.ParserSupport;
50 import org.jruby.parser.StaticScope;
51 import org.jruby.parser.Tokens;
52 import org.jruby.util.IdUtil;
53 import org.jruby.util.PrintfFormat;
54
55 /** This is a port of the MRI lexer to Java; it is compatible with Ruby 1.8.1.
56  */

57 public class RubyYaccLexer {
58     // Last token read via yylex().
59
private int token;
60     
61     // Value of last token which had a value associated with it.
62
Object JavaDoc yaccValue;
63
64     // Stream of data that yylex() examines.
65
private LexerSource src;
66     
67     // Used for tiny smidgen of grammar in lexer (see setParserSupport())
68
private ParserSupport parserSupport = null;
69
70     // What handles warnings
71
private IRubyWarnings warnings;
72
73     // Additional context surrounding tokens that both the lexer and
74
// grammar use.
75
private LexState lex_state;
76     
77     // Tempory buffer to build up a potential token. Consumer takes responsibility to reset
78
// this before use.
79
private StringBuffer JavaDoc tokenBuffer = new StringBuffer JavaDoc(60);
80
81     private StackState conditionState = new StackState();
82     private StackState cmdArgumentState = new StackState();
83     private StrTerm lex_strterm;
84     private boolean commandStart;
85
86     // Give a name to a value. Enebo: This should be used more.
87
static final int EOF = 0;
88
89     // ruby constants for strings (should this be moved somewhere else?)
90
static final int STR_FUNC_ESCAPE=0x01;
91     static final int STR_FUNC_EXPAND=0x02;
92     static final int STR_FUNC_REGEXP=0x04;
93     static final int STR_FUNC_QWORDS=0x08;
94     static final int STR_FUNC_SYMBOL=0x10;
95     static final int STR_FUNC_INDENT=0x20;
96
97     private final int str_squote = 0;
98     private final int str_dquote = STR_FUNC_EXPAND;
99     private final int str_xquote = STR_FUNC_EXPAND;
100     private final int str_regexp = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND;
101     private final int str_ssym = STR_FUNC_SYMBOL;
102     private final int str_dsym = STR_FUNC_SYMBOL | STR_FUNC_EXPAND;
103     
104     public RubyYaccLexer() {
105         reset();
106     }
107     
108     public void reset() {
109         token = 0;
110         yaccValue = null;
111         src = null;
112         lex_state = null;
113         resetStacks();
114         lex_strterm = null;
115         commandStart = true;
116     }
117     
118     /**
119      * How the parser advances to the next token.
120      *
121      * @return true if not at end of file (EOF).
122      */

123     public boolean advance() throws IOException JavaDoc {
124         return (token = yylex()) != EOF;
125     }
126     
127     /**
128      * Last token read from the lexer at the end of a call to yylex()
129      *
130      * @return last token read
131      */

132     public int token() {
133         return token;
134     }
135
136     public StringBuffer JavaDoc getTokenBuffer() {
137         return tokenBuffer;
138     }
139     
140     /**
141      * Value of last token (if it is a token which has a value).
142      *
143      * @return value of last value-laden token
144      */

145     public Object JavaDoc value() {
146         return yaccValue;
147     }
148
149     public ISourcePositionFactory getPositionFactory() {
150         return src.getPositionFactory();
151     }
152     
153     /**
154      * Get position information for Token/Node that follows node represented by startPosition
155      * and current lexer location.
156      *
157      * @param startPosition previous node/token
158      * @param inclusive include previous node into position information of current node
159      * @return a new position
160      */

161     public ISourcePosition getPosition(ISourcePosition startPosition, boolean inclusive) {
162         return src.getPosition(startPosition, inclusive);
163     }
164     
165     public ISourcePosition getPosition() {
166         return src.getPosition(null, false);
167     }
168
169     /**
170      * Parse must pass its support object for some check at bottom of
171      * yylex(). Ruby does it this way as well (i.e. a little parsing
172      * logic in the lexer).
173      *
174      * @param parserSupport
175      */

176     public void setParserSupport(ParserSupport parserSupport) {
177         this.parserSupport = parserSupport;
178     }
179
180     /**
181      * Allow the parser to set the source for its lexer.
182      *
183      * @param source where the lexer gets raw data
184      */

185     public void setSource(LexerSource source) {
186         this.src = source;
187     }
188
189     public StrTerm getStrTerm() {
190         return lex_strterm;
191     }
192     
193     public void setStrTerm(StrTerm strterm) {
194         this.lex_strterm = strterm;
195     }
196
197     public void resetStacks() {
198         conditionState.reset();
199         cmdArgumentState.reset();
200     }
201     
202     public void setWarnings(IRubyWarnings warnings) {
203         this.warnings = warnings;
204     }
205
206
207     public void setState(LexState state) {
208         this.lex_state = state;
209     }
210
211     public StackState getCmdArgumentState() {
212         return cmdArgumentState;
213     }
214
215     public StackState getConditionState() {
216         return conditionState;
217     }
218     
219     public void setValue(Object JavaDoc yaccValue) {
220         this.yaccValue = yaccValue;
221     }
222
223     private boolean isNext_identchar() throws IOException JavaDoc {
224         char c = src.read();
225         src.unread(c);
226
227         return c != EOF && (Character.isLetterOrDigit(c) || c == '-');
228     }
229     
230     private Object JavaDoc getInteger(String JavaDoc value, int radix) {
231         try {
232             return new FixnumNode(getPosition(), Long.parseLong(value, radix));
233         } catch (NumberFormatException JavaDoc e) {
234             return new BignumNode(getPosition(), new BigInteger JavaDoc(value, radix));
235         }
236     }
237
238     /**
239      * Do the next characters from the source match provided String in a case insensitive manner.
240      * If so, then consume those characters and that string. Otherwise, consume none of them and
241      * return null.
242      *
243      * @param s to be matched against
244      * @return string if string matches, null otherwise
245      */

246     private String JavaDoc isNextNoCase(String JavaDoc s) throws IOException JavaDoc {
247         StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
248         
249         for (int i = 0; i < s.length(); i++) {
250             char c = s.charAt(i);
251             char r = src.read();
252             buf.append(r);
253             
254             if (Character.toLowerCase(c) != r &&
255                 Character.toUpperCase(c) != r) {
256                 src.unreadMany(buf);
257                 return null;
258             }
259         }
260
261         return buf.toString();
262     }
263
264     /**
265      * @param c the character to test
266      * @return true if character is a hex value (0-9a-f)
267      */

268     static final boolean isHexChar(char c) {
269         return Character.isDigit(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F');
270     }
271
272     /**
273      * @param c the character to test
274      * @return true if character is an octal value (0-7)
275      */

276     static final boolean isOctChar(char c) {
277         return '0' <= c && c <= '7';
278     }
279     
280     /**
281      * @param c is character to be compared
282      * @return whether c is an identifier or not
283      */

284     private static final boolean isIdentifierChar(char c) {
285         return Character.isLetterOrDigit(c) || c == '_';
286     }
287     
288     /**
289      * What type/kind of quote are we dealing with?
290      *
291      * @param c first character the the quote construct
292      * @return a token that specifies the quote type
293      */

294     private int parseQuote(char c) throws IOException JavaDoc {
295         char begin, end;
296         boolean shortHand;
297         
298         // Short-hand (e.g. %{,%.,%!,... versus %Q{).
299
if (!Character.isLetterOrDigit(c)) {
300             begin = c;
301             c = 'Q';
302             shortHand = true;
303         // Long-hand (e.g. %Q{}).
304
} else {
305             shortHand = false;
306             begin = src.read();
307             if (Character.isLetterOrDigit(begin) /* no mb || ismbchar(term)*/) {
308                 throw new SyntaxException(getPosition(), "unknown type of %string");
309             }
310         }
311         if (c == EOF || begin == EOF) {
312             throw new SyntaxException(getPosition(), "unterminated quoted string meets end of file");
313         }
314         
315         // Figure end-char. '\0' is special to indicate begin=end and that no nesting?
316
if (begin == '(') end = ')';
317         else if (begin == '[') end = ']';
318         else if (begin == '{') end = '}';
319         else if (begin == '<') end = '>';
320         else { end = begin; begin = '\0'; };
321
322         switch (c) {
323         case 'Q':
324             lex_strterm = new StringTerm(str_dquote, end, begin);
325             yaccValue = new Token("%"+ (shortHand ? (""+end) : ("" + c + begin)), getPosition());
326             return Tokens.tSTRING_BEG;
327
328         case 'q':
329             lex_strterm = new StringTerm(str_squote, end, begin);
330             yaccValue = new Token("%"+c+begin, getPosition());
331             return Tokens.tSTRING_BEG;
332
333         case 'W':
334             lex_strterm = new StringTerm(str_dquote | STR_FUNC_QWORDS, end, begin);
335             do {c = src.read();} while (Character.isWhitespace(c));
336             src.unread(c);
337             yaccValue = new Token("%"+c+begin, getPosition());
338             return Tokens.tWORDS_BEG;
339
340         case 'w':
341             lex_strterm = new StringTerm(str_squote | STR_FUNC_QWORDS, end, begin);
342             do {c = src.read();} while (Character.isWhitespace(c));
343             src.unread(c);
344             yaccValue = new Token("%"+c+begin, getPosition());
345             return Tokens.tQWORDS_BEG;
346
347         case 'x':
348             lex_strterm = new StringTerm(str_xquote, end, begin);
349             yaccValue = new Token("%"+c+begin, getPosition());
350             return Tokens.tXSTRING_BEG;
351
352         case 'r':
353             lex_strterm = new StringTerm(str_regexp, end, begin);
354             yaccValue = new Token("%"+c+begin, getPosition());
355             return Tokens.tREGEXP_BEG;
356
357         case 's':
358             lex_strterm = new StringTerm(str_ssym, end, begin);
359             lex_state = LexState.EXPR_FNAME;
360             yaccValue = new Token("%"+c+begin, getPosition());
361             return Tokens.tSYMBEG;
362
363         default:
364             throw new SyntaxException(getPosition(), "Unknown type of %string. Expected 'Q', 'q', 'w', 'x', 'r' or any non letter character, but found '" + c + "'.");
365         }
366     }
367     
368     private int hereDocumentIdentifier() throws IOException JavaDoc {
369         char c = src.read();
370         int term;
371
372         int func = 0;
373         if (c == '-') {
374             c = src.read();
375             func = STR_FUNC_INDENT;
376         }
377         
378         if (c == '\'' || c == '"' || c == '`') {
379             if (c == '\'') {
380                 func |= str_squote;
381             } else if (c == '"') {
382                 func |= str_dquote;
383             } else {
384                 func |= str_xquote;
385             }
386
387             tokenBuffer.setLength(0);
388             term = c;
389             while ((c = src.read()) != EOF && c != term) {
390                 tokenBuffer.append(c);
391             }
392             if (c == EOF) {
393                 throw new SyntaxException(getPosition(), "unterminated here document identifier");
394             }
395         } else {
396             if (!isIdentifierChar(c)) {
397                 src.unread(c);
398                 if ((func & STR_FUNC_INDENT) != 0) {
399                     src.unread(c);
400                 }
401                 return 0;
402             }
403             tokenBuffer.setLength(0);
404             term = '"';
405             func |= str_dquote;
406             do {
407                 tokenBuffer.append(c);
408             } while ((c = src.read()) != EOF && isIdentifierChar(c));
409             src.unread(c);
410         }
411
412         String JavaDoc line = src.readLine() + '\n';
413         String JavaDoc tok = tokenBuffer.toString();
414         lex_strterm = new HeredocTerm(tok, func, line);
415
416         if (term == '`') {
417             yaccValue = new Token("`", getPosition());
418             return Tokens.tXSTRING_BEG;
419         }
420         
421         yaccValue = new Token("\"", getPosition());
422         // Hacky: Advance position to eat newline here....
423
getPosition();
424         return Tokens.tSTRING_BEG;
425     }
426     
427     private void arg_ambiguous() {
428         warnings.warning(getPosition(), "Ambiguous first argument; make sure.");
429     }
430
431     /**
432      * Read a comment up to end of line. When found each comment will get stored away into
433      * the parser result so that any interested party can use them as they seem fit. One idea
434      * is that IDE authors can do distance based heuristics to associate these comments to the
435      * AST node they think they belong to.
436      *
437      * @param c last character read from lexer source
438      * @return newline or eof value
439      */

440     protected int readComment(char c) throws IOException JavaDoc {
441         ISourcePosition startPosition = src.getPosition();
442         tokenBuffer.setLength(0);
443         tokenBuffer.append(c);
444
445         // FIXME: Consider making a better LexerSource.readLine
446
while ((c = src.read()) != '\n') {
447             tokenBuffer.append(c);
448             if (c == EOF) {
449                 break;
450             }
451         }
452         src.unread(c);
453         
454         // Store away each comment to parser result so IDEs can do whatever they want with them.
455
ISourcePosition position = startPosition.union(getPosition());
456         parserSupport.getResult().addComment(new CommentNode(position, tokenBuffer.toString()));
457         
458         return c;
459     }
460     
461     /*
462      * Not normally used, but is left in here since it can be useful in debugging
463      * grammar and lexing problems.
464     private void printToken(int token) {
465         //System.out.print("LOC: " + support.getPosition() + " ~ ");
466         
467         switch (token) {
468             case Tokens.yyErrorCode: System.err.print("yyErrorCode,"); break;
469             case Tokens.kCLASS: System.err.print("kClass,"); break;
470             case Tokens.kMODULE: System.err.print("kModule,"); break;
471             case Tokens.kDEF: System.err.print("kDEF,"); break;
472             case Tokens.kUNDEF: System.err.print("kUNDEF,"); break;
473             case Tokens.kBEGIN: System.err.print("kBEGIN,"); break;
474             case Tokens.kRESCUE: System.err.print("kRESCUE,"); break;
475             case Tokens.kENSURE: System.err.print("kENSURE,"); break;
476             case Tokens.kEND: System.err.print("kEND,"); break;
477             case Tokens.kIF: System.err.print("kIF,"); break;
478             case Tokens.kUNLESS: System.err.print("kUNLESS,"); break;
479             case Tokens.kTHEN: System.err.print("kTHEN,"); break;
480             case Tokens.kELSIF: System.err.print("kELSIF,"); break;
481             case Tokens.kELSE: System.err.print("kELSE,"); break;
482             case Tokens.kCASE: System.err.print("kCASE,"); break;
483             case Tokens.kWHEN: System.err.print("kWHEN,"); break;
484             case Tokens.kWHILE: System.err.print("kWHILE,"); break;
485             case Tokens.kUNTIL: System.err.print("kUNTIL,"); break;
486             case Tokens.kFOR: System.err.print("kFOR,"); break;
487             case Tokens.kBREAK: System.err.print("kBREAK,"); break;
488             case Tokens.kNEXT: System.err.print("kNEXT,"); break;
489             case Tokens.kREDO: System.err.print("kREDO,"); break;
490             case Tokens.kRETRY: System.err.print("kRETRY,"); break;
491             case Tokens.kIN: System.err.print("kIN,"); break;
492             case Tokens.kDO: System.err.print("kDO,"); break;
493             case Tokens.kDO_COND: System.err.print("kDO_COND,"); break;
494             case Tokens.kDO_BLOCK: System.err.print("kDO_BLOCK,"); break;
495             case Tokens.kRETURN: System.err.print("kRETURN,"); break;
496             case Tokens.kYIELD: System.err.print("kYIELD,"); break;
497             case Tokens.kSUPER: System.err.print("kSUPER,"); break;
498             case Tokens.kSELF: System.err.print("kSELF,"); break;
499             case Tokens.kNIL: System.err.print("kNIL,"); break;
500             case Tokens.kTRUE: System.err.print("kTRUE,"); break;
501             case Tokens.kFALSE: System.err.print("kFALSE,"); break;
502             case Tokens.kAND: System.err.print("kAND,"); break;
503             case Tokens.kOR: System.err.print("kOR,"); break;
504             case Tokens.kNOT: System.err.print("kNOT,"); break;
505             case Tokens.kIF_MOD: System.err.print("kIF_MOD,"); break;
506             case Tokens.kUNLESS_MOD: System.err.print("kUNLESS_MOD,"); break;
507             case Tokens.kWHILE_MOD: System.err.print("kWHILE_MOD,"); break;
508             case Tokens.kUNTIL_MOD: System.err.print("kUNTIL_MOD,"); break;
509             case Tokens.kRESCUE_MOD: System.err.print("kRESCUE_MOD,"); break;
510             case Tokens.kALIAS: System.err.print("kALIAS,"); break;
511             case Tokens.kDEFINED: System.err.print("kDEFINED,"); break;
512             case Tokens.klBEGIN: System.err.print("klBEGIN,"); break;
513             case Tokens.klEND: System.err.print("klEND,"); break;
514             case Tokens.k__LINE__: System.err.print("k__LINE__,"); break;
515             case Tokens.k__FILE__: System.err.print("k__FILE__,"); break;
516             case Tokens.tIDENTIFIER: System.err.print("tIDENTIFIER["+ value() + "],"); break;
517             case Tokens.tFID: System.err.print("tFID[" + value() + "],"); break;
518             case Tokens.tGVAR: System.err.print("tGVAR[" + value() + "],"); break;
519             case Tokens.tIVAR: System.err.print("tIVAR[" + value() +"],"); break;
520             case Tokens.tCONSTANT: System.err.print("tCONSTANT["+ value() +"],"); break;
521             case Tokens.tCVAR: System.err.print("tCVAR,"); break;
522             case Tokens.tINTEGER: System.err.print("tINTEGER,"); break;
523             case Tokens.tFLOAT: System.err.print("tFLOAT,"); break;
524             case Tokens.tSTRING_CONTENT: System.err.print("tSTRING_CONTENT[" + yaccValue + "],"); break;
525             case Tokens.tSTRING_BEG: System.err.print("tSTRING_BEG,"); break;
526             case Tokens.tSTRING_END: System.err.print("tSTRING_END,"); break;
527             case Tokens.tSTRING_DBEG: System.err.print("STRING_DBEG,"); break;
528             case Tokens.tSTRING_DVAR: System.err.print("tSTRING_DVAR,"); break;
529             case Tokens.tXSTRING_BEG: System.err.print("tXSTRING_BEG,"); break;
530             case Tokens.tREGEXP_BEG: System.err.print("tREGEXP_BEG,"); break;
531             case Tokens.tREGEXP_END: System.err.print("tREGEXP_END,"); break;
532             case Tokens.tWORDS_BEG: System.err.print("tWORDS_BEG,"); break;
533             case Tokens.tQWORDS_BEG: System.err.print("tQWORDS_BEG,"); break;
534             case Tokens.tBACK_REF: System.err.print("tBACK_REF,"); break;
535             case Tokens.tNTH_REF: System.err.print("tNTH_REF,"); break;
536             case Tokens.tUPLUS: System.err.print("tUPLUS"); break;
537             case Tokens.tUMINUS: System.err.print("tUMINUS,"); break;
538             case Tokens.tPOW: System.err.print("tPOW,"); break;
539             case Tokens.tCMP: System.err.print("tCMP,"); break;
540             case Tokens.tEQ: System.err.print("tEQ,"); break;
541             case Tokens.tEQQ: System.err.print("tEQQ,"); break;
542             case Tokens.tNEQ: System.err.print("tNEQ,"); break;
543             case Tokens.tGEQ: System.err.print("tGEQ,"); break;
544             case Tokens.tLEQ: System.err.print("tLEQ,"); break;
545             case Tokens.tANDOP: System.err.print("tANDOP,"); break;
546             case Tokens.tOROP: System.err.print("tOROP,"); break;
547             case Tokens.tMATCH: System.err.print("tMATCH,"); break;
548             case Tokens.tNMATCH: System.err.print("tNMATCH,"); break;
549             case Tokens.tDOT2: System.err.print("tDOT2,"); break;
550             case Tokens.tDOT3: System.err.print("tDOT3,"); break;
551             case Tokens.tAREF: System.err.print("tAREF,"); break;
552             case Tokens.tASET: System.err.print("tASET,"); break;
553             case Tokens.tLSHFT: System.err.print("tLSHFT,"); break;
554             case Tokens.tRSHFT: System.err.print("tRSHFT,"); break;
555             case Tokens.tCOLON2: System.err.print("tCOLON2,"); break;
556             case Tokens.tCOLON3: System.err.print("tCOLON3,"); break;
557             case Tokens.tOP_ASGN: System.err.print("tOP_ASGN,"); break;
558             case Tokens.tASSOC: System.err.print("tASSOC,"); break;
559             case Tokens.tLPAREN: System.err.print("tLPAREN,"); break;
560             case Tokens.tLPAREN_ARG: System.err.print("tLPAREN_ARG,"); break;
561             case Tokens.tLBRACK: System.err.print("tLBRACK,"); break;
562             case Tokens.tLBRACE: System.err.print("tLBRACE,"); break;
563             case Tokens.tSTAR: System.err.print("tSTAR,"); break;
564             case Tokens.tAMPER: System.err.print("tAMPER,"); break;
565             case Tokens.tSYMBEG: System.err.print("tSYMBEG,"); break;
566             case '\n': System.err.println("NL"); break;
567             default: System.err.print("'" + (int)token + "',"); break;
568         }
569     }
570
571     // DEBUGGING HELP
572     private int yylex() {
573         int token = yylex2();
574         
575         printToken(token);
576         
577         return token;
578     }
579     */

580
581     /**
582      * Returns the next token. Also sets yyVal is needed.
583      *
584      *@return Description of the Returned Value
585      */

586     private int yylex() throws IOException JavaDoc {
587         char c;
588         boolean spaceSeen = false;
589         boolean commandState;
590         
591         if (lex_strterm != null) {
592             int tok = lex_strterm.parseString(this, src);
593             if (tok == Tokens.tSTRING_END || tok == Tokens.tREGEXP_END) {
594                 lex_strterm = null;
595                 lex_state = LexState.EXPR_END;
596             }
597             return tok;
598         }
599
600         commandState = commandStart;
601         commandStart = false;
602
603         LexState last_state = lex_state;
604         
605         retry: for(;;) {
606             c = src.read();
607             switch(c) {
608             case '\004': /* ^D */
609             case '\032': /* ^Z */
610             case 0: /* end of script. */
611                 return 0;
612            
613                 /* white spaces */
614             case ' ': case '\t': case '\f': case '\r':
615             case '\13': /* '\v' */
616                 getPosition();
617                 spaceSeen = true;
618                 continue retry;
619             case '#': /* it's a comment */
620                 if (readComment(c) == 0) return 0;
621                     
622                 /* fall through */
623             case '\n':
624                 // Replace a string of newlines with a single one
625
while((c = src.read()) == '\n') {
626                     
627                 }
628                 src.unread( c );
629                 getPosition();
630
631                 if (lex_state == LexState.EXPR_BEG ||
632                     lex_state == LexState.EXPR_FNAME ||
633                     lex_state == LexState.EXPR_DOT ||
634                     lex_state == LexState.EXPR_CLASS) {
635                     continue retry;
636                 }
637
638                 commandStart = true;
639                 lex_state = LexState.EXPR_BEG;
640                 return '\n';
641                 
642             case '*':
643                 if ((c = src.read()) == '*') {
644                     if ((c = src.read()) == '=') {
645                         lex_state = LexState.EXPR_BEG;
646                         yaccValue = new Token("**", getPosition());
647                         return Tokens.tOP_ASGN;
648                     }
649                     src.unread(c);
650                     yaccValue = new Token("**", getPosition());
651                     c = Tokens.tPOW;
652                 } else {
653                     if (c == '=') {
654                         lex_state = LexState.EXPR_BEG;
655                         yaccValue = new Token("*", getPosition());
656                         return Tokens.tOP_ASGN;
657                     }
658                     src.unread(c);
659                     if (lex_state.isArgument() && spaceSeen && !Character.isWhitespace(c)) {
660                         warnings.warning(getPosition(), "`*' interpreted as argument prefix");
661                         c = Tokens.tSTAR;
662                     } else if (lex_state == LexState.EXPR_BEG ||
663                             lex_state == LexState.EXPR_MID) {
664                         c = Tokens.tSTAR;
665                     } else {
666                         c = Tokens.tSTAR2;
667                     }
668                     yaccValue = new Token("*", getPosition());
669                 }
670                 if (lex_state == LexState.EXPR_FNAME ||
671                     lex_state == LexState.EXPR_DOT) {
672                     lex_state = LexState.EXPR_ARG;
673                 } else {
674                     lex_state = LexState.EXPR_BEG;
675                 }
676                 return c;
677
678             case '!':
679                 lex_state = LexState.EXPR_BEG;
680                 if ((c = src.read()) == '=') {
681                  yaccValue = new Token("!=",getPosition());
682                  return Tokens.tNEQ;
683                 }
684                 if (c == '~') {
685                     yaccValue = new Token("!~",getPosition());
686                     return Tokens.tNMATCH;
687                 }
688                 src.unread(c);
689                 yaccValue = new Token("!",getPosition());
690                 return Tokens.tBANG;
691
692             case '=':
693                 // documentation nodes
694
if (src.wasBeginOfLine()) {
695                     String JavaDoc equalLabel;
696                     if ((equalLabel = isNextNoCase("begin")) != null) {
697                         tokenBuffer.setLength(0);
698                         tokenBuffer.append(equalLabel);
699                         c = src.read();
700                         
701                         if (Character.isWhitespace(c)) {
702                             // In case last next was the newline.
703
src.unread(c);
704                             for (;;) {
705                                 c = src.read();
706                                 tokenBuffer.append(c);
707
708                                 // If a line is followed by a blank line put
709
// it back.
710
while (c == '\n') {
711                                     c = src.read();
712                                     tokenBuffer.append(c);
713                                 }
714                                 if (c == EOF) {
715                                     throw new SyntaxException(getPosition(), "embedded document meets end of file");
716                                 }
717                                 if (c != '=') continue;
718                                 if (src.wasBeginOfLine() && (equalLabel = isNextNoCase("end")) != null) {
719                                     tokenBuffer.append(equalLabel);
720                                     tokenBuffer.append(src.readLine());
721                                     src.unread('\n');
722                                     break;
723                                 }
724                             }
725                             
726                             parserSupport.getResult().addComment(new CommentNode(getPosition(), tokenBuffer.toString()));
727                             continue retry;
728                         }
729                         src.unread(c);
730                     }
731                 }
732
733                 if (lex_state == LexState.EXPR_FNAME || lex_state == LexState.EXPR_DOT) {
734                     lex_state = LexState.EXPR_ARG;
735                 } else {
736                     lex_state = LexState.EXPR_BEG;
737                 }
738
739                 c = src.read();
740                 if (c == '=') {
741                     c = src.read();
742                     if (c == '=') {
743                         yaccValue = new Token("===", getPosition());
744                         return Tokens.tEQQ;
745                     }
746                     src.unread(c);
747                     yaccValue = new Token("==", getPosition());
748                     return Tokens.tEQ;
749                 }
750                 if (c == '~') {
751                     yaccValue = new Token("=~", getPosition());
752                     return Tokens.tMATCH;
753                 } else if (c == '>') {
754                     yaccValue = new Token("=>", getPosition());
755                     return Tokens.tASSOC;
756                 }
757                 src.unread(c);
758                 yaccValue = new Token("=", getPosition());
759                 return '=';
760                 
761             case '<':
762                 c = src.read();
763                 if (c == '<' &&
764                         lex_state != LexState.EXPR_END &&
765                         lex_state != LexState.EXPR_DOT &&
766                         lex_state != LexState.EXPR_ENDARG &&
767                         lex_state != LexState.EXPR_CLASS &&
768                         (!lex_state.isArgument() || spaceSeen)) {
769                     int tok = hereDocumentIdentifier();
770                     if (tok != 0) return tok;
771                 }
772                 if (lex_state == LexState.EXPR_FNAME ||
773                     lex_state == LexState.EXPR_DOT) {
774                     lex_state = LexState.EXPR_ARG;
775                 } else {
776                     lex_state = LexState.EXPR_BEG;
777                 }
778                 if (c == '=') {
779                     if ((c = src.read()) == '>') {
780                         yaccValue = new Token("<=>", getPosition());
781                         return Tokens.tCMP;
782                     }
783                     src.unread(c);
784                     yaccValue = new Token("<=", getPosition());
785                     return Tokens.tLEQ;
786                 }
787                 if (c == '<') {
788                     if ((c = src.read()) == '=') {
789                         lex_state = LexState.EXPR_BEG;
790                         yaccValue = new Token("<<", getPosition());
791                         return Tokens.tOP_ASGN;
792                     }
793                     src.unread(c);
794                     yaccValue = new Token("<<", getPosition());
795                     return Tokens.tLSHFT;
796                 }
797                 yaccValue = new Token("<", getPosition());
798                 src.unread(c);
799                 return Tokens.tLT;
800                 
801             case '>':
802                 if (lex_state == LexState.EXPR_FNAME ||
803                     lex_state == LexState.EXPR_DOT) {
804                     lex_state = LexState.EXPR_ARG;
805                 } else {
806                     lex_state = LexState.EXPR_BEG;
807                 }
808
809                 if ((c = src.read()) == '=') {
810                     yaccValue = new Token(">=", getPosition());
811                     return Tokens.tGEQ;
812                 }
813                 if (c == '>') {
814                     if ((c = src.read()) == '=') {
815                         lex_state = LexState.EXPR_BEG;
816                         yaccValue = new Token(">>", getPosition());
817                         return Tokens.tOP_ASGN;
818                     }
819                     src.unread(c);
820                     yaccValue = new Token(">>", getPosition());
821                     return Tokens.tRSHFT;
822                 }
823                 src.unread(c);
824                 yaccValue = new Token(">", getPosition());
825                 return Tokens.tGT;
826
827             case '"':
828                 lex_strterm = new StringTerm(str_dquote, '"', '\0');
829                 yaccValue = new Token("\"", getPosition());
830                 return Tokens.tSTRING_BEG;
831
832             case '`':
833                 yaccValue = new Token("`", getPosition());
834                 if (lex_state == LexState.EXPR_FNAME) {
835                     lex_state = LexState.EXPR_END;
836                     return Tokens.tBACK_REF2;
837                 }
838                 if (lex_state == LexState.EXPR_DOT) {
839                     if (commandState) {
840                         lex_state = LexState.EXPR_CMDARG;
841                     } else {
842                         lex_state = LexState.EXPR_ARG;
843                     }
844                     return Tokens.tBACK_REF2;
845                 }
846                 lex_strterm = new StringTerm(str_xquote, '`', '\0');
847                 return Tokens.tXSTRING_BEG;
848
849             case '\'':
850                 lex_strterm = new StringTerm(str_squote, '\'', '\0');
851                 yaccValue = new Token("'", getPosition());
852                 return Tokens.tSTRING_BEG;
853
854             case '?':
855                 if (lex_state == LexState.EXPR_END ||
856                     lex_state == LexState.EXPR_ENDARG) {
857                     lex_state = LexState.EXPR_BEG;
858                     yaccValue = new Token("?",getPosition());
859                     return '?';
860                 }
861                 c = src.read();
862                 if (c == EOF) {
863                     throw new SyntaxException(getPosition(), "incomplete character syntax");
864                 }
865                 if (Character.isWhitespace(c)){
866                     if (!lex_state.isArgument()){
867                         int c2 = 0;
868                         switch (c) {
869                         case ' ':
870                             c2 = 's';
871                             break;
872                         case '\n':
873                             c2 = 'n';
874                             break;
875                         case '\t':
876                             c2 = 't';
877                             break;
878                             /* What is \v in C?
879                         case '\v':
880                             c2 = 'v';
881                             break;
882                             */

883                         case '\r':
884                             c2 = 'r';
885                             break;
886                         case '\f':
887                             c2 = 'f';
888                             break;
889                         }
890                         if (c2 != 0) {
891                             warnings.warn(getPosition(), "invalid character syntax; use ?\\" + c2);
892                         }
893                     }
894                     src.unread(c);
895                     lex_state = LexState.EXPR_BEG;
896                     yaccValue = new Token("?", getPosition());
897                     return '?';
898                 /*} else if (ismbchar(c)) { // ruby - we don't support them either?
899                     rb_warn("multibyte character literal not supported yet; use ?\\" + c);
900                     support.unread(c);
901                     lexState = LexState.EXPR_BEG;
902                     return '?';*/

903                 } else if ((Character.isLetterOrDigit(c) || c == '_') &&
904                         !src.peek('\n') && isNext_identchar()) {
905                     src.unread(c);
906                     lex_state = LexState.EXPR_BEG;
907                     yaccValue = new Token("?", getPosition());
908                     return '?';
909                 } else if (c == '\\') {
910                     c = src.readEscape();
911                 }
912                 c &= 0xff;
913                 lex_state = LexState.EXPR_END;
914                 yaccValue = new FixnumNode(getPosition(), c);
915                 return Tokens.tINTEGER;
916
917             case '&':
918                 if ((c = src.read()) == '&') {
919                     lex_state = LexState.EXPR_BEG;
920                     if ((c = src.read()) == '=') {
921                         yaccValue = new Token("&&", getPosition());
922                         lex_state = LexState.EXPR_BEG;
923                         return Tokens.tOP_ASGN;
924                     }
925                     src.unread(c);
926                     yaccValue = new Token("&&", getPosition());
927                     return Tokens.tANDOP;
928                 }
929                 else if (c == '=') {
930                     yaccValue = new Token("&", getPosition());
931                     lex_state = LexState.EXPR_BEG;
932                     return Tokens.tOP_ASGN;
933                 }
934                 src.unread(c);
935                 //tmpPosition is required because of getPosition()'s side effects.
936
//if the warning is generated, the getPosition() on line 954 (this line + 18) will create
937
//a wrong position if the "inclusive" flag is not set.
938
ISourcePosition tmpPosition = getPosition();
939                 if (lex_state.isArgument() && spaceSeen && !Character.isWhitespace(c)){
940                     warnings.warning(tmpPosition, "`&' interpreted as argument prefix");
941                     c = Tokens.tAMPER;
942                 } else if (lex_state == LexState.EXPR_BEG ||
943                         lex_state == LexState.EXPR_MID) {
944                     c = Tokens.tAMPER;
945                 } else {
946                     c = Tokens.tAMPER2;
947                 }
948                 
949                 if (lex_state == LexState.EXPR_FNAME ||
950                     lex_state == LexState.EXPR_DOT) {
951                     lex_state = LexState.EXPR_ARG;
952                 } else {
953                     lex_state = LexState.EXPR_BEG;
954                 }
955                 yaccValue = new Token("&", tmpPosition);
956                 return c;
957                 
958             case '|':
959                 if ((c = src.read()) == '|') {
960                     lex_state = LexState.EXPR_BEG;
961                     if ((c = src.read()) == '=') {
962                         lex_state = LexState.EXPR_BEG;
963                         yaccValue = new Token("||", getPosition());
964                         return Tokens.tOP_ASGN;
965                     }
966                     src.unread(c);
967                     yaccValue = new Token("||", getPosition());
968                     return Tokens.tOROP;
969                 }
970                 if (c == '=') {
971                     lex_state = LexState.EXPR_BEG;
972                     yaccValue = new Token("|", getPosition());
973                     return Tokens.tOP_ASGN;
974                 }
975                 if (lex_state == LexState.EXPR_FNAME ||
976                     lex_state == LexState.EXPR_DOT) {
977                     lex_state = LexState.EXPR_ARG;
978                 } else {
979                     lex_state = LexState.EXPR_BEG;
980                 }
981                 src.unread(c);
982                 yaccValue = new Token("|", getPosition());
983                 return Tokens.tPIPE;
984
985             case '+':
986                 c = src.read();
987                 if (lex_state == LexState.EXPR_FNAME ||
988                     lex_state == LexState.EXPR_DOT) {
989                     lex_state = LexState.EXPR_ARG;
990                     if (c == '@') {
991                         yaccValue = new Token("+@", getPosition());
992                         return Tokens.tUPLUS;
993                     }
994                     src.unread(c);
995                     yaccValue = new Token("+", getPosition());
996                     return Tokens.tPLUS;
997                 }
998                 if (c == '=') {
999                     lex_state = LexState.EXPR_BEG;
1000                    yaccValue = new Token("+", getPosition());
1001                    return Tokens.tOP_ASGN;
1002                }
1003                if (lex_state == LexState.EXPR_BEG ||
1004                    lex_state == LexState.EXPR_MID ||
1005                        (lex_state.isArgument() && spaceSeen && !Character.isWhitespace(c))) {
1006                    if (lex_state.isArgument()) arg_ambiguous();
1007                    lex_state = LexState.EXPR_BEG;
1008                    src.unread(c);
1009                    if (Character.isDigit(c)) {
1010                        c = '+';
1011                        return parseNumber(c);
1012                    }
1013                    yaccValue = new Token("+", getPosition());
1014                    return Tokens.tUPLUS;
1015                }
1016                lex_state = LexState.EXPR_BEG;
1017                src.unread(c);
1018                yaccValue = new Token("+", getPosition());
1019                return Tokens.tPLUS;
1020
1021            case '-':
1022                c = src.read();
1023                if (lex_state == LexState.EXPR_FNAME || lex_state == LexState.EXPR_DOT) {
1024                    lex_state = LexState.EXPR_ARG;
1025                    if (c == '@') {
1026                        yaccValue = new Token("-@", getPosition());
1027                        return Tokens.tUMINUS;
1028                    }
1029                    src.unread(c);
1030                    yaccValue = new Token("-", getPosition());
1031                    return Tokens.tMINUS;
1032                }
1033                if (c == '=') {
1034                    lex_state = LexState.EXPR_BEG;
1035                    yaccValue = new Token("-", getPosition());
1036                    return Tokens.tOP_ASGN;
1037                }
1038                if (lex_state == LexState.EXPR_BEG || lex_state == LexState.EXPR_MID ||
1039                        (lex_state.isArgument() && spaceSeen && !Character.isWhitespace(c))) {
1040                    if (lex_state.isArgument()) arg_ambiguous();
1041                    lex_state = LexState.EXPR_BEG;
1042                    src.unread(c);
1043                    yaccValue = new Token("-", getPosition());
1044                    if (Character.isDigit(c)) {
1045                        return Tokens.tUMINUS_NUM;
1046                    }
1047                    return Tokens.tUMINUS;
1048                }
1049                lex_state = LexState.EXPR_BEG;
1050                src.unread(c);
1051                yaccValue = new Token("-", getPosition());
1052                return Tokens.tMINUS;
1053                
1054            case '.':
1055                lex_state = LexState.EXPR_BEG;
1056                if ((c = src.read()) == '.') {
1057                    if ((c = src.read()) == '.') {
1058                        yaccValue = new Token("...", getPosition());
1059                        return Tokens.tDOT3;
1060                    }
1061                    src.unread(c);
1062                    yaccValue = new Token("..", getPosition());
1063                    return Tokens.tDOT2;
1064                }
1065                src.unread(c);
1066                if (Character.isDigit(c)) {
1067                    throw new SyntaxException(getPosition(), "no .<digit> floating literal anymore; put 0 before dot");
1068                }
1069                lex_state = LexState.EXPR_DOT;
1070                yaccValue = new Token(".", getPosition());
1071                return Tokens.tDOT;
1072            case '0' : case '1' : case '2' : case '3' : case '4' :
1073            case '5' : case '6' : case '7' : case '8' : case '9' :
1074                return parseNumber(c);
1075                
1076            case ')':
1077                conditionState.restart();
1078                cmdArgumentState.restart();
1079                lex_state = LexState.EXPR_END;
1080                yaccValue = new Token(")", getPosition());
1081                return Tokens.tRPAREN;
1082            case ']':
1083                conditionState.restart();
1084                cmdArgumentState.restart();
1085                lex_state = LexState.EXPR_END;
1086                yaccValue = new Token(")", getPosition());
1087                return Tokens.tRBRACK;
1088            case '}':
1089                conditionState.restart();
1090                cmdArgumentState.restart();
1091                lex_state = LexState.EXPR_END;
1092                yaccValue = new Token("}",getPosition());
1093                return Tokens.tRCURLY;
1094
1095            case ':':
1096                c = src.read();
1097                if (c == ':') {
1098                    if (lex_state == LexState.EXPR_BEG ||
1099                        lex_state == LexState.EXPR_MID ||
1100                        lex_state == LexState.EXPR_CLASS ||
1101                        (lex_state.isArgument() && spaceSeen)) {
1102                        lex_state = LexState.EXPR_BEG;
1103                        yaccValue = new Token("::", getPosition());
1104                        return Tokens.tCOLON3;
1105                    }
1106                    lex_state = LexState.EXPR_DOT;
1107                    yaccValue = new Token(":",getPosition());
1108                    return Tokens.tCOLON2;
1109                }
1110                if (lex_state == LexState.EXPR_END ||
1111                    lex_state == LexState.EXPR_ENDARG || Character.isWhitespace(c)) {
1112                    src.unread(c);
1113                    lex_state = LexState.EXPR_BEG;
1114                    yaccValue = new Token(":",getPosition());
1115                    return ':';
1116                }
1117                switch (c) {
1118                case '\'':
1119                    lex_strterm = new StringTerm(str_ssym, c, '\0');
1120                    break;
1121                case '"':
1122                    lex_strterm = new StringTerm(str_dsym, c, '\0');
1123                    break;
1124                default:
1125                    src.unread(c);
1126                    break;
1127                }
1128                lex_state = LexState.EXPR_FNAME;
1129                yaccValue = new Token(":", getPosition());
1130                return Tokens.tSYMBEG;
1131
1132            case '/':
1133                if (lex_state == LexState.EXPR_BEG ||
1134                    lex_state == LexState.EXPR_MID) {
1135                    lex_strterm = new StringTerm(str_regexp, '/', '\0');
1136                    yaccValue = new Token("/",getPosition());
1137                    return Tokens.tREGEXP_BEG;
1138                }
1139                
1140                if ((c = src.read()) == '=') {
1141                    yaccValue = new Token("/", getPosition());
1142                    lex_state = LexState.EXPR_BEG;
1143                    return Tokens.tOP_ASGN;
1144                }
1145                src.unread(c);
1146                if (lex_state.isArgument() && spaceSeen) {
1147                    if (!Character.isWhitespace(c)) {
1148                        arg_ambiguous();
1149                        lex_strterm = new StringTerm(str_regexp, '/', '\0');
1150                        yaccValue = new Token("/",getPosition());
1151                        return Tokens.tREGEXP_BEG;
1152                    }
1153                }
1154                if (lex_state == LexState.EXPR_FNAME ||
1155                    lex_state == LexState.EXPR_DOT) {
1156                    lex_state = LexState.EXPR_ARG;
1157                } else {
1158                    lex_state = LexState.EXPR_BEG;
1159                }
1160                yaccValue = new Token("/", getPosition());
1161                return Tokens.tDIVIDE;
1162
1163            case '^':
1164                if ((c = src.read()) == '=') {
1165                    lex_state = LexState.EXPR_BEG;
1166                    yaccValue = new Token("^", getPosition());
1167                    return Tokens.tOP_ASGN;
1168                }
1169                if (lex_state == LexState.EXPR_FNAME ||
1170                    lex_state == LexState.EXPR_DOT) {
1171                    lex_state = LexState.EXPR_ARG;
1172                } else {
1173                    lex_state = LexState.EXPR_BEG;
1174                }
1175                src.unread(c);
1176                yaccValue = new Token("^", getPosition());
1177                return Tokens.tCARET;
1178
1179            case ';':
1180                commandStart = true;
1181            case ',':
1182                lex_state = LexState.EXPR_BEG;
1183                yaccValue = new Token(",", getPosition());
1184                return c;
1185
1186            case '~':
1187                if (lex_state == LexState.EXPR_FNAME ||
1188                    lex_state == LexState.EXPR_DOT) {
1189                    if ((c = src.read()) != '@') {
1190                        src.unread(c);
1191                    }
1192                }
1193                if (lex_state == LexState.EXPR_FNAME ||
1194                        lex_state == LexState.EXPR_DOT) {
1195                    lex_state = LexState.EXPR_ARG;
1196                } else {
1197                    lex_state = LexState.EXPR_BEG;
1198                }
1199                yaccValue = new Token("~", getPosition());
1200                return Tokens.tTILDE;
1201            case '(':
1202                c = Tokens.tLPAREN2;
1203                commandStart = true;
1204                if (lex_state == LexState.EXPR_BEG ||
1205                    lex_state == LexState.EXPR_MID) {
1206                    c = Tokens.tLPAREN;
1207                } else if (spaceSeen) {
1208                    if (lex_state == LexState.EXPR_CMDARG) {
1209                        c = Tokens.tLPAREN_ARG;
1210                    } else if (lex_state == LexState.EXPR_ARG) {
1211                        warnings.warn(getPosition(), "don't put space before argument parentheses");
1212                        c = Tokens.tLPAREN2;
1213                    }
1214                }
1215                conditionState.stop();
1216                cmdArgumentState.stop();
1217                lex_state = LexState.EXPR_BEG;
1218                yaccValue = new Token("(", getPosition());
1219                return c;
1220
1221            case '[':
1222                if (lex_state == LexState.EXPR_FNAME ||
1223                    lex_state == LexState.EXPR_DOT) {
1224                    lex_state = LexState.EXPR_ARG;
1225                    if ((c = src.read()) == ']') {
1226                        if ((c = src.read()) == '=') {
1227                            yaccValue = new Token("[]=", getPosition());
1228                            return Tokens.tASET;
1229                        }
1230                        yaccValue = new Token("[]", getPosition());
1231                        src.unread(c);
1232                        return Tokens.tAREF;
1233                    }
1234                    src.unread(c);
1235                    yaccValue = new Token("[", getPosition());
1236                    return '[';
1237                } else if (lex_state == LexState.EXPR_BEG ||
1238                           lex_state == LexState.EXPR_MID) {
1239                    c = Tokens.tLBRACK;
1240                } else if (lex_state.isArgument() && spaceSeen) {
1241                    c = Tokens.tLBRACK;
1242                }
1243                lex_state = LexState.EXPR_BEG;
1244                conditionState.stop();
1245                cmdArgumentState.stop();
1246                yaccValue = new Token("[", getPosition());
1247                return c;
1248                
1249            case '{':
1250                c = Tokens.tLCURLY;
1251                
1252                if (lex_state.isArgument() || lex_state == LexState.EXPR_END) {
1253                    c = Tokens.tLCURLY; /* block (primary) */
1254                } else if (lex_state == LexState.EXPR_ENDARG) {
1255                    c = Tokens.tLBRACE_ARG; /* block (expr) */
1256                } else {
1257                    c = Tokens.tLBRACE; /* hash */
1258                }
1259                conditionState.stop();
1260                cmdArgumentState.stop();
1261                lex_state = LexState.EXPR_BEG;
1262                yaccValue = new Token("{", getPosition());
1263                return c;
1264
1265            case '\\':
1266                c = src.read();
1267                if (c == '\n') {
1268                    spaceSeen = true;
1269                    continue retry; /* skip \\n */
1270                }
1271                src.unread(c);
1272                yaccValue = new Token("\\", getPosition());
1273                return '\\';
1274
1275            case '%':
1276                if (lex_state == LexState.EXPR_BEG ||
1277                    lex_state == LexState.EXPR_MID) {
1278                    return parseQuote(src.read());
1279                }
1280                if ((c = src.read()) == '=') {
1281                    lex_state = LexState.EXPR_BEG;
1282                    yaccValue = new Token("%", getPosition());
1283                    return Tokens.tOP_ASGN;
1284                }
1285                if (lex_state.isArgument() && spaceSeen && !Character.isWhitespace(c)) {
1286                    return parseQuote(c);
1287                }
1288                if (lex_state == LexState.EXPR_FNAME ||
1289                    lex_state == LexState.EXPR_DOT) {
1290                    lex_state = LexState.EXPR_ARG;
1291                } else {
1292                    lex_state = LexState.EXPR_BEG;
1293                }
1294                src.unread(c);
1295                yaccValue = new Token("%", getPosition());
1296                return Tokens.tPERCENT;
1297
1298            case '$':
1299                lex_state = LexState.EXPR_END;
1300                tokenBuffer.setLength(0);
1301                c = src.read();
1302                switch (c) {
1303                case '_': /* $_: last read line string */
1304                    c = src.read();
1305                    if (isIdentifierChar(c)) {
1306                        tokenBuffer.append('$');
1307                        tokenBuffer.append('_');
1308                        break;
1309                    }
1310                    src.unread(c);
1311                    c = '_';
1312                    /* fall through */
1313                case '*': /* $*: argv */
1314                case '$': /* $$: pid */
1315                case '?': /* $?: last status */
1316                case '!': /* $!: error string */
1317                case '@': /* $@: error position */
1318                case '/': /* $/: input record separator */
1319                case '\\': /* $\: output record separator */
1320                case ';': /* $;: field separator */
1321                case ',': /* $,: output field separator */
1322                case '.': /* $.: last read line number */
1323                case '=': /* $=: ignorecase */
1324                case ':': /* $:: load path */
1325                case '<': /* $<: reading filename */
1326                case '>': /* $>: default output handle */
1327                case '\"': /* $": already loaded files */
1328                    tokenBuffer.append('$');
1329                    tokenBuffer.append(c);
1330                    yaccValue = new Token(tokenBuffer.toString(), getPosition());
1331                    return Tokens.tGVAR;
1332
1333                case '-':
1334                    tokenBuffer.append('$');
1335                    tokenBuffer.append(c);
1336                    c = src.read();
1337                    if (isIdentifierChar(c)) {
1338                        tokenBuffer.append(c);
1339                    } else {
1340                        src.unread(c);
1341                    }
1342                    yaccValue = new Token(tokenBuffer.toString(), getPosition());
1343                    /* xxx shouldn't check if valid option variable */
1344                    return Tokens.tGVAR;
1345
1346                case '~': /* $~: match-data */
1347                case '&': /* $&: last match */
1348                case '`': /* $`: string before last match */
1349                case '\'': /* $': string after last match */
1350                case '+': /* $+: string matches last paren. */
1351                    yaccValue = new BackRefNode(getPosition(), c);
1352                    return Tokens.tBACK_REF;
1353
1354                case '1': case '2': case '3':
1355                case '4': case '5': case '6':
1356                case '7': case '8': case '9':
1357                    tokenBuffer.append('$');
1358                    do {
1359                        tokenBuffer.append(c);
1360                        c = src.read();
1361                    } while (Character.isDigit(c));
1362                    src.unread(c);
1363                    if(last_state == LexState.EXPR_FNAME) {
1364                        yaccValue = new Token(tokenBuffer.toString(), getPosition());
1365                        return Tokens.tGVAR;
1366                    } else {
1367                        yaccValue = new NthRefNode(getPosition(), Integer.parseInt(tokenBuffer.substring(1)));
1368                        return Tokens.tNTH_REF;
1369                    }
1370                default:
1371                    if (!isIdentifierChar(c)) {
1372                        src.unread(c);
1373                        yaccValue = new Token("$", getPosition());
1374                        return '$';
1375                    }
1376                case '0':
1377                    tokenBuffer.append('$');
1378                }
1379                break;
1380
1381            case '@':
1382                c = src.read();
1383                tokenBuffer.setLength(0);
1384                tokenBuffer.append('@');
1385                if (c == '@') {
1386                    tokenBuffer.append('@');
1387                    c = src.read();
1388                }
1389                if (Character.isDigit(c)) {
1390                    if (tokenBuffer.length() == 1) {
1391                        throw new SyntaxException(getPosition(), "`@" + c + "' is not allowed as an instance variable name");
1392                    }
1393                    throw new SyntaxException(getPosition(), "`@@" + c + "' is not allowed as a class variable name");
1394                }
1395                if (!isIdentifierChar(c)) {
1396                    src.unread(c);
1397                    yaccValue = new Token("@", getPosition());
1398                    return '@';
1399                }
1400                break;
1401
1402            case '_':
1403                if (src.wasBeginOfLine() && src.matchString("_END__\n", false)) {
1404                    parserSupport.getResult().setEndSeen(true);
1405                    return 0;
1406                }
1407                tokenBuffer.setLength(0);
1408                break;
1409
1410            default:
1411                if (!isIdentifierChar(c)) {
1412                    throw new SyntaxException(getPosition(), "Invalid char `\\" + new PrintfFormat("%.3o").sprintf(c) + "' in expression");
1413                }
1414            
1415                tokenBuffer.setLength(0);
1416                break;
1417            }
1418    
1419            do {
1420                tokenBuffer.append(c);
1421                /* no special multibyte character handling is needed in Java
1422                 * if (ismbchar(c)) {
1423                    int i, len = mbclen(c)-1;
1424
1425                    for (i = 0; i < len; i++) {
1426                        c = src.read();
1427                        tokenBuffer.append(c);
1428                    }
1429                }*/

1430                c = src.read();
1431            } while (isIdentifierChar(c));
1432            
1433            char peek = src.read();
1434            if ((c == '!' || c == '?') &&
1435                isIdentifierChar(tokenBuffer.charAt(0)) && peek != '=') {
1436                src.unread(peek);
1437                tokenBuffer.append(c);
1438            } else {
1439                src.unread(peek);
1440                src.unread(c);
1441            }
1442            
1443            int result = 0;
1444
1445            switch (tokenBuffer.charAt(0)) {
1446                case '$':
1447                    lex_state = LexState.EXPR_END;
1448                    result = Tokens.tGVAR;
1449                    break;
1450                case '@':
1451                    lex_state = LexState.EXPR_END;
1452                    if (tokenBuffer.charAt(1) == '@') {
1453                        result = Tokens.tCVAR;
1454                    } else {
1455                        result = Tokens.tIVAR;
1456                    }
1457                    break;
1458
1459                default:
1460                    char last = tokenBuffer.charAt(tokenBuffer.length() - 1);
1461                    if (last == '!' || last == '?') {
1462                        result = Tokens.tFID;
1463                    } else {
1464                        if (lex_state == LexState.EXPR_FNAME) {
1465                            if ((c = src.read()) == '=') {
1466                                char c2 = src.read();
1467                            
1468                                if (c2 != '~' && c2 != '>' &&
1469                                    (c2 != '=' || (c2 == '\n' && src.peek('>')))) {
1470                                    result = Tokens.tIDENTIFIER;
1471                                    tokenBuffer.append(c);
1472                                    src.unread(c2);
1473                                } else {
1474                                    src.unread(c2);
1475                                    src.unread(c);
1476                                }
1477                            } else {
1478                                src.unread(c);
1479                            }
1480                        }
1481                        if (result == 0 && Character.isUpperCase(tokenBuffer.charAt(0))) {
1482                            result = Tokens.tCONSTANT;
1483                        } else {
1484                            result = Tokens.tIDENTIFIER;
1485                        }
1486                    }
1487
1488                    if (lex_state != LexState.EXPR_DOT) {
1489                        /* See if it is a reserved word. */
1490                        Keyword keyword = Keyword.getKeyword(tokenBuffer.toString(), tokenBuffer.length());
1491                        if (keyword != null) {
1492                            // enum lex_state
1493
LexState state = lex_state;
1494
1495                            lex_state = keyword.state;
1496                            if (state.isExprFName()) {
1497                                yaccValue = new Token(keyword.name, getPosition());
1498                            } else {
1499                                yaccValue = new Token(tokenBuffer.toString(), getPosition());
1500                            }
1501                            if (keyword.id0 == Tokens.kDO) {
1502                                if (conditionState.isInState()) {
1503                                    return Tokens.kDO_COND;
1504                                }
1505                                if (cmdArgumentState.isInState() && state != LexState.EXPR_CMDARG) {
1506                                    return Tokens.kDO_BLOCK;
1507                                }
1508                                if (state == LexState.EXPR_ENDARG) {
1509                                    return Tokens.kDO_BLOCK;
1510                                }
1511                                return Tokens.kDO;
1512                            }
1513
1514                            if (state == LexState.EXPR_BEG) {
1515                                return keyword.id0;
1516                            }
1517                            if (keyword.id0 != keyword.id1) {
1518                                lex_state = LexState.EXPR_BEG;
1519                            }
1520                            return keyword.id1;
1521                        }
1522                    }
1523
1524                    if (lex_state == LexState.EXPR_BEG ||
1525                            lex_state == LexState.EXPR_MID ||
1526                            lex_state == LexState.EXPR_DOT ||
1527                            lex_state == LexState.EXPR_ARG ||
1528                            lex_state == LexState.EXPR_CMDARG) {
1529                        if (commandState) {
1530                            lex_state = LexState.EXPR_CMDARG;
1531                        } else {
1532                            lex_state = LexState.EXPR_ARG;
1533                        }
1534                    } else {
1535                        lex_state = LexState.EXPR_END;
1536                    }
1537            }
1538            
1539            String JavaDoc tempVal = tokenBuffer.toString();
1540
1541            // Lame: parsing logic made it into lexer in ruby...So we
1542
// are emulating
1543
// FIXME: I believe this is much simpler now...
1544
StaticScope scope = parserSupport.getCurrentScope();
1545            if (IdUtil.getVarType(tempVal) == IdUtil.LOCAL_VAR &&
1546                    (scope instanceof BlockStaticScope && (scope.isDefined(tempVal) >= 0)) ||
1547                    (scope.getLocalScope().isDefined(tempVal) >= 0)) {
1548                lex_state = LexState.EXPR_END;
1549            }
1550
1551            yaccValue = new Token(tempVal, getPosition());
1552
1553            return result;
1554        }
1555    }
1556
1557    /**
1558     * Parse a number from the input stream.
1559     *
1560     *@param c The first character of the number.
1561     *@return A int constant wich represents a token.
1562     */

1563    private int parseNumber(char c) throws IOException JavaDoc {
1564        lex_state = LexState.EXPR_END;
1565
1566        tokenBuffer.setLength(0);
1567
1568        if (c == '-') {
1569            tokenBuffer.append(c);
1570            c = src.read();
1571        } else if (c == '+') {
1572            // We don't append '+' since Java number parser gets confused
1573
c = src.read();
1574        }
1575        
1576        char nondigit = '\0';
1577
1578        if (c == '0') {
1579            int startLen = tokenBuffer.length();
1580
1581            switch (c = src.read()) {
1582                case 'x' :
1583                case 'X' : // hexadecimal
1584
c = src.read();
1585                    if (isHexChar(c)) {
1586                        for (;; c = src.read()) {
1587                            if (c == '_') {
1588                                if (nondigit != '\0') {
1589                                    break;
1590                                }
1591                                nondigit = c;
1592                            } else if (isHexChar(c)) {
1593                                nondigit = '\0';
1594                                tokenBuffer.append(c);
1595                            } else {
1596                                break;
1597                            }
1598                        }
1599                    }
1600                    src.unread(c);
1601
1602                    if (tokenBuffer.length() == startLen) {
1603                        throw new SyntaxException(getPosition(), "Hexadecimal number without hex-digits.");
1604                    } else if (nondigit != '\0') {
1605                        throw new SyntaxException(getPosition(), "Trailing '_' in number.");
1606                    }
1607                    yaccValue = getInteger(tokenBuffer.toString(), 16);
1608                    return Tokens.tINTEGER;
1609                case 'b' :
1610                case 'B' : // binary
1611
c = src.read();
1612                    if (c == '0' || c == '1') {
1613                        for (;; c = src.read()) {
1614                            if (c == '_') {
1615                                if (nondigit != '\0') {
1616                                    break;
1617                                }
1618                                nondigit = c;
1619                            } else if (c == '0' || c == '1') {
1620                                nondigit = '\0';
1621                                tokenBuffer.append(c);
1622                            } else {
1623                                break;
1624                            }
1625                        }
1626                    }
1627                    src.unread(c);
1628
1629                    if (tokenBuffer.length() == startLen) {
1630                        throw new SyntaxException(getPosition(), "Binary number without digits.");
1631                    } else if (nondigit != '\0') {
1632                        throw new SyntaxException(getPosition(), "Trailing '_' in number.");
1633                    }
1634                    yaccValue = getInteger(tokenBuffer.toString(), 2);
1635                    return Tokens.tINTEGER;
1636                case 'd' :
1637                case 'D' : // decimal
1638
c = src.read();
1639                    if (Character.isDigit(c)) {
1640                        for (;; c = src.read()) {
1641                            if (c == '_') {
1642                                if (nondigit != '\0') {
1643                                    break;
1644                                }
1645                                nondigit = c;
1646                            } else if (Character.isDigit(c)) {
1647                                nondigit = '\0';
1648                                tokenBuffer.append(c);
1649                            } else {
1650                                break;
1651                            }
1652                        }
1653                    }
1654                    src.unread(c);
1655
1656                    if (tokenBuffer.length() == startLen) {
1657                        throw new SyntaxException(getPosition(), "Binary number without digits.");
1658                    } else if (nondigit != '\0') {
1659                        throw new SyntaxException(getPosition(), "Trailing '_' in number.");
1660                    }
1661                    yaccValue = getInteger(tokenBuffer.toString(), 2);
1662                    return Tokens.tINTEGER;
1663                case '0' : case '1' : case '2' : case '3' : case '4' : //Octal
1664
case '5' : case '6' : case '7' : case '_' :
1665                    for (;; c = src.read()) {
1666                        if (c == '_') {
1667                            if (nondigit != '\0') {
1668                                break;
1669                            }
1670                            nondigit = c;
1671                        } else if (c >= '0' && c <= '7') {
1672                            nondigit = '\0';
1673                            tokenBuffer.append(c);
1674                        } else {
1675                            break;
1676                        }
1677                    }
1678                    if (tokenBuffer.length() > startLen) {
1679                        src.unread(c);
1680
1681                        if (nondigit != '\0') {
1682                            throw new SyntaxException(getPosition(), "Trailing '_' in number.");
1683                        }
1684
1685                        yaccValue = getInteger(tokenBuffer.toString(), 8);
1686                        return Tokens.tINTEGER;
1687                    }
1688                case '8' :
1689                case '9' :
1690                    throw new SyntaxException(getPosition(), "Illegal octal digit.");
1691                case '.' :
1692                case 'e' :
1693                case 'E' :
1694                    tokenBuffer.append('0');
1695                    break;
1696                default :
1697                    src.unread(c);
1698                    yaccValue = new FixnumNode(getPosition(), 0);
1699                    return Tokens.tINTEGER;
1700            }
1701        }
1702
1703        boolean seen_point = false;
1704        boolean seen_e = false;
1705
1706        for (;; c = src.read()) {
1707            switch (c) {
1708                case '0' :
1709                case '1' :
1710                case '2' :
1711                case '3' :
1712                case '4' :
1713                case '5' :
1714                case '6' :
1715                case '7' :
1716                case '8' :
1717                case '9' :
1718                    nondigit = '\0';
1719                    tokenBuffer.append(c);
1720                    break;
1721                case '.' :
1722                    if (nondigit != '\0') {
1723                        src.unread(c);
1724                        throw new SyntaxException(getPosition(), "Trailing '_' in number.");
1725                    } else if (seen_point || seen_e) {
1726                        src.unread(c);
1727                        return getNumberToken(tokenBuffer.toString(), true, nondigit);
1728                    } else {
1729                        char c2;
1730                        if (!Character.isDigit(c2 = src.read())) {
1731                            src.unread(c2);
1732                            src.unread('.');
1733                            if (c == '_') {
1734                                    // Enebo: c can never be antrhign but '.'
1735
// Why did I put this here?
1736
} else {
1737                                yaccValue = getInteger(tokenBuffer.toString(), 10);
1738                                return Tokens.tINTEGER;
1739                            }
1740                        } else {
1741                            tokenBuffer.append('.');
1742                            tokenBuffer.append(c2);
1743                            seen_point = true;
1744                            nondigit = '\0';
1745                        }
1746                    }
1747                    break;
1748                case 'e' :
1749                case 'E' :
1750                    if (nondigit != '\0') {
1751                        throw new SyntaxException(getPosition(), "Trailing '_' in number.");
1752                    } else if (seen_e) {
1753                        src.unread(c);
1754                        return getNumberToken(tokenBuffer.toString(), true, nondigit);
1755                    } else {
1756                        tokenBuffer.append(c);
1757                        seen_e = true;
1758                        nondigit = c;
1759                        c = src.read();
1760                        if (c == '-' || c == '+') {
1761                            tokenBuffer.append(c);
1762                            nondigit = c;
1763                        } else {
1764                            src.unread(c);
1765                        }
1766                    }
1767                    break;
1768                case '_' : // '_' in number just ignored
1769
if (nondigit != '\0') {
1770                        throw new SyntaxException(getPosition(), "Trailing '_' in number.");
1771                    }
1772                    nondigit = c;
1773                    break;
1774                default :
1775                    src.unread(c);
1776                return getNumberToken(tokenBuffer.toString(), seen_e || seen_point, nondigit);
1777            }
1778        }
1779    }
1780
1781    private int getNumberToken(String JavaDoc number, boolean isFloat, char nondigit) {
1782        if (nondigit != '\0') {
1783            throw new SyntaxException(getPosition(), "Trailing '_' in number.");
1784        }
1785        if (isFloat) {
1786            double d;
1787            try {
1788                d = Double.parseDouble(number);
1789            } catch (NumberFormatException JavaDoc e) {
1790                warnings.warn(getPosition(), "Float " + number + " out of range.");
1791                
1792                d = number.startsWith("-") ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
1793            }
1794            yaccValue = new FloatNode(getPosition(), d);
1795            return Tokens.tFLOAT;
1796        }
1797        yaccValue = getInteger(number, 10);
1798        return Tokens.tINTEGER;
1799    }
1800}
1801
Popular Tags