KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jruby > lexer > yacc > LexerSource


/***** BEGIN LICENSE BLOCK *****
 * Version: CPL 1.0/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Common Public
 * License Version 1.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.eclipse.org/legal/cpl-v10.html
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * Copyright (C) 2004-2006 Thomas E Enebo <enebo@acm.org>
 * Copyright (C) 2004 Jan Arne Petersen <jpetersen@uni-bonn.de>
 * Copyright (C) 2004 Stefan Matthias Aust <sma@3plus4.de>
 * Copyright (C) 2005 Zach Dennis <zdennis@mktec.com>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the CPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the CPL, the GPL or the LGPL.
 ***** END LICENSE BLOCK *****/

31 package org.jruby.lexer.yacc;
32
33 import java.io.IOException JavaDoc;
34 import java.io.Reader JavaDoc;
35
36 import org.jruby.util.ByteList;
37
38 /**
39  * This class is what feeds the lexer. It is primarily a wrapper around a
40  * Reader that can unread() data back onto the source. Originally, I thought
41  * about using the PushBackReader to handle read/unread, but I realized that
42  * some extremely pathological case could overflow the pushback buffer. Better
43  * safe than sorry. I could have combined this implementation with a
44  * PushbackBuffer, but the added complexity did not seem worth it.
45  *
46  * @author enebo
47  */

48 public class LexerSource {
49     private static final int INITIAL_PUSHBACK_SIZE = 100;
50     private static final int INITIAL_LINEWIDTH_SIZE = 2048;
51     
52     // Where we get new positions from.
53
private ISourcePositionFactory positionFactory;
54     
55     // Where we get our newest char's
56
private final Reader JavaDoc reader;
57     
58     // Our readback/pushback buffer.
59
private char buf[] = new char[INITIAL_PUSHBACK_SIZE];
60     
61     // index of last character in pushback buffer
62
private int bufLength = -1;
63     
64     // How long is every line we have run across. This makes it possible for us to unread()
65
// past a read() line and still know what column we are at.
66
private int lineWidths[] = new int[INITIAL_LINEWIDTH_SIZE];
67
68     // index of last line width in line widths list
69
private int lineWidthsLength = -1;
70     
71     // The name of this source (e.g. a filename: foo.rb)
72
private final String JavaDoc sourceName;
73     
74     // Number of newlines read from the reader
75
private int line = 0;
76     
77     // Column of source.
78
private int column = 0;
79     
80     // How many bytes into the source are we?
81
private int offset = 0;
82
83     // Flag to let us now in next read after a newline that we should reset column
84
private boolean nextCharIsOnANewLine = true;
85     
86     /**
87      * Create our food-source for the lexer
88      *
89      * @param sourceName is the file we are reading
90      * @param reader is what represents the contents of file sourceName
91      */

92     public LexerSource(String JavaDoc sourceName, Reader JavaDoc reader) {
93         this.sourceName = sourceName;
94         this.reader = reader;
95         this.positionFactory = new SourcePositionFactory(this);
96     }
97     
98     public LexerSource(String JavaDoc sourceName, Reader JavaDoc reader, ISourcePositionFactory factory) {
99         this.sourceName = sourceName;
100         this.reader = reader;
101         this.positionFactory = factory;
102     }
103
104     /**
105      * Read next character from this source
106      *
107      * @return next character to viewed by the source
108      */

109     public char read() throws IOException JavaDoc {
110         int length = bufLength;
111         char c;
112         
113         if (length >= 0) {
114             c = buf[bufLength--];
115         } else {
116             c = wrappedRead();
117             
118             // EOF...Do not advance column...Go straight to jail
119
if (c == 0) {
120                 //offset++;
121
return c;
122             }
123         }
124
125         // Reset column back to zero on first read of a line (note it will be-
126
// come '1' by the time it leaves read().
127
if (nextCharIsOnANewLine) {
128             nextCharIsOnANewLine = false;
129             column = 0;
130         }
131         
132         offset++;
133         column++;
134         if (c == '\n') {
135             line++;
136             // Since we are not reading off of unread buffer we must at the
137
// end of a new line for the first time. Add it.
138
if (length < 0) {
139                 lineWidths[++lineWidthsLength] = column;
140                 // If we outgrow our lineLength list then grow it
141
if (lineWidthsLength + 1 == lineWidths.length) {
142                     int[] newLineWidths = new int[lineWidths.length + INITIAL_LINEWIDTH_SIZE];
143                         
144                     System.arraycopy(lineWidths, 0, newLineWidths, 0, lineWidths.length);
145                         
146                     lineWidths = newLineWidths;
147                 }
148             }
149             
150             nextCharIsOnANewLine = true;
151         }
152             
153         return c;
154     }
155
156     /**
157      * Pushes char back onto this source. Note, this also
158      * allows us to push whatever is passes back into the source.
159      *
160      * @param c to be put back onto the source
161      */

162     public void unread(char c) {
163         if (c != (char) 0) {
164             offset--;
165         
166             if (c == '\n') {
167                 line--;
168                 column = lineWidths[line];
169                 nextCharIsOnANewLine = true;
170             } else {
171                 column--;
172             }
173
174             buf[++bufLength] = c;
175             // If we outgrow our pushback stack then grow it (this should only happen in
176
// pretty pathological cases).
177
if (bufLength + 1 == buf.length) {
178                 char[] newBuf = new char[buf.length + INITIAL_PUSHBACK_SIZE];
179                 
180                 System.arraycopy(buf, 0, newBuf, 0, buf.length);
181                 
182                 buf = newBuf;
183             }
184         }
185     }
186     
187     public boolean peek(char to) throws IOException JavaDoc {
188         char c = read();
189         unread(c);
190         return c == to;
191     }
192     
193     /**
194      * What file are we lexing?
195      * @return the files name
196      */

197     public String JavaDoc getFilename() {
198         return sourceName;
199     }
200     
201     /**
202      * What line are we at?
203      * @return the line number 0...line_size-1
204      */

205     public int getLine() {
206         return line;
207     }
208     
209     /**
210      * Are we at beggining of line?
211      *
212      * @return the column (0..x)
213      */

214     public int getColumn() {
215         return column;
216     }
217     
218     /**
219      * The location of the last byte we read from the source.
220      *
221      * @return current location of source
222      */

223     public int getOffset() {
224         return (offset <= 0 ? 0 : offset);
225     }
226
227     /**
228      * Where is the reader within the source {filename,row}
229      *
230      * @return the current position
231      */

232     public ISourcePosition getPosition(ISourcePosition startPosition, boolean inclusive) {
233         return positionFactory.getPosition(startPosition, inclusive);
234     }
235     
236     /**
237      * Where is the reader within the source {filename,row}
238      *
239      * @return the current position
240      */

241     public ISourcePosition getPosition() {
242         return positionFactory.getPosition(null, false);
243     }
244     
245     public ISourcePositionFactory getPositionFactory() {
246         return positionFactory;
247     }
248
249     /**
250      * Convenience method to hide exception. If we do hit an exception
251      * we will pretend we EOF'd.
252      *
253      * @return the current char or EOF (at EOF or on error)
254      */

255     private char wrappedRead() throws IOException JavaDoc {
256             int c = reader.read();
257             
258             // If \r\n then just pass along \n (windows)
259
// If \r[^\n] then pass along \n (MAC)
260
if (c == '\r') {
261                 if ((c = reader.read()) != '\n') {
262                     unread((char)c);
263                     c = '\n';
264                 } else {
265                     // Position within source must reflect the actual offset and column. Since
266
// we ate an extra character here (this accounting is normally done in read
267
// ), we should update position info.
268
offset++;
269                     column++;
270                 }
271             }
272             
273             return c != -1 ? (char) c : '\0';
274
275     }
276     
277     /**
278      * Create a source.
279      *
280      * @param name the name of the source (e.g a filename: foo.rb)
281      * @param content the data of the source
282      * @return the new source
283      */

284     public static LexerSource getSource(String JavaDoc name, Reader JavaDoc content) {
285         return new LexerSource(name, content);
286     }
287
288     public String JavaDoc readLine() throws IOException JavaDoc {
289         StringBuffer JavaDoc sb = new StringBuffer JavaDoc(80);
290         for (char c = read(); c != '\n' && c != '\0'; c = read()) {
291             sb.append(c);
292         }
293         return sb.toString();
294     }
295     
296     public ByteList readLineBytes() throws IOException JavaDoc {
297         ByteList bytelist = new ByteList(80);
298
299         for (char c = read(); c != '\n' && c != '\0'; c = read()) {
300             bytelist.append(c);
301         }
302         return bytelist;
303     }
304
305     public void unreadMany(CharSequence JavaDoc buffer) {
306         int length = buffer.length();
307         for (int i = length - 1; i >= 0; i--) {
308             unread(buffer.charAt(i));
309         }
310     }
311
312     public boolean matchString(String JavaDoc match, boolean indent) throws IOException JavaDoc {
313         int length = match.length();
314         StringBuffer JavaDoc buffer = new StringBuffer JavaDoc(length + 20);
315         
316         if (indent) {
317             char c;
318             while ((c = read()) != '\0') {
319                 if (!Character.isWhitespace(c)) {
320                     unread(c);
321                     break;
322                 }
323                 buffer.append(c);
324             }
325         }
326         
327         for (int i = 0; i < length; i++) {
328             char c = read();
329             buffer.append(c);
330             if (match.charAt(i) != c) {
331                 unreadMany(buffer);
332                 return false;
333             }
334         }
335         return true;
336     }
337
338     public boolean wasBeginOfLine() {
339         return getColumn() == 1;
340     }
341
342     public char readEscape() throws IOException JavaDoc {
343         char c = read();
344
345         switch (c) {
346             case '\\' : // backslash
347
return c;
348             case 'n' : // newline
349
return '\n';
350             case 't' : // horizontal tab
351
return '\t';
352             case 'r' : // carriage return
353
return '\r';
354             case 'f' : // form feed
355
return '\f';
356             case 'v' : // vertical tab
357
return '\u0013';
358             case 'a' : // alarm(bell)
359
return '\u0007';
360             case 'e' : // escape
361
return '\u0033';
362             case '0' : case '1' : case '2' : case '3' : // octal constant
363
case '4' : case '5' : case '6' : case '7' :
364                 unread(c);
365                 return scanOct(3);
366             case 'x' : // hex constant
367
int hexOffset = getColumn();
368                 char hexValue = scanHex(2);
369                 
370                 // No hex value after the 'x'.
371
if (hexOffset == getColumn()) {
372                     throw new SyntaxException(getPosition(), "Invalid escape character syntax");
373                 }
374                 return hexValue;
375             case 'b' : // backspace
376
return '\010';
377             case 's' : // space
378
return ' ';
379             case 'M' :
380                 if ((c = read()) != '-') {
381                     throw new SyntaxException(getPosition(), "Invalid escape character syntax");
382                 } else if ((c = read()) == '\\') {
383                     return (char) (readEscape() | 0x80);
384                 } else if (c == '\0') {
385                     throw new SyntaxException(getPosition(), "Invalid escape character syntax");
386                 }
387                 return (char) ((c & 0xff) | 0x80);
388             case 'C' :
389                 if ((c = read()) != '-') {
390                     throw new SyntaxException(getPosition(), "Invalid escape character syntax");
391                 }
392             case 'c' :
393                 if ((c = read()) == '\\') {
394                     c = readEscape();
395                 } else if (c == '?') {
396                     return '\u0177';
397                 } else if (c == '\0') {
398                     throw new SyntaxException(getPosition(), "Invalid escape character syntax");
399                 }
400                 return (char) (c & 0x9f);
401             case '\0' :
402                 throw new SyntaxException(getPosition(), "Invalid escape character syntax");
403             default :
404                 return c;
405         }
406     }
407
408     private char scanHex(int count) throws IOException JavaDoc {
409         char value = '\0';
410
411         for (int i = 0; i < count; i++) {
412             char c = read();
413
414             if (!RubyYaccLexer.isHexChar(c)) {
415                 unread(c);
416                 break;
417             }
418
419             value <<= 4;
420             value |= Integer.parseInt(""+c, 16) & 15;
421         }
422
423         return value;
424     }
425
426     private char scanOct(int count) throws IOException JavaDoc {
427         char value = '\0';
428
429         for (int i = 0; i < count; i++) {
430             char c = read();
431
432             if (!RubyYaccLexer.isOctChar(c)) {
433                 unread(c);
434                 break;
435             }
436
437             value <<= 3;
438             value |= Integer.parseInt(""+c, 8);
439         }
440
441         return value;
442     }
443
444     /**
445      * Get character ahead of current position by offset positions.
446      *
447      * @param anOffset is location past current position to get char at
448      * @return character index positions ahead of source location or EOF
449      */

450     public char getCharAt(int anOffset) throws IOException JavaDoc {
451         StringBuffer JavaDoc buffer = new StringBuffer JavaDoc(anOffset);
452     
453         // read next offset chars
454
for (int i = 0; i < anOffset; i++) {
455             buffer.append(read());
456         }
457         
458         int length = buffer.length();
459         
460         // Whoops not enough chars left EOF!
461
if (length == 0){
462             return '\0';
463         }
464         
465         // Push chars back now that we found it
466
for (int i = 0; i < length; i++) {
467             unread(buffer.charAt(i));
468         }
469         
470         return buffer.charAt(length - 1);
471     }
472
473     public String JavaDoc toString() {
474         try {
475             StringBuffer JavaDoc buffer = new StringBuffer JavaDoc(20);
476             for (int i = 0; i < 20; i++) {
477                 buffer.append(read());
478             }
479             for (int i = 0; i < 20; i++) {
480                 unread(buffer.charAt(buffer.length() - i - 1));
481             }
482             buffer.append(" ...");
483             return buffer.toString();
484         } catch(Exception JavaDoc e) {
485             return null;
486         }
487     }
488 }
489
Popular Tags