KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jruby > lexer > yacc > StringTerm


/***** BEGIN LICENSE BLOCK *****
 * Version: CPL 1.0/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Common Public
 * License Version 1.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.eclipse.org/legal/cpl-v10.html
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * Copyright (C) 2004 Jan Arne Petersen <jpetersen@uni-bonn.de>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the CPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the CPL, the GPL or the LGPL.
 ***** END LICENSE BLOCK *****/

28 package org.jruby.lexer.yacc;
29
30 import org.jruby.ast.RegexpNode;
31 import org.jruby.ast.StrNode;
32 import org.jruby.parser.ReOptions;
33 import org.jruby.parser.Tokens;
34 import org.jruby.util.ByteList;
35
36 public class StringTerm extends StrTerm {
37     /* bit flags to indicate the string type */
38     private int func;
39
40     private final char term;
41
42     private final char paren;
43
44     /* nested string level */
45     private int nest;
46
47     public StringTerm(int func, char term, char paren) {
48         this.func = func;
49         this.term = term;
50         this.paren = paren;
51         this.nest = 0;
52     }
53
54     public int parseString(final RubyYaccLexer lexer, LexerSource src) throws java.io.IOException JavaDoc {
55         char c;
56         int space = 0;
57
58         if (func == -1) {
59             lexer.setValue(new Token("\"", lexer.getPosition()));
60             return Tokens.tSTRING_END;
61         }
62
63         c = src.read();
64         if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0
65                 && Character.isWhitespace(c)) {
66             do {
67                 c = src.read();
68             } while (Character.isWhitespace(c));
69             space = 1;
70         }
71
72         if (c == term && nest == 0) {
73             if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0) {
74                 func = -1;
75                 lexer.getPosition();
76                 return ' ';
77             }
78             if ((func & RubyYaccLexer.STR_FUNC_REGEXP) != 0) {
79                 lexer.setValue(new RegexpNode(src.getPosition(), ByteList.create(""), parseRegexpFlags(src)));
80                 return Tokens.tREGEXP_END;
81             }
82             lexer.setValue(new Token("\"", lexer.getPosition()));
83             return Tokens.tSTRING_END;
84         }
85         if (space != 0) {
86             src.unread(c);
87             lexer.getPosition();
88             return ' ';
89         }
90         ByteList buffer = new ByteList();
91
92         if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0 && c == '#') {
93             c = src.read();
94             switch (c) {
95             case '$':
96             case '@':
97                 src.unread(c);
98                 lexer.setValue(new Token("#" + c, lexer.getPosition()));
99                 return Tokens.tSTRING_DVAR;
100             case '{':
101                 lexer.setValue(new Token("#" + c, lexer.getPosition()));
102                 return Tokens.tSTRING_DBEG;
103             }
104             buffer.append('#');
105         }
106         src.unread(c);
107         if (parseStringIntoBuffer(src, buffer) == 0) {
108             throw new SyntaxException(src.getPosition(), "unterminated string meets end of file");
109         }
110
111         lexer.setValue(new StrNode(lexer.getPosition(), buffer));
112         return Tokens.tSTRING_CONTENT;
113     }
114
115     private int parseRegexpFlags(final LexerSource src) throws java.io.IOException JavaDoc {
116         char kcode = 0;
117         int options = 0;
118         char c;
119         StringBuffer JavaDoc unknownFlags = new StringBuffer JavaDoc(10);
120
121         for (c = src.read(); c != RubyYaccLexer.EOF
122                 && Character.isLetter(c); c = src.read()) {
123             switch (c) {
124             case 'i':
125                 options |= ReOptions.RE_OPTION_IGNORECASE;
126                 break;
127             case 'x':
128                 options |= ReOptions.RE_OPTION_EXTENDED;
129                 break;
130             case 'm':
131                 options |= ReOptions.RE_OPTION_MULTILINE;
132                 break;
133             case 'o':
134                 options |= ReOptions.RE_OPTION_ONCE;
135                 break;
136             case 'n':
137                 kcode = 16;
138                 break;
139             case 'e':
140                 kcode = 32;
141                 break;
142             case 's':
143                 kcode = 48;
144                 break;
145             case 'u':
146                 kcode = 64;
147                 break;
148             default:
149                 unknownFlags.append(c);
150                 break;
151             }
152         }
153         src.unread(c);
154         if (unknownFlags.length() != 0) {
155             throw new SyntaxException(src.getPosition(), "unknown regexp option"
156                     + (unknownFlags.length() > 1 ? "s" : "") + " - "
157                     + unknownFlags.toString());
158         }
159         return options | kcode;
160     }
161
162     public char parseStringIntoBuffer(LexerSource src, ByteList buffer) throws java.io.IOException JavaDoc {
163         char c;
164
165         while ((c = src.read()) != RubyYaccLexer.EOF) {
166             if (paren != '\0' && c == paren) {
167                 nest++;
168             } else if (c == term) {
169                 if (nest == 0) {
170                     src.unread(c);
171                     break;
172                 }
173                 nest--;
174             } else if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0 && c == '#' && !src.peek('\n')) {
175                 char c2 = src.read();
176
177                 if (c2 == '$' || c2 == '@' || c2 == '{') {
178                     src.unread(c2);
179                     src.unread(c);
180                     break;
181                 }
182                 src.unread(c2);
183             } else if (c == '\\') {
184                 c = src.read();
185                 switch (c) {
186                 case '\n':
187                     if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0) {
188                         break;
189                     }
190                     if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0) {
191                         continue;
192                     }
193                     buffer.append('\\');
194                     break;
195
196                 case '\\':
197                     if ((func & RubyYaccLexer.STR_FUNC_ESCAPE) != 0) {
198                         buffer.append(c);
199                     }
200                     break;
201
202                 default:
203                     if ((func & RubyYaccLexer.STR_FUNC_REGEXP) != 0) {
204                         src.unread(c);
205                         parseEscapeIntoBuffer(src, buffer);
206                         continue;
207                     } else if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0) {
208                         src.unread(c);
209                         if ((func & RubyYaccLexer.STR_FUNC_ESCAPE) != 0) {
210                             buffer.append('\\');
211                         }
212                         c = src.readEscape();
213                     } else if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0
214                             && Character.isWhitespace(c)) {
215                         /* ignore backslashed spaces in %w */
216                     } else if (c != term && !(paren != '\0' && c == paren)) {
217                         buffer.append('\\');
218                     }
219                 }
220             } else if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0
221                     && Character.isWhitespace(c)) {
222                 src.unread(c);
223                 break;
224             }
225             if (c == '\0' && (func & RubyYaccLexer.STR_FUNC_SYMBOL) != 0) {
226                 throw new SyntaxException(src.getPosition(), "symbol cannot contain '\\0'");
227             }
228             buffer.append(c);
229         }
230         return c;
231     }
232
233     // Was a goto in original ruby lexer
234
private void escaped(LexerSource src, ByteList buffer) throws java.io.IOException JavaDoc {
235         char c;
236
237         switch (c = src.read()) {
238         case '\\':
239             parseEscapeIntoBuffer(src, buffer);
240             break;
241         case RubyYaccLexer.EOF:
242             throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
243         default:
244             buffer.append(c);
245         }
246     }
247
248     private void parseEscapeIntoBuffer(LexerSource src, ByteList buffer) throws java.io.IOException JavaDoc {
249         char c;
250
251         switch (c = src.read()) {
252         case '\n':
253             break; /* just ignore */
254         case '0':
255         case '1':
256         case '2':
257         case '3': /* octal constant */
258         case '4':
259         case '5':
260         case '6':
261         case '7':
262             buffer.append('\\');
263             buffer.append(c);
264             for (int i = 0; i < 2; i++) {
265                 c = src.read();
266                 if (c == RubyYaccLexer.EOF) {
267                     throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
268                 }
269                 if (!RubyYaccLexer.isOctChar(c)) {
270                     src.unread(c);
271                     break;
272                 }
273                 buffer.append(c);
274             }
275             break;
276         case 'x': /* hex constant */
277             buffer.append('\\');
278             buffer.append(c);
279             c = src.read();
280             if (!RubyYaccLexer.isHexChar(c)) {
281                 throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
282             }
283             buffer.append(c);
284             c = src.read();
285             if (RubyYaccLexer.isHexChar(c)) {
286                 buffer.append(c);
287             } else {
288                 src.unread(c);
289             }
290             break;
291         case 'M':
292             if ((c = src.read()) != '-') {
293                 throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
294             }
295             buffer.append(new byte[] { '\\', 'M', '-' });
296             escaped(src, buffer);
297             break;
298         case 'C':
299             if ((c = src.read()) != '-') {
300                 throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
301             }
302             buffer.append(new byte[] { '\\', 'C', '-' });
303             escaped(src, buffer);
304             break;
305         case 'c':
306             buffer.append(new byte[] { '\\', 'c' });
307             escaped(src, buffer);
308             break;
309         case 0:
310             throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
311         default:
312             if (c != '\\' || c != term) {
313                 buffer.append('\\');
314             }
315             buffer.append(c);
316         }
317     }
318 }
319
Popular Tags