StringTerm


/***** BEGIN LICENSE BLOCK *****
 * Version: CPL 1.0/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Common Public
 * License Version 1.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.eclipse.org/legal/cpl-v10.html
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * Copyright (C) 2004 Jan Arne Petersen <jpetersen@uni-bonn.de>
 * 
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the CPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the CPL, the GPL or the LGPL.
 ***** END LICENSE BLOCK *****/
28  package org.jruby.lexer.yacc;
29  
30  import org.jruby.ast.RegexpNode;
31  import org.jruby.ast.StrNode;
32  import org.jruby.parser.ReOptions;
33  import org.jruby.parser.Tokens;
34  import org.jruby.util.ByteList;
35  
36  public class StringTerm extends StrTerm {
37      /* bit flags to indicate the string type */
38      private int func;
39  
40      private final char term;
41  
42      private final char paren;
43  
44      /* nested string level */
45      private int nest;
46  
47      public StringTerm(int func, char term, char paren) {
48          this.func = func;
49          this.term = term;
50          this.paren = paren;
51          this.nest = 0;
52      }
53  
54      public int parseString(final RubyYaccLexer lexer, LexerSource src) throws java.io.IOException   {
55          char c;
56          int space = 0;
57  
58          if (func == -1) {
59              lexer.setValue(new Token("\"", lexer.getPosition()));
60              return Tokens.tSTRING_END;
61          }
62  
63          c = src.read();
64          if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0
65                  && Character.isWhitespace(c)) {
66              do {
67                  c = src.read();
68              } while (Character.isWhitespace(c));
69              space = 1;
70          }
71  
72          if (c == term && nest == 0) {
73              if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0) {
74                  func = -1;
75                  lexer.getPosition();
76                  return ' ';
77              }
78              if ((func & RubyYaccLexer.STR_FUNC_REGEXP) != 0) {
79                  lexer.setValue(new RegexpNode(src.getPosition(), ByteList.create(""), parseRegexpFlags(src)));
80                  return Tokens.tREGEXP_END;
81              }
82              lexer.setValue(new Token("\"", lexer.getPosition()));
83              return Tokens.tSTRING_END;
84          }
85          if (space != 0) {
86              src.unread(c);
87              lexer.getPosition();
88              return ' ';
89          }
90          ByteList buffer = new ByteList();
91  
92          if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0 && c == '#') {
93              c = src.read();
94              switch (c) {
95              case '$':
96              case '@':
97                  src.unread(c);
98                  lexer.setValue(new Token("#" + c, lexer.getPosition()));
99                  return Tokens.tSTRING_DVAR;
100             case '{':
101                 lexer.setValue(new Token("#" + c, lexer.getPosition())); 
102                 return Tokens.tSTRING_DBEG;
103             }
104             buffer.append('#');
105         }
106         src.unread(c);
107         if (parseStringIntoBuffer(src, buffer) == 0) {
108             throw new SyntaxException(src.getPosition(), "unterminated string meets end of file");
109         }
110 
111         lexer.setValue(new StrNode(lexer.getPosition(), buffer)); 
112         return Tokens.tSTRING_CONTENT;
113     }
114 
115     private int parseRegexpFlags(final LexerSource src) throws java.io.IOException   {
116         char kcode = 0;
117         int options = 0;
118         char c;
119         StringBuffer   unknownFlags = new StringBuffer  (10);
120 
121         for (c = src.read(); c != RubyYaccLexer.EOF
122                 && Character.isLetter(c); c = src.read()) {
123             switch (c) {
124             case 'i':
125                 options |= ReOptions.RE_OPTION_IGNORECASE;
126                 break;
127             case 'x':
128                 options |= ReOptions.RE_OPTION_EXTENDED;
129                 break;
130             case 'm':
131                 options |= ReOptions.RE_OPTION_MULTILINE;
132                 break;
133             case 'o':
134                 options |= ReOptions.RE_OPTION_ONCE;
135                 break;
136             case 'n':
137                 kcode = 16;
138                 break;
139             case 'e':
140                 kcode = 32;
141                 break;
142             case 's':
143                 kcode = 48;
144                 break;
145             case 'u':
146                 kcode = 64;
147                 break;
148             default:
149                 unknownFlags.append(c);
150                 break;
151             }
152         }
153         src.unread(c);
154         if (unknownFlags.length() != 0) {
155             throw new SyntaxException(src.getPosition(), "unknown regexp option"
156                     + (unknownFlags.length() > 1 ? "s" : "") + " - "
157                     + unknownFlags.toString());
158         }
159         return options | kcode;
160     }
161 
162     public char parseStringIntoBuffer(LexerSource src, ByteList buffer) throws java.io.IOException   {
163         char c;
164 
165         while ((c = src.read()) != RubyYaccLexer.EOF) {
166             if (paren != '\0' && c == paren) {
167                 nest++;
168             } else if (c == term) {
169                 if (nest == 0) {
170                     src.unread(c);
171                     break;
172                 }
173                 nest--;
174             } else if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0 && c == '#' && !src.peek('\n')) {
175                 char c2 = src.read();
176 
177                 if (c2 == '$' || c2 == '@' || c2 == '{') {
178                     src.unread(c2);
179                     src.unread(c);
180                     break;
181                 }
182                 src.unread(c2);
183             } else if (c == '\\') {
184                 c = src.read();
185                 switch (c) {
186                 case '\n':
187                     if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0) {
188                         break;
189                     }
190                     if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0) {
191                         continue;
192                     }
193                     buffer.append('\\');
194                     break;
195 
196                 case '\\':
197                     if ((func & RubyYaccLexer.STR_FUNC_ESCAPE) != 0) {
198                         buffer.append(c);
199                     }
200                     break;
201 
202                 default:
203                     if ((func & RubyYaccLexer.STR_FUNC_REGEXP) != 0) {
204                         src.unread(c);
205                         parseEscapeIntoBuffer(src, buffer);
206                         continue;
207                     } else if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0) {
208                         src.unread(c);
209                         if ((func & RubyYaccLexer.STR_FUNC_ESCAPE) != 0) {
210                             buffer.append('\\');
211                         }
212                         c = src.readEscape();
213                     } else if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0
214                             && Character.isWhitespace(c)) {
215                         /* ignore backslashed spaces in %w */
216                     } else if (c != term && !(paren != '\0' && c == paren)) {
217                         buffer.append('\\');
218                     }
219                 }
220             } else if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0
221                     && Character.isWhitespace(c)) {
222                 src.unread(c);
223                 break;
224             }
225             if (c == '\0' && (func & RubyYaccLexer.STR_FUNC_SYMBOL) != 0) {
226                 throw new SyntaxException(src.getPosition(), "symbol cannot contain '\\0'");
227             }
228             buffer.append(c);
229         }
230         return c;
231     }
232 
233     // Was a goto in original ruby lexer
234     private void escaped(LexerSource src, ByteList buffer) throws java.io.IOException   {
235         char c;
236 
237         switch (c = src.read()) {
238         case '\\':
239             parseEscapeIntoBuffer(src, buffer);
240             break;
241         case RubyYaccLexer.EOF:
242             throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
243         default:
244             buffer.append(c);
245         }
246     }
247 
248     private void parseEscapeIntoBuffer(LexerSource src, ByteList buffer) throws java.io.IOException   {
249         char c;
250 
251         switch (c = src.read()) {
252         case '\n':
253             break; /* just ignore */
254         case '0':
255         case '1':
256         case '2':
257         case '3': /* octal constant */
258         case '4':
259         case '5':
260         case '6':
261         case '7':
262             buffer.append('\\');
263             buffer.append(c);
264             for (int i = 0; i < 2; i++) {
265                 c = src.read();
266                 if (c == RubyYaccLexer.EOF) {
267                     throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
268                 }
269                 if (!RubyYaccLexer.isOctChar(c)) {
270                     src.unread(c);
271                     break;
272                 }
273                 buffer.append(c);
274             }
275             break;
276         case 'x': /* hex constant */
277             buffer.append('\\');
278             buffer.append(c);
279             c = src.read();
280             if (!RubyYaccLexer.isHexChar(c)) {
281                 throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
282             }
283             buffer.append(c);
284             c = src.read();
285             if (RubyYaccLexer.isHexChar(c)) {
286                 buffer.append(c);
287             } else {
288                 src.unread(c);
289             }
290             break;
291         case 'M':
292             if ((c = src.read()) != '-') {
293                 throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
294             }
295             buffer.append(new byte[] { '\\', 'M', '-' });
296             escaped(src, buffer);
297             break;
298         case 'C':
299             if ((c = src.read()) != '-') {
300                 throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
301             }
302             buffer.append(new byte[] { '\\', 'C', '-' });
303             escaped(src, buffer);
304             break;
305         case 'c':
306             buffer.append(new byte[] { '\\', 'c' });
307             escaped(src, buffer);
308             break;
309         case 0:
310             throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
311         default:
312             if (c != '\\' || c != term) {
313                 buffer.append('\\');
314             }
315             buffer.append(c);
316         }
317     }
318 }
319
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags