1 28 package org.jruby.lexer.yacc; 29 30 import org.jruby.ast.RegexpNode; 31 import org.jruby.ast.StrNode; 32 import org.jruby.parser.ReOptions; 33 import org.jruby.parser.Tokens; 34 import org.jruby.util.ByteList; 35 36 public class StringTerm extends StrTerm { 37 38 private int func; 39 40 private final char term; 41 42 private final char paren; 43 44 45 private int nest; 46 47 public StringTerm(int func, char term, char paren) { 48 this.func = func; 49 this.term = term; 50 this.paren = paren; 51 this.nest = 0; 52 } 53 54 public int parseString(final RubyYaccLexer lexer, LexerSource src) throws java.io.IOException { 55 char c; 56 int space = 0; 57 58 if (func == -1) { 59 lexer.setValue(new Token("\"", lexer.getPosition())); 60 return Tokens.tSTRING_END; 61 } 62 63 c = src.read(); 64 if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0 65 && Character.isWhitespace(c)) { 66 do { 67 c = src.read(); 68 } while (Character.isWhitespace(c)); 69 space = 1; 70 } 71 72 if (c == term && nest == 0) { 73 if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0) { 74 func = -1; 75 lexer.getPosition(); 76 return ' '; 77 } 78 if ((func & RubyYaccLexer.STR_FUNC_REGEXP) != 0) { 79 lexer.setValue(new RegexpNode(src.getPosition(), ByteList.create(""), parseRegexpFlags(src))); 80 return Tokens.tREGEXP_END; 81 } 82 lexer.setValue(new Token("\"", lexer.getPosition())); 83 return Tokens.tSTRING_END; 84 } 85 if (space != 0) { 86 src.unread(c); 87 lexer.getPosition(); 88 return ' '; 89 } 90 ByteList buffer = new ByteList(); 91 92 if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0 && c == '#') { 93 c = src.read(); 94 switch (c) { 95 case '$': 96 case '@': 97 src.unread(c); 98 lexer.setValue(new Token("#" + c, lexer.getPosition())); 99 return Tokens.tSTRING_DVAR; 100 case '{': 101 lexer.setValue(new Token("#" + c, lexer.getPosition())); 102 return Tokens.tSTRING_DBEG; 103 } 104 buffer.append('#'); 105 } 106 src.unread(c); 107 if (parseStringIntoBuffer(src, buffer) == 0) { 108 throw new SyntaxException(src.getPosition(), "unterminated string meets end of file"); 109 } 110 111 lexer.setValue(new StrNode(lexer.getPosition(), buffer)); 112 return Tokens.tSTRING_CONTENT; 113 } 114 115 private int parseRegexpFlags(final LexerSource src) throws java.io.IOException { 116 char kcode = 0; 117 int options = 0; 118 char c; 119 StringBuffer unknownFlags = new StringBuffer (10); 120 121 for (c = src.read(); c != RubyYaccLexer.EOF 122 && Character.isLetter(c); c = src.read()) { 123 switch (c) { 124 case 'i': 125 options |= ReOptions.RE_OPTION_IGNORECASE; 126 break; 127 case 'x': 128 options |= ReOptions.RE_OPTION_EXTENDED; 129 break; 130 case 'm': 131 options |= ReOptions.RE_OPTION_MULTILINE; 132 break; 133 case 'o': 134 options |= ReOptions.RE_OPTION_ONCE; 135 break; 136 case 'n': 137 kcode = 16; 138 break; 139 case 'e': 140 kcode = 32; 141 break; 142 case 's': 143 kcode = 48; 144 break; 145 case 'u': 146 kcode = 64; 147 break; 148 default: 149 unknownFlags.append(c); 150 break; 151 } 152 } 153 src.unread(c); 154 if (unknownFlags.length() != 0) { 155 throw new SyntaxException(src.getPosition(), "unknown regexp option" 156 + (unknownFlags.length() > 1 ? "s" : "") + " - " 157 + unknownFlags.toString()); 158 } 159 return options | kcode; 160 } 161 162 public char parseStringIntoBuffer(LexerSource src, ByteList buffer) throws java.io.IOException { 163 char c; 164 165 while ((c = src.read()) != RubyYaccLexer.EOF) { 166 if (paren != '\0' && c == paren) { 167 nest++; 168 } else if (c == term) { 169 if (nest == 0) { 170 src.unread(c); 171 break; 172 } 173 nest--; 174 } else if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0 && c == '#' && !src.peek('\n')) { 175 char c2 = src.read(); 176 177 if (c2 == '$' || c2 == '@' || c2 == '{') { 178 src.unread(c2); 179 src.unread(c); 180 break; 181 } 182 src.unread(c2); 183 } else if (c == '\\') { 184 c = src.read(); 185 switch (c) { 186 case '\n': 187 if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0) { 188 break; 189 } 190 if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0) { 191 continue; 192 } 193 buffer.append('\\'); 194 break; 195 196 case '\\': 197 if ((func & RubyYaccLexer.STR_FUNC_ESCAPE) != 0) { 198 buffer.append(c); 199 } 200 break; 201 202 default: 203 if ((func & RubyYaccLexer.STR_FUNC_REGEXP) != 0) { 204 src.unread(c); 205 parseEscapeIntoBuffer(src, buffer); 206 continue; 207 } else if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0) { 208 src.unread(c); 209 if ((func & RubyYaccLexer.STR_FUNC_ESCAPE) != 0) { 210 buffer.append('\\'); 211 } 212 c = src.readEscape(); 213 } else if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0 214 && Character.isWhitespace(c)) { 215 216 } else if (c != term && !(paren != '\0' && c == paren)) { 217 buffer.append('\\'); 218 } 219 } 220 } else if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0 221 && Character.isWhitespace(c)) { 222 src.unread(c); 223 break; 224 } 225 if (c == '\0' && (func & RubyYaccLexer.STR_FUNC_SYMBOL) != 0) { 226 throw new SyntaxException(src.getPosition(), "symbol cannot contain '\\0'"); 227 } 228 buffer.append(c); 229 } 230 return c; 231 } 232 233 private void escaped(LexerSource src, ByteList buffer) throws java.io.IOException { 235 char c; 236 237 switch (c = src.read()) { 238 case '\\': 239 parseEscapeIntoBuffer(src, buffer); 240 break; 241 case RubyYaccLexer.EOF: 242 throw new SyntaxException(src.getPosition(), "Invalid escape character syntax"); 243 default: 244 buffer.append(c); 245 } 246 } 247 248 private void parseEscapeIntoBuffer(LexerSource src, ByteList buffer) throws java.io.IOException { 249 char c; 250 251 switch (c = src.read()) { 252 case '\n': 253 break; 254 case '0': 255 case '1': 256 case '2': 257 case '3': 258 case '4': 259 case '5': 260 case '6': 261 case '7': 262 buffer.append('\\'); 263 buffer.append(c); 264 for (int i = 0; i < 2; i++) { 265 c = src.read(); 266 if (c == RubyYaccLexer.EOF) { 267 throw new SyntaxException(src.getPosition(), "Invalid escape character syntax"); 268 } 269 if (!RubyYaccLexer.isOctChar(c)) { 270 src.unread(c); 271 break; 272 } 273 buffer.append(c); 274 } 275 break; 276 case 'x': 277 buffer.append('\\'); 278 buffer.append(c); 279 c = src.read(); 280 if (!RubyYaccLexer.isHexChar(c)) { 281 throw new SyntaxException(src.getPosition(), "Invalid escape character syntax"); 282 } 283 buffer.append(c); 284 c = src.read(); 285 if (RubyYaccLexer.isHexChar(c)) { 286 buffer.append(c); 287 } else { 288 src.unread(c); 289 } 290 break; 291 case 'M': 292 if ((c = src.read()) != '-') { 293 throw new SyntaxException(src.getPosition(), "Invalid escape character syntax"); 294 } 295 buffer.append(new byte[] { '\\', 'M', '-' }); 296 escaped(src, buffer); 297 break; 298 case 'C': 299 if ((c = src.read()) != '-') { 300 throw new SyntaxException(src.getPosition(), "Invalid escape character syntax"); 301 } 302 buffer.append(new byte[] { '\\', 'C', '-' }); 303 escaped(src, buffer); 304 break; 305 case 'c': 306 buffer.append(new byte[] { '\\', 'c' }); 307 escaped(src, buffer); 308 break; 309 case 0: 310 throw new SyntaxException(src.getPosition(), "Invalid escape character syntax"); 311 default: 312 if (c != '\\' || c != term) { 313 buffer.append('\\'); 314 } 315 buffer.append(c); 316 } 317 } 318 } 319 | Popular Tags |