package org.netbeans.modules.ruby.lexer;

import java.io.IOException;
import java.io.Reader;

import org.jruby.common.NullWarnings;
import org.jruby.lexer.yacc.LexState;
import org.jruby.lexer.yacc.LexerSource;
import org.jruby.lexer.yacc.RubyYaccLexer;
import org.jruby.lexer.yacc.StrTerm;
import org.jruby.lexer.yacc.SyntaxException;
import org.jruby.parser.Tokens;
import org.netbeans.api.gsf.GsfTokenId;
import org.netbeans.api.lexer.Token;
import org.netbeans.modules.ruby.lexer.RubyTokenId;
import org.netbeans.spi.lexer.Lexer;
import org.netbeans.spi.lexer.LexerInput;
import org.netbeans.spi.lexer.LexerRestartInfo;
import org.netbeans.spi.lexer.TokenFactory;
import org.openide.ErrorManager;

/**
 * Adapts JRuby's {@link RubyYaccLexer} to the NetBeans {@link Lexer} SPI.
 *
 * <p>Characters are pulled from the {@link LexerInput} through a small {@link Reader}
 * bridge, handed to the JRuby lexer, and the integer token codes it produces are mapped
 * to {@link GsfTokenId} values. A few flags (regexp / symbol / embedded code /
 * substituting string) are tracked locally so string-like tokens can be classified, and
 * they are captured in the restart state returned by {@link #state()}.
 */
public final class RubyLexer implements Lexer<GsfTokenId> {

    /** When {@code true}, released lexers are parked in {@link #cached} for reuse. */
    private static final boolean REUSE_LEXERS = false;

    /** Most recently released lexer; guarded by the {@code RubyLexer.class} monitor. */
    private static RubyLexer cached;

    private RubyYaccLexer lexer;
    private LexerSource lexerSource;
    private boolean inRegexp;
    private LexerInput input;
    private TokenFactory<GsfTokenId> tokenFactory;
    private boolean substituting;
    private boolean inSymbol;
    private boolean inEmbedded;

    /**
     * Creates the underlying JRuby lexer with warnings discarded and whitespace preserved.
     *
     * @param info restart info; not used here, {@link #restart} consumes it
     */
    private RubyLexer(LexerRestartInfo<GsfTokenId> info) {
        lexer = new RubyYaccLexer();
        lexer.setWarnings(new NullWarnings());
        lexer.setPreserveSpaces(true);
    }

    /**
     * Returns a lexer initialized from {@code info}, reusing a previously released
     * instance when one is available.
     *
     * @param info input, token factory and optional restart state
     * @return a lexer ready to produce tokens
     */
    public static synchronized RubyLexer create(LexerRestartInfo<GsfTokenId> info) {
        RubyLexer rubyLexer = cached;

        if (rubyLexer == null) {
            rubyLexer = new RubyLexer(info);
        } else {
            // Hand the cached instance out only once; otherwise two callers could end up
            // sharing (and clobbering) the same mutable lexer until release() is called.
            cached = null;
        }

        rubyLexer.restart(info);

        return rubyLexer;
    }

    /**
     * Resets all local and JRuby lexer state and re-attaches to the input in {@code info}.
     *
     * <p>If {@code info.state()} is a {@link JRubyLexerRestartInfo} the full saved state is
     * restored; if it is an {@link Integer} it is interpreted as a {@link LexState} ordinal.
     *
     * @param info input, token factory and optional restart state
     */
    void restart(LexerRestartInfo<GsfTokenId> info) {
        inRegexp = false;
        substituting = false;
        inSymbol = false;
        inEmbedded = false;
        lexer.reset();

        input = info.input();
        tokenFactory = info.tokenFactory();

        String fileName = "unknown";
        Reader lexerReader = new LexerInputReader(input);
        lexerSource = new LexerSource(fileName, lexerReader);
        lexer.setSource(lexerSource);

        Object state = info.state();

        if (state instanceof JRubyLexerRestartInfo) {
            ((JRubyLexerRestartInfo) state).initializeState(this);
        } else if (state instanceof Integer) {
            int stateValue = ((Integer) state).intValue();
            lexer.setState(LexState.fromOrdinal(stateValue));
        }
    }

    /** Parks this lexer for reuse by {@link #create} when {@link #REUSE_LEXERS} is enabled. */
    public void release() {
        if (REUSE_LEXERS) {
            synchronized (RubyLexer.class) {
                cached = this;
            }
        }
    }

    /**
     * Returns the state needed to restart lexing after the current token.
     *
     * @return a {@link JRubyLexerRestartInfo} when local flags or a string terminator must
     *         be preserved, the {@link LexState} ordinal as an {@link Integer} when only the
     *         lex state matters, or {@code null} when the JRuby lexer has no lex state
     */
    public Object state() {
        if (JRubyLexerRestartInfo.needsStateStorage(this)) {
            return new JRubyLexerRestartInfo(this);
        }

        LexState state = lexer.getLexState();

        if (state == null) {
            return null;
        }

        if (lexer.getStrTerm() != null) {
            return new JRubyLexerRestartInfo(this);
        }

        return Integer.valueOf(state.getOrdinal());
    }

    /**
     * Builds a token for {@code id}: a flyweight token when the id has fixed text,
     * otherwise a regular token of the given length.
     */
    private Token<GsfTokenId> token(GsfTokenId id, int length) {
        String fixedText = id.fixedText();

        return (fixedText != null) ? tokenFactory.getFlyweightToken(id, fixedText)
                                   : tokenFactory.createToken(id, length);
    }

    /**
     * Wraps whatever the input has read so far into a single IDENTIFIER token.
     *
     * @return the token, or {@code null} when nothing has been read
     */
    private Token<GsfTokenId> remainingInputToken() {
        int pending = input.readLength();

        return (pending > 0) ? token(RubyTokenId.IDENTIFIER, pending) : null;
    }

    /**
     * Advances the JRuby lexer until it has consumed at least one character and returns
     * the corresponding token.
     *
     * @return the next token, or {@code null} when the JRuby lexer reports token code 0
     *         (or a syntax error without progress) and no input characters are pending
     */
    public Token<GsfTokenId> nextToken() {
        int token = 0;
        int tokenLength = 0;
        int oldOffset = lexerSource.getOffset();

        while (tokenLength == 0) {
            try {
                lexer.advance();
                token = lexer.token();

                StrTerm strTerm = lexer.getStrTerm();

                if (strTerm != null) {
                    strTerm.splitEmbeddedTokens();
                }
            } catch (SyntaxException ex) {
                // Report the consumed text as an error token; if nothing was consumed,
                // fall back to whatever the input has buffered.
                token = Tokens.yyErrorCode;
                tokenLength = lexerSource.getOffset() - oldOffset;

                if (tokenLength == 0) {
                    return remainingInputToken();
                }

                break;
            } catch (Throwable ex) {
                // NOTE(review): tokenLength may still be 0 when we leave the loop here;
                // confirm the token factory tolerates a zero-length token.
                ErrorManager.getDefault().notify(ex);

                break;
            }

            if (token == 0) {
                // Token code 0: presumably JRuby's end-of-input marker -- TODO confirm.
                return remainingInputToken();
            }

            int offset = lexerSource.getOffset();
            tokenLength = offset - oldOffset;
        }

        // Give back characters the JRuby source read ahead but did not consume.
        int readAhead = lexerSource.chompReadAhead();

        if (readAhead > 0) {
            input.backup(readAhead);
        }

        GsfTokenId id = getTokenId(token, oldOffset);

        // A keyword directly following a symbol-begin token is part of the symbol.
        if (inSymbol && "keyword".equals(id.primaryCategory())) {
            id = RubyTokenId.TYPE_SYMBOL;
        }

        inSymbol = (token == Tokens.tSYMBEG);

        return token(id, tokenLength);
    }

    /**
     * Maps a JRuby token code to a {@link GsfTokenId}, updating the string / regexp /
     * embedded-code flags as a side effect.
     *
     * @param token  JRuby token code from {@link Tokens}
     * @param offset offset at which the token started (currently unused)
     * @return the token id; unknown codes map to {@link RubyTokenId#IDENTIFIER}
     */
    private GsfTokenId getTokenId(int token, int offset) {
        switch (token) {
        case Tokens.tCOMMENT:
            return RubyTokenId.LINE_COMMENT;

        case Tokens.tWHITESPACE:
            return RubyTokenId.WHITESPACE;

        case Tokens.tFLOAT:
            return RubyTokenId.FLOAT_LITERAL;

        case Tokens.tINTEGER:
            return RubyTokenId.INT_LITERAL;

        case Tokens.tSTRING_BEG:
        case Tokens.tXSTRING_BEG:

            if (lexer.getStrTerm() != null) {
                substituting = lexer.getStrTerm().isSubstituting();
            } else {
                substituting = false;
            }

            return substituting ? RubyTokenId.QUOTED_STRING_BEGIN : RubyTokenId.STRING_BEGIN;

        case Tokens.tSTRING_DVAR:
        case Tokens.tSTRING_DBEG:
            // The next string-content token is reported as embedded Ruby code.
            inEmbedded = true;

            return inRegexp ? RubyTokenId.REGEXP_LITERAL : RubyTokenId.STRING_LITERAL;

        case Tokens.tSTRING_END:
            return substituting ? RubyTokenId.QUOTED_STRING_END : RubyTokenId.STRING_END;

        case Tokens.tSTRING_CONTENT:

            if (inEmbedded) {
                inEmbedded = false;

                return RubyTokenId.EMBEDDED_RUBY;
            } else if (inRegexp) {
                return RubyTokenId.REGEXP_LITERAL;
            } else if (substituting) {
                return RubyTokenId.QUOTED_STRING_LITERAL;
            } else {
                return RubyTokenId.STRING_LITERAL;
            }

        case Tokens.tREGEXP_BEG:
            inRegexp = true;

            return RubyTokenId.REGEXP_BEGIN;

        case Tokens.tREGEXP_END:
            inRegexp = false;

            return RubyTokenId.REGEXP_END;

        case Tokens.tDOCUMENTATION:
            return RubyTokenId.DOCUMENTATION;

        case Tokens.yyErrorCode:
            return RubyTokenId.ERROR;

        case Tokens.tGVAR:
            return RubyTokenId.GLOBAL_VAR;

        case Tokens.tIVAR:
            return RubyTokenId.INSTANCE_VAR;

        case Tokens.tCVAR:
            return RubyTokenId.CLASS_VAR;

        case Tokens.tCONSTANT:
            return inSymbol ? RubyTokenId.TYPE_SYMBOL : RubyTokenId.CONSTANT;

        case Tokens.tIDENTIFIER:
            return inSymbol ? RubyTokenId.TYPE_SYMBOL : RubyTokenId.IDENTIFIER;

        case Tokens.tSYMBEG:
            return RubyTokenId.TYPE_SYMBOL;

        case Tokens.tLBRACK:
            return RubyTokenId.LBRACKET;

        case Tokens.tRBRACK:
            return RubyTokenId.RBRACKET;

        case Tokens.tLPAREN:
        case Tokens.tLPAREN2:
        case Tokens.tLPAREN_ARG:
            return RubyTokenId.LPAREN;

        case Tokens.tRPAREN:
            return RubyTokenId.RPAREN;

        case Tokens.tLCURLY:
        case Tokens.tLBRACE:
        case Tokens.tLBRACE_ARG:
            return RubyTokenId.LBRACE;

        case Tokens.tRCURLY:
            return RubyTokenId.RBRACE;

        case Tokens.kDEF:
            return RubyTokenId.DEF;

        case Tokens.kEND:
            return RubyTokenId.END;

        case Tokens.kCLASS:
            return RubyTokenId.CLASS;

        case Tokens.kMODULE:
            return RubyTokenId.MODULE;

        case Tokens.kBEGIN:
            return RubyTokenId.BEGIN;

        case Tokens.kIF:
            return RubyTokenId.IF;

        case Tokens.kUNLESS:
            return RubyTokenId.UNLESS;

        case Tokens.kWHILE:
            return RubyTokenId.WHILE;

        case Tokens.kUNTIL:
            return RubyTokenId.UNTIL;

        case Tokens.kDO:
            return RubyTokenId.DO;

        case Tokens.kCASE:
            return RubyTokenId.CASE;

        case Tokens.kFOR:
            return RubyTokenId.FOR;

        case Tokens.kELSE:
            return RubyTokenId.ELSE;

        case Tokens.kELSIF:
            return RubyTokenId.ELSIF;

        case Tokens.kENSURE:
            return RubyTokenId.ENSURE;

        case Tokens.kWHEN:
            return RubyTokenId.WHEN;

        case Tokens.kRESCUE:
            return RubyTokenId.RESCUE;

        case Tokens.kSUPER:
            return RubyTokenId.SUPER;

        case Tokens.kSELF:
            return RubyTokenId.SELF;

        case Tokens.kRESCUE_MOD:
        case Tokens.kDO_COND:
        case Tokens.kDO_BLOCK:
        case Tokens.kUNDEF:
        case Tokens.kTHEN:
        case Tokens.kBREAK:
        case Tokens.kNEXT:
        case Tokens.kREDO:
        case Tokens.kRETRY:
        case Tokens.kIN:
        case Tokens.kRETURN:
        case Tokens.kYIELD:
        case Tokens.kNIL:
        case Tokens.kTRUE:
        case Tokens.kFALSE:
        case Tokens.kAND:
        case Tokens.kOR:
        case Tokens.kNOT:
        case Tokens.kIF_MOD:
        case Tokens.kUNLESS_MOD:
        case Tokens.kWHILE_MOD:
        case Tokens.kUNTIL_MOD:
        case Tokens.kALIAS:
        case Tokens.kDEFINED:
        case Tokens.klBEGIN:
        case Tokens.klEND:
        case Tokens.k__LINE__:
        case Tokens.k__FILE__:
            return RubyTokenId.ANY_KEYWORD;

        case Tokens.tDOT:
            return RubyTokenId.DOT;

        case Tokens.tDOT2:
        case Tokens.tDOT3:
            return RubyTokenId.RANGE;

        case Tokens.tCOLON3:
            return RubyTokenId.COLON3;

        default:
            return RubyTokenId.IDENTIFIER;
        }
    }

    /**
     * Snapshot of everything needed to resume lexing mid-construct: the JRuby string
     * terminator and its mutable state, the lex state, and a bit set of boolean flags.
     */
    private static class JRubyLexerRestartInfo {

        /** Flag bit: {@code RubyLexer.inRegexp} was set. */
        private static final int IN_REGEXP = 1;

        /** Flag bit: {@code RubyLexer.inSymbol} was set. */
        private static final int IN_SYMBOL = 2;

        /** Flag bit: {@code RubyLexer.inEmbedded} was set. */
        private static final int IN_EMBEDDED = 4;

        /** Flag bit: {@code RubyLexer.substituting} was set. */
        private static final int IN_SUBSTITUTING = 8;

        /** Flag bit: the JRuby lexer reported {@code isSetSpaceSeen()}. */
        private static final int SET_SPACE_SEEN = 16;

        /** Flag bit: the JRuby lexer reported {@code isCommandStart()}. */
        private static final int SET_COMMAND_START = 32;

        private final StrTerm strTerm;
        private final int localState;
        private final LexState lexState;
        private final Object strTermState;

        /** Captures the current state of {@code rubyLexer}. */
        JRubyLexerRestartInfo(RubyLexer rubyLexer) {
            strTerm = rubyLexer.lexer.getStrTerm();
            strTermState = (strTerm != null) ? strTerm.getMutableState() : null;
            lexState = rubyLexer.lexer.getLexState();

            int flags = 0;

            if (rubyLexer.inRegexp) {
                flags |= IN_REGEXP;
            }

            if (rubyLexer.inSymbol) {
                flags |= IN_SYMBOL;
            }

            if (rubyLexer.inEmbedded) {
                flags |= IN_EMBEDDED;
            }

            if (rubyLexer.substituting) {
                flags |= IN_SUBSTITUTING;
            }

            if (rubyLexer.lexer.isSetSpaceSeen()) {
                flags |= SET_SPACE_SEEN;
            }

            if (rubyLexer.lexer.isCommandStart()) {
                flags |= SET_COMMAND_START;
            }

            localState = flags;
        }

        /**
         * Tells whether any flag recorded by this class is currently set on
         * {@code rubyLexer}, i.e. whether a plain lex-state ordinal would lose information.
         */
        public static boolean needsStateStorage(RubyLexer rubyLexer) {
            return rubyLexer.inRegexp || rubyLexer.inSymbol || rubyLexer.inEmbedded
                    || rubyLexer.substituting || rubyLexer.lexer.isCommandStart()
                    || rubyLexer.lexer.isSetSpaceSeen();
        }

        /**
         * Two restart infos are equal when their flags, string terminator, string
         * terminator state and lex state (compared by ordinal) all match.
         */
        @Override
        public boolean equals(Object obj) {
            if (obj == null) {
                return false;
            }

            if (getClass() != obj.getClass()) {
                return false;
            }

            final JRubyLexerRestartInfo other = (JRubyLexerRestartInfo) obj;

            if ((this.strTerm != other.strTerm)
                    && ((this.strTerm == null) || !this.strTerm.equals(other.strTerm))) {
                return false;
            }

            if (this.localState != other.localState) {
                return false;
            }

            // Null-check both sides: previously a null other.lexState was dereferenced.
            if ((this.lexState != other.lexState)
                    && ((this.lexState == null) || (other.lexState == null)
                        || (this.lexState.getOrdinal() != other.lexState.getOrdinal()))) {
                return false;
            }

            if ((this.strTermState != other.strTermState)
                    && ((this.strTermState == null)
                        || !this.strTermState.equals(other.strTermState))) {
                return false;
            }

            return true;
        }

        @Override
        public int hashCode() {
            int hash = 7;

            hash = (43 * hash) + this.localState;
            hash = (43 * hash) + ((this.strTerm != null) ? this.strTerm.hashCode() : 0);
            hash = (43 * hash) + ((this.strTermState != null) ? this.strTermState.hashCode() : 0);

            return hash;
        }

        @Override
        public String toString() {
            return "RubyLexerState[" + localState + "," + strTerm + "," + lexState + ","
                    + strTermState + "]";
        }

        /**
         * Restores the captured state onto {@code rubyLexer}. Flags are only ever switched
         * on here; {@code restart} clears the local flags before calling this method.
         */
        void initializeState(RubyLexer rubyLexer) {
            rubyLexer.lexer.setStrTerm(strTerm);

            if ((strTermState != null) && (strTerm != null)) {
                strTerm.setMutableState(strTermState);
            }

            if ((localState & IN_REGEXP) != 0) {
                rubyLexer.inRegexp = true;
            }

            if ((localState & IN_SYMBOL) != 0) {
                rubyLexer.inSymbol = true;
            }

            if ((localState & IN_EMBEDDED) != 0) {
                rubyLexer.inEmbedded = true;
            }

            if ((localState & IN_SUBSTITUTING) != 0) {
                rubyLexer.substituting = true;
            }

            if ((localState & SET_COMMAND_START) != 0) {
                rubyLexer.lexer.setCommandStart(true);
            }

            if ((localState & SET_SPACE_SEEN) != 0) {
                rubyLexer.lexer.setSpaceSeen(true);
            }

            rubyLexer.lexer.setLexState(lexState);
        }
    }

    /** {@link Reader} view over a {@link LexerInput}, used to feed the JRuby lexer. */
    private static final class LexerInputReader extends Reader {
        private final LexerInput input;

        LexerInputReader(LexerInput input) {
            this.input = input;
        }

        /**
         * Reads up to {@code len} characters from the lexer input into {@code buf}.
         *
         * @return the number of characters stored, or {@code -1} when the input was
         *         already at EOF and nothing could be read
         */
        @Override
        public int read(char[] buf, int off, int len) throws IOException {
            for (int i = 0; i < len; i++) {
                int c = input.read();

                if (c == LexerInput.EOF) {
                    // Per the Reader contract, report characters already copied instead
                    // of dropping them; only signal end-of-stream when none were read.
                    return (i > 0) ? i : -1;
                }

                buf[off + i] = (char) c;
            }

            return len;
        }

        /** Nothing to close: the {@link LexerInput} is owned by the lexer infrastructure. */
        @Override
        public void close() throws IOException {
        }
    }
}