package org.jruby;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jruby.parser.ReOptions;
import org.jruby.runtime.Block;
import org.jruby.runtime.CallbackFactory;
import org.jruby.runtime.ClassIndex;
import org.jruby.runtime.ObjectAllocator;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.runtime.marshal.MarshalStream;
import org.jruby.runtime.marshal.UnmarshalStream;
import org.jruby.util.ByteList;
import org.jruby.util.KCode;
import org.jruby.util.PrintfFormat;

/**
 * Ruby-visible <code>Regexp</code> object backed by a
 * <code>java.util.regex.Pattern</code>.
 *
 * The Ruby source of the expression is translated to a Java pattern by
 * {@link RegexpTranslator}; the instance additionally remembers the
 * {@link KCode} it was created with.
 */
public class RubyRegexp extends RubyObject implements ReOptions {
    private static final RegexpTranslator REGEXP_TRANSLATOR = new RegexpTranslator();

    /** Characters that {@link #escapeSpecialChars(String)} prefixes with a backslash. */
    private static final Pattern SPECIAL_CHARS = Pattern.compile("([\\\t\\\n\\\f\\\r\\ \\#\\\013\\+\\-\\[\\]\\.\\?\\*\\(\\)\\{\\}\\|\\\\\\^\\$])");

    /** Compiled expression; <code>null</code> until one of the initialize methods has run. */
    private Pattern pattern;
    private KCode code;

    KCode getCode() {
        return code;
    }

    /** Target string and matcher cached between calls to {@link #searchAgain(String)}. */
    private String lastTarget = null;
    private Matcher matcher = null;

    public RubyRegexp(Ruby runtime, RubyClass klass) {
        super(runtime, klass);
    }

    private RubyRegexp(Ruby runtime) {
        super(runtime, runtime.getClass("Regexp"));
    }

    /** Allocator registered with the Regexp class; creates an uninitialized instance. */
    private static final ObjectAllocator REGEXP_ALLOCATOR = new ObjectAllocator() {
        public IRubyObject allocate(Ruby runtime, RubyClass klass) {
            return new RubyRegexp(runtime, klass);
        }
    };

    /**
     * Defines the <code>Regexp</code> class, its option constants and its
     * instance and singleton methods on the given runtime.
     *
     * @param runtime the runtime to define the class in
     * @return the newly defined class
     */
    public static RubyClass createRegexpClass(Ruby runtime) {
        RubyClass regexpClass = runtime.defineClass("Regexp", runtime.getObject(), REGEXP_ALLOCATOR);
        regexpClass.index = ClassIndex.REGEXP;

        CallbackFactory callbackFactory = runtime.callbackFactory(RubyRegexp.class);

        regexpClass.defineConstant("IGNORECASE", runtime.newFixnum(RE_OPTION_IGNORECASE));
        regexpClass.defineConstant("EXTENDED", runtime.newFixnum(RE_OPTION_EXTENDED));
        regexpClass.defineConstant("MULTILINE", runtime.newFixnum(RE_OPTION_MULTILINE));

        regexpClass.defineFastMethod("initialize", callbackFactory.getFastOptMethod("initialize"));
        regexpClass.defineFastMethod("initialize_copy", callbackFactory.getFastMethod("initialize_copy", RubyKernel.IRUBY_OBJECT));
        regexpClass.defineFastMethod("==", callbackFactory.getFastMethod("equal", RubyKernel.IRUBY_OBJECT));
        regexpClass.defineFastMethod("eql?", callbackFactory.getFastMethod("equal", RubyKernel.IRUBY_OBJECT));
        regexpClass.defineFastMethod("===", callbackFactory.getFastMethod("eqq", RubyKernel.IRUBY_OBJECT));
        regexpClass.defineFastMethod("=~", callbackFactory.getFastMethod("match", RubyKernel.IRUBY_OBJECT));
        regexpClass.defineFastMethod("~", callbackFactory.getFastMethod("match2"));
        regexpClass.defineFastMethod("match", callbackFactory.getFastMethod("match_m", RubyKernel.IRUBY_OBJECT));
        regexpClass.defineFastMethod("inspect", callbackFactory.getFastMethod("inspect"));
        regexpClass.defineFastMethod("source", callbackFactory.getFastMethod("source"));
        regexpClass.defineFastMethod("casefold?", callbackFactory.getFastMethod("casefold"));
        regexpClass.defineFastMethod("kcode", callbackFactory.getFastMethod("kcode"));
        regexpClass.defineFastMethod("to_s", callbackFactory.getFastMethod("to_s"));
        regexpClass.defineFastMethod("hash", callbackFactory.getFastMethod("hash"));

        regexpClass.getMetaClass().defineFastMethod("new", callbackFactory.getFastOptSingletonMethod("newInstance"));
        regexpClass.getMetaClass().defineFastMethod("compile", callbackFactory.getFastOptSingletonMethod("newInstance"));
        regexpClass.getMetaClass().defineFastMethod("quote", callbackFactory.getFastOptSingletonMethod("quote"));
        regexpClass.getMetaClass().defineFastMethod("escape", callbackFactory.getFastSingletonMethod("quote", RubyString.class));
        regexpClass.getMetaClass().defineFastMethod("last_match", callbackFactory.getFastSingletonMethod("last_match_s"));
        regexpClass.getMetaClass().defineFastMethod("union", callbackFactory.getFastOptSingletonMethod("union"));

        return regexpClass;
    }

    public int getNativeTypeIndex() {
        return ClassIndex.REGEXP;
    }

    /**
     * Best-effort reinterpretation of a string as UTF-8: the string is turned
     * into bytes with <code>ByteList.plain</code> and those bytes are decoded
     * as UTF-8.
     *
     * @param plain the string to reinterpret
     * @return the decoded string, or <code>plain</code> itself when decoding fails
     */
    private static String decodeUtf8(String plain) {
        try {
            return new String(ByteList.plain(plain), "UTF8");
        } catch (Exception ignored) {
            // Deliberately best-effort: fall back to the undecoded text.
            return plain;
        }
    }

    /**
     * Compiles <code>regex</code> with the given Ruby option bits and stores
     * the resulting pattern in this object.
     *
     * @param regex   Ruby regular expression source
     * @param options bit set of <code>RE_OPTION_*</code> values
     * @throws org.jruby.exceptions.RaiseException a Ruby SyntaxError when the
     *         translated expression is rejected by <code>java.util.regex</code>
     */
    public void initialize(String regex, int options) {
        if (code == null) {
            // Same default the IRubyObject[] initializer uses when no kcode is given;
            // avoids a NullPointerException on code.flags() below.
            code = KCode.create(getRuntime(), null);
        }
        try {
            if (getCode() == KCode.UTF8) {
                regex = decodeUtf8(regex);
            }
            pattern = REGEXP_TRANSLATOR.translate(regex, options, code.flags());
        } catch (java.util.regex.PatternSyntaxException e) {
            throw getRuntime().newSyntaxError(e.getMessage());
        }
    }

    /**
     * Prefixes every character matched by {@link #SPECIAL_CHARS} with a backslash.
     *
     * @param original text to escape
     * @return the escaped text
     */
    public static String escapeSpecialChars(String original) {
        return SPECIAL_CHARS.matcher(original).replaceAll("\\\\$1");
    }

    private void recompileIfNeeded() {
        checkInitialized();
    }

    /** Raises a Ruby TypeError when no pattern has been compiled into this object yet. */
    private void checkInitialized() {
        if (pattern == null) {
            throw getRuntime().newTypeError("uninitialized Regexp");
        }
    }

    /**
     * Coerces an object to a regexp: a regexp is returned as is, a string is
     * escaped and compiled as a literal, anything else raises an ArgumentError.
     *
     * @param obj the object to coerce
     * @return the regexp for <code>obj</code>
     */
    public static RubyRegexp regexpValue(IRubyObject obj) {
        if (obj instanceof RubyRegexp) {
            return (RubyRegexp) obj;
        } else if (obj instanceof RubyString) {
            return newRegexp(obj.getRuntime().newString(escapeSpecialChars(((RubyString) obj).toString())), 0, null);
        } else {
            throw obj.getRuntime().newArgumentError("can't convert arg to Regexp");
        }
    }

    /**
     * Creates a regexp from a Ruby string.
     *
     * @param str     expression source
     * @param options bit set of <code>RE_OPTION_*</code> values
     * @param lang    kcode name handed to <code>KCode.create</code>, may be <code>null</code>
     * @return the new regexp
     */
    public static RubyRegexp newRegexp(RubyString str, int options, String lang) {
        return newRegexp(str.getRuntime(), str.toString(), options, lang);
    }

    /**
     * Wraps an already compiled Java pattern without translating it.
     *
     * @param runtime the owning runtime
     * @param pattern the compiled pattern to wrap
     * @param lang    kcode name handed to <code>KCode.create</code>, may be <code>null</code>
     * @return the new regexp
     */
    public static RubyRegexp newRegexp(Ruby runtime, Pattern pattern, String lang) {
        RubyRegexp re = new RubyRegexp(runtime);
        re.code = KCode.create(runtime, lang);
        re.pattern = pattern;
        return re;
    }

    /**
     * Creates a regexp from Ruby expression source.
     *
     * @param runtime the owning runtime
     * @param str     expression source
     * @param options bit set of <code>RE_OPTION_*</code> values
     * @param kcode   kcode name handed to <code>KCode.create</code>, may be <code>null</code>
     * @return the new regexp
     */
    public static RubyRegexp newRegexp(Ruby runtime, String str, int options, String kcode) {
        RubyRegexp re = new RubyRegexp(runtime);
        re.code = KCode.create(runtime, kcode);
        re.initialize(str, options);
        return re;
    }

    /**
     * Implementation of <code>Regexp.new</code> / <code>Regexp.compile</code>:
     * allocates an instance of the receiving class and runs its initializer.
     *
     * @param recv the class the method was called on
     * @param args arguments forwarded to <code>initialize</code>
     * @return the initialized regexp
     */
    public static RubyRegexp newInstance(IRubyObject recv, IRubyObject[] args) {
        RubyClass klass = (RubyClass) recv;

        RubyRegexp re = (RubyRegexp) klass.allocate();

        re.callInit(args, Block.NULL_BLOCK);

        return re;
    }

    /**
     * Ruby-level <code>initialize</code>.
     *
     * @param args <code>args[0]</code> is the source (a regexp or something
     *             convertible to a string); optional <code>args[1]</code> is
     *             either a Fixnum of option bits or a truthy value meaning
     *             "ignore case"; optional <code>args[2]</code> names the kcode
     * @return nil
     */
    public IRubyObject initialize(IRubyObject[] args) {
        if (args.length == 0) {
            // Without this guard args[0] fails with ArrayIndexOutOfBoundsException.
            throw getRuntime().newArgumentError(args.length, 1);
        }

        String pat =
            (args[0] instanceof RubyRegexp)
                ? ((RubyRegexp) args[0]).source().toString()
                : RubyString.stringValue(args[0]).toString();
        int opts = 0;
        if (args.length > 1) {
            if (args[1] instanceof RubyFixnum) {
                opts = (int) ((RubyFixnum) args[1]).getLongValue();
            } else if (args[1].isTrue()) {
                opts |= RE_OPTION_IGNORECASE;
            }
        }
        if (args.length > 2) {
            code = KCode.create(getRuntime(), RubyString.stringValue(args[2]).toString());
        } else {
            code = KCode.create(getRuntime(), null);
        }
        initialize(pat, opts);
        return getRuntime().getNil();
    }

    /**
     * Implementation of <code>Regexp.quote</code> with an optional kcode argument.
     *
     * @param recv the receiver (the Regexp class)
     * @param args <code>args[0]</code> is the string to escape; optional
     *             <code>args[1]</code> names the kcode used to decode and
     *             re-encode its bytes
     * @return the escaped string, infected by the input string
     */
    public static RubyString quote(IRubyObject recv, IRubyObject[] args) {
        if (args.length == 0 || args.length > 2) {
            throw recv.getRuntime().newArgumentError(args.length, 1);
        }

        KCode kcode = recv.getRuntime().getKCode();

        if (args.length > 1) {
            kcode = KCode.create(recv.getRuntime(), args[1].toString());
        }

        // Convert instead of blind-casting, as union() does for its arguments.
        RubyString str = args[0].convertToString();

        if (kcode == KCode.NONE) {
            return quote(recv, str);
        }

        try {
            CharBuffer decoded = kcode.decoder().decode(ByteBuffer.wrap(str.getBytes()));
            String escaped = escapeSpecialChars(decoded.toString());
            ByteBuffer encoded = kcode.encoder().encode(CharBuffer.wrap(escaped));

            // encoded.array() is the whole backing array, which may be larger than
            // the encoded content; copy only the bytes that were actually produced.
            byte[] bytes = new byte[encoded.remaining()];
            encoded.get(bytes);

            return (RubyString) RubyString.newString(recv.getRuntime(), bytes).infectBy(str);
        } catch (CharacterCodingException ex) {
            throw new RuntimeException(ex);
        }
    }

    /**
     * Implementation of <code>Regexp.escape</code>: escapes the string's
     * special characters without any kcode handling.
     *
     * @param recv the receiver (the Regexp class)
     * @param str  the string to escape
     * @return the escaped string, infected by <code>str</code>
     */
    public static RubyString quote(IRubyObject recv, RubyString str) {
        return (RubyString) recv.getRuntime().newString(escapeSpecialChars(str.toString())).infectBy(str);
    }

    /**
     * Implementation of <code>Regexp.last_match</code>.
     *
     * @param recv the receiver (the Regexp class)
     * @return the current context's backref
     */
    public static IRubyObject last_match_s(IRubyObject recv) {
        return recv.getRuntime().getCurrentContext().getBackref();
    }

    /**
     * Implementation of <code>==</code> and <code>eql?</code>: two regexps are
     * equal when their pattern text, pattern flags and kcode all agree.
     *
     * @param other the object to compare with
     * @return Ruby true or false
     */
    public IRubyObject equal(IRubyObject other) {
        if (other == this) {
            return getRuntime().getTrue();
        }
        if (!(other instanceof RubyRegexp)) {
            return getRuntime().getFalse();
        }
        RubyRegexp re = (RubyRegexp) other;
        checkInitialized();
        // The other side may be uninitialized too; report that instead of a NullPointerException.
        re.checkInitialized();
        if (!(re.pattern.pattern().equals(pattern.pattern()) &&
              re.pattern.flags() == pattern.flags())) {
            return getRuntime().getFalse();
        }

        if (code != re.code) {
            return getRuntime().getFalse();
        }

        return getRuntime().getTrue();
    }

    /**
     * Implementation of unary <code>~</code>: matches against the current
     * context's last line when that is a string.
     *
     * @return the result of {@link #match(IRubyObject)}, or nil when the last
     *         line is not a string
     */
    public IRubyObject match2() {
        IRubyObject target = getRuntime().getCurrentContext().getLastline();

        return target instanceof RubyString ? match(target) : getRuntime().getNil();
    }

    /**
     * Searches the string value of <code>target</code> from position 0.
     * An empty string is searched as <code>"\n"</code> when the pattern text
     * is exactly <code>^$</code>.
     *
     * @param target the object whose string value is searched
     * @return the match start position, or a negative value when nothing matched
     */
    private int searchFromStart(IRubyObject target) {
        checkInitialized();
        String string = RubyString.stringValue(target).toString();
        if (string.length() == 0 && "^$".equals(pattern.pattern())) {
            string = "\n";
        }
        return search(string, 0);
    }

    /**
     * Implementation of <code>===</code>.
     *
     * @param target the object to test; non-strings are converted with
     *               <code>checkStringType</code>
     * @return Ruby true when the target matches; false otherwise (a target
     *         that is not string-like also resets the backref to nil)
     */
    public IRubyObject eqq(IRubyObject target) {
        if (!(target instanceof RubyString)) {
            target = target.checkStringType();
            if (target.isNil()) {
                getRuntime().getCurrentContext().setBackref(getRuntime().getNil());
                return getRuntime().getFalse();
            }
        }

        int result = searchFromStart(target);

        return result < 0 ? getRuntime().getFalse() : getRuntime().getTrue();
    }

    /**
     * Implementation of <code>=~</code>.
     *
     * @param target the object to match against
     * @return the match start position as a Fixnum, nil when nothing matched,
     *         or false for nil, Symbol, Hash and Array targets
     */
    public IRubyObject match(IRubyObject target) {
        if (target.isNil()) {
            return getRuntime().getFalse();
        }
        if (target instanceof RubySymbol || target instanceof RubyHash || target instanceof RubyArray) {
            return getRuntime().getFalse();
        }

        int result = searchFromStart(target);

        return result < 0 ? getRuntime().getNil() :
            getRuntime().newFixnum(result);
    }

    /**
     * Implementation of <code>match</code>.
     *
     * @param target the object to match against
     * @return a clone of the backref set by the search, or nil when the target
     *         is nil or nothing matched
     */
    public IRubyObject match_m(IRubyObject target) {
        if (target.isNil()) {
            return target;
        }
        IRubyObject result = match(target);
        return result.isNil() ? result : getRuntime().getCurrentContext().getBackref().rbClone();
    }

    /**
     * Implementation of <code>source</code>.
     *
     * @return the text of the compiled pattern as a Ruby string
     */
    public RubyString source() {
        checkInitialized();
        return getRuntime().newString(pattern.pattern());
    }

    /**
     * Implementation of <code>kcode</code>.
     *
     * @return the kcode object itself for <code>KCode.NIL</code>, otherwise
     *         its name in lower case
     */
    public IRubyObject kcode() {
        if (code == KCode.NIL) {
            return code.kcode(getRuntime());
        }
        // Fixed locale so the result does not depend on the JVM default locale.
        String name = code.kcode(getRuntime()).toString();
        return getRuntime().newString(name.toLowerCase(java.util.Locale.ENGLISH));
    }

    /**
     * Implementation of <code>casefold?</code>.
     *
     * @return Ruby true when the pattern was compiled case-insensitively
     */
    public RubyBoolean casefold() {
        checkInitialized();
        return getRuntime().newBoolean((pattern.flags() & Pattern.CASE_INSENSITIVE) != 0);
    }

    /**
     * Returns group <code>n</code> of a match.
     *
     * @param n     group index; a negative value counts back from the number of groups
     * @param match a match data object or nil
     * @return the group, or nil when <code>match</code> is nil or the index is out of range
     */
    public static IRubyObject nth_match(int n, IRubyObject match) {
        IRubyObject nil = match.getRuntime().getNil();
        if (match.isNil()) {
            return nil;
        }

        RubyMatchData rmd = (RubyMatchData) match;

        // Valid indices are 0 .. size - 1, so size itself is already out of range.
        if (n >= rmd.getSize()) {
            return nil;
        }

        if (n < 0) {
            n += rmd.getSize();
            if (n <= 0) {
                return nil;
            }
        }
        return rmd.group(n);
    }

    /**
     * @param match a match data object or nil
     * @return group 0 of the match, or nil when <code>match</code> is nil
     */
    public static IRubyObject last_match(IRubyObject match) {
        return match.isNil() ? match : ((RubyMatchData) match).group(0);
    }

    /**
     * @param match a match data object or nil
     * @return the match's <code>pre_match</code>, or nil when <code>match</code> is nil
     */
    public static IRubyObject match_pre(IRubyObject match) {
        return match.isNil() ? match : ((RubyMatchData) match).pre_match();
    }

    /**
     * @param match a match data object or nil
     * @return the match's <code>post_match</code>, or nil when <code>match</code> is nil
     */
    public static IRubyObject match_post(IRubyObject match) {
        return match.isNil() ? match : ((RubyMatchData) match).post_match();
    }

    /**
     * Finds the highest-numbered group (excluding group 0) that is not nil.
     *
     * @param match a match data object or nil
     * @return that group, or nil when there is none or <code>match</code> is nil
     */
    public static IRubyObject match_last(IRubyObject match) {
        if (match.isNil()) {
            return match;
        }
        RubyMatchData md = (RubyMatchData) match;
        for (long i = md.getSize() - 1; i > 0; i--) {
            if (!md.group(i).isNil()) {
                return md.group(i);
            }
        }
        return md.getRuntime().getNil();
    }

    /**
     * Searches <code>target</code> starting at <code>pos</code> and stores the
     * outcome as the current context's backref.
     *
     * @param target the string to search
     * @param pos    the position to start at
     * @return the match start position, or -1 when nothing matched or
     *         <code>pos</code> lies beyond the end of <code>target</code>
     */
    public int search(String target, int pos) {
        if (pos > target.length()) {
            return -1;
        }
        recompileIfNeeded();

        IRubyObject result = match(target, pos);
        getRuntime().getCurrentContext().setBackref(result);

        return result instanceof RubyMatchData ? ((RubyMatchData) result).matchStartPosition() : -1;
    }

    /**
     * Searches <code>str</code> from position 0 and stores the outcome as the
     * current context's backref.
     *
     * @param str the string to search
     * @return the match data, or nil when nothing matched
     */
    public IRubyObject search2(String str) {
        IRubyObject result = match(str, 0);

        getRuntime().getCurrentContext().setBackref(result);

        return result;
    }

    /**
     * Continues searching <code>target</code> after the previous match found
     * by this method; the matcher is reset whenever a different target string
     * is passed. A successful match is stored as the current context's backref.
     *
     * @param target the string to search
     * @return the match start position, or -1 when there is no further match
     */
    public int searchAgain(String target) {
        checkInitialized();
        if (matcher == null || !target.equals(lastTarget)) {
            matcher = pattern.matcher(target);
            lastTarget = target;
        }

        if (!matcher.find()) {
            return -1;
        }

        int count = matcher.groupCount() + 1;
        int[] begin = new int[count];
        int[] end = new int[count];
        for (int i = 0; i < count; i++) {
            begin[i] = matcher.start(i);
            end[i] = matcher.end(i);
        }

        RubyMatchData match = new RubyMatchData(getRuntime(), target, begin, end);

        getRuntime().getCurrentContext().setBackref(match);

        return match.matchStartPosition();
    }

    /**
     * Matches <code>target</code> starting at <code>startPos</code> without
     * touching the backref. With a UTF-8 kcode the target is first
     * reinterpreted as UTF-8 (best effort) before matching.
     *
     * @param target   the string to search
     * @param startPos the position to start at
     * @return a match data object, or nil when nothing matched or
     *         <code>startPos</code> is outside the searched text
     */
    public IRubyObject match(String target, int startPos) {
        checkInitialized();

        String t = target;
        if (getCode() == KCode.UTF8) {
            t = decodeUtf8(target);
        }

        // Matcher.find(int) throws IndexOutOfBoundsException for these positions;
        // the decoded text may also be shorter than the original target.
        if (startPos < 0 || startPos > t.length()) {
            return getRuntime().getNil();
        }

        Matcher aMatcher = pattern.matcher(t);

        if (aMatcher.find(startPos)) {
            int count = aMatcher.groupCount() + 1;
            int[] begin = new int[count];
            int[] end = new int[count];
            for (int i = 0; i < count; i++) {
                begin[i] = aMatcher.start(i);
                end[i] = aMatcher.end(i);
            }

            return new RubyMatchData(getRuntime(), target, begin, end);
        }
        return getRuntime().getNil();
    }

    /**
     * Expands a substitution template into <code>sb</code>.
     *
     * Recognized escapes: <code>\0</code>-<code>\9</code> (numbered group),
     * <code>\&amp;</code> (whole match), <code>\`</code> (pre-match),
     * <code>\'</code> (post-match), <code>\+</code> (last non-nil group) and
     * <code>\\</code> (a single backslash). Any other escape is copied
     * unchanged, backslash included. Nil groups contribute nothing.
     *
     * @param str   the replacement template
     * @param match the match the template refers to
     * @param sb    the byte list the expansion is appended to
     */
    public void regsub(RubyString str, RubyMatchData match, ByteList sb) {
        ByteList repl = str.getByteList();
        int pos = 0;
        int end = repl.length();
        char c;
        IRubyObject ins;
        while (pos < end) {
            c = (char) (repl.get(pos++) & 0xFF);
            if (c == '\\' && pos < end) {
                c = (char) (repl.get(pos++) & 0xFF);
                switch (c) {
                    case '0' :
                    case '1' :
                    case '2' :
                    case '3' :
                    case '4' :
                    case '5' :
                    case '6' :
                    case '7' :
                    case '8' :
                    case '9' :
                        ins = match.group(c - '0');
                        break;
                    case '&' :
                        ins = match.group(0);
                        break;
                    case '`' :
                        ins = match.pre_match();
                        break;
                    case '\'' :
                        ins = match.post_match();
                        break;
                    case '+' :
                        ins = match_last(match);
                        break;
                    case '\\' :
                        sb.append(c);
                        continue;
                    default :
                        sb.append('\\');
                        sb.append(c);
                        continue;
                }
                if (!ins.isNil()) {
                    sb.append(((RubyString) ins).getByteList());
                }
            } else {
                sb.append(c);
            }
        }
    }

    /**
     * Expands a substitution template into a new Ruby string.
     *
     * @param str   the replacement template (converted with <code>objAsString</code>)
     * @param match the match the template refers to
     * @return the expanded string
     */
    public IRubyObject regsub(IRubyObject str, RubyMatchData match) {
        RubyString str2 = str.objAsString();
        ByteList sb = new ByteList(str2.getByteList().length() + 30);
        regsub(str2, match, sb);
        return RubyString.newString(getRuntime(), sb);
    }

    /**
     * Implementation of <code>initialize_copy</code>: takes over the pattern
     * and kcode of <code>original</code>.
     *
     * @param original the regexp to copy from; must have the same metaclass
     * @return this object
     */
    public IRubyObject initialize_copy(IRubyObject original) {
        if (this == original) return this;

        if (!(getMetaClass() == original.getMetaClass())) {
            throw getRuntime().newTypeError("wrong argument class");
        }

        RubyRegexp origRegexp = (RubyRegexp) original;
        pattern = origRegexp.pattern;
        code = origRegexp.code;

        return this;
    }

    /**
     * Implementation of <code>inspect</code>: renders the pattern between
     * slashes, escaping unescaped slashes and non-printable characters, and
     * appends one letter per kcode (<code>n</code>, <code>u</code>,
     * <code>s</code>) and per flag (<code>i</code>, <code>m</code>,
     * <code>x</code>).
     *
     * @return the rendered form as a Ruby string
     */
    public IRubyObject inspect() {
        checkInitialized();

        final String regex = pattern.pattern();
        final int length = regex.length();
        StringBuffer sb = new StringBuffer(length + 2);

        sb.append('/');
        for (int i = 0; i < length; i++) {
            char c = regex.charAt(i);

            if (RubyString.isAlnum(c)) {
                sb.append(c);
            } else if (c == '/') {
                // Only add a backslash when the slash is not already escaped.
                if (i == 0 || regex.charAt(i - 1) != '\\') {
                    sb.append("\\");
                }
                sb.append(c);
            } else if (RubyString.isPrint(c)) {
                sb.append(c);
            } else if (c == '\n') {
                sb.append('\\').append('n');
            } else if (c == '\r') {
                sb.append('\\').append('r');
            } else if (c == '\t') {
                sb.append('\\').append('t');
            } else if (c == '\f') {
                sb.append('\\').append('f');
            } else if (c == '\u000B') {
                sb.append('\\').append('v');
            } else if (c == '\u0007') {
                sb.append('\\').append('a');
            } else if (c == '\u001B') {
                sb.append('\\').append('e');
            } else {
                sb.append(new PrintfFormat("\\%.3o").sprintf(c));
            }
        }
        sb.append('/');

        if (code == KCode.NONE) {
            sb.append('n');
        } else if (code == KCode.UTF8) {
            sb.append('u');
        } else if (code == KCode.SJIS) {
            sb.append('s');
        }

        if ((pattern.flags() & Pattern.CASE_INSENSITIVE) > 0) {
            sb.append('i');
        }

        if ((pattern.flags() & Pattern.DOTALL) > 0) {
            sb.append('m');
        }

        if ((pattern.flags() & Pattern.COMMENTS) > 0) {
            sb.append('x');
        }

        return getRuntime().newString(sb.toString());
    }

    /**
     * Implementation of <code>Regexp.union</code>.
     *
     * With no arguments a regexp for <code>(?!)</code> is built. With one
     * argument that converts to a regexp, that regexp is returned directly.
     * Otherwise each argument contributes either its regexp string form or
     * its quoted string value, joined with <code>|</code>.
     *
     * @param recv the class the method was called on
     * @param args the alternatives
     * @return the resulting regexp
     */
    public static IRubyObject union(IRubyObject recv, IRubyObject[] args) {
        if (args.length == 0) {
            return newInstance(recv, new IRubyObject[] {recv.getRuntime().newString("(?!)")});
        }

        if (args.length == 1) {
            IRubyObject arg = args[0].convertToType("Regexp", "to_regexp", false);
            if (!arg.isNil()) {
                return arg;
            }
            return newInstance(recv, new IRubyObject[] {quote(recv, args[0].convertToString())});
        }

        StringBuffer buffer = new StringBuffer();
        for (int i = 0; i < args.length; i++) {
            if (i > 0) {
                buffer.append("|");
            }
            IRubyObject arg = args[i].convertToType("Regexp", "to_regexp", false);
            if (arg.isNil()) {
                arg = quote(recv, args[i].convertToString());
            }
            buffer.append(arg.toString());
        }

        return newInstance(recv, new IRubyObject[] {recv.getRuntime().newString(buffer.toString())});
    }

    /**
     * Implementation of <code>to_s</code>.
     *
     * @return {@link #toString()} wrapped in a Ruby string
     */
    public IRubyObject to_s() {
        checkInitialized();
        return getRuntime().newString(toString());
    }

    /**
     * Renders the regexp as <code>(?on-off:source)</code>, where
     * <code>on</code> lists the flags among <code>m</code>, <code>i</code>,
     * <code>x</code> that are set and <code>off</code> those that are not.
     * Slashes in the source that are not preceded by a backslash are escaped.
     *
     * @return the rendered form
     */
    public String toString() {
        StringBuffer buffer = new StringBuffer(100);
        StringBuffer off = new StringBuffer(3);

        buffer.append("(?");

        flagToString(buffer, off, Pattern.DOTALL, 'm');
        flagToString(buffer, off, Pattern.CASE_INSENSITIVE, 'i');
        flagToString(buffer, off, Pattern.COMMENTS, 'x');

        if (off.length() > 0) {
            buffer.append('-').append(off);
        }

        buffer.append(':');
        // A lookbehind (rather than consuming the preceding character) so that
        // adjacent slashes such as "//" are both escaped.
        buffer.append(pattern.pattern().replaceAll("(?<!\\\\)/", "\\\\/"));
        buffer.append(')');

        return buffer.toString();
    }

    /**
     * Appends <code>c</code> to <code>buffer</code> when <code>flag</code> is
     * set on the pattern, and to <code>off</code> otherwise.
     *
     * @param buffer receives the letters of flags that are set
     * @param off    receives the letters of flags that are not set
     * @param flag   the <code>java.util.regex.Pattern</code> flag to test
     * @param c      the letter representing the flag
     */
    private void flagToString(StringBuffer buffer, StringBuffer off, int flag, char c) {
        if ((pattern.flags() & flag) != 0) {
            buffer.append(c);
        } else {
            off.append(c);
        }
    }

    /**
     * Reads a regexp from a marshal stream: first the source string, then the
     * option bits.
     *
     * @param input the stream to read from
     * @return the reconstructed regexp, registered as a link target
     * @throws java.io.IOException when reading from the stream fails
     */
    public static RubyRegexp unmarshalFrom(UnmarshalStream input) throws java.io.IOException {
        RubyRegexp result = newRegexp(input.getRuntime(),
                                      RubyString.byteListToString(input.unmarshalString()), input.unmarshalInt(), null);
        input.registerLinkTarget(result);
        return result;
    }

    /**
     * Writes a regexp to a marshal stream: first the pattern text, then the
     * Ruby option bits derived from the Java pattern flags.
     *
     * @param regexp the regexp to write
     * @param output the stream to write to
     * @throws java.io.IOException when writing to the stream fails
     */
    public static void marshalTo(RubyRegexp regexp, MarshalStream output) throws java.io.IOException {
        output.writeString(regexp.pattern.pattern());

        int flags = 0;
        if ((regexp.pattern.flags() & Pattern.DOTALL) > 0) {
            flags |= RE_OPTION_MULTILINE;
        }
        if ((regexp.pattern.flags() & Pattern.CASE_INSENSITIVE) > 0) {
            flags |= RE_OPTION_IGNORECASE;
        }
        if ((regexp.pattern.flags() & Pattern.COMMENTS) > 0) {
            flags |= RE_OPTION_EXTENDED;
        }
        output.writeInt(flags);
    }

    /**
     * @return the compiled Java pattern, or <code>null</code> when uninitialized
     */
    public Pattern getPattern() {
        return this.pattern;
    }

    /**
     * Implementation of <code>hash</code>.
     *
     * @return the hash code of the pattern text as a Fixnum
     */
    public RubyFixnum hash() {
        checkInitialized();
        return getRuntime().newFixnum(this.pattern.pattern().hashCode());
    }
}