package gnu.regexp;

import java.io.InputStream;
import java.io.Reader;
import java.io.Serializable;
import java.util.Locale;
import java.util.PropertyResourceBundle;
import java.util.ResourceBundle;
import java.util.Vector;

/**
 * Mutable pair of ints. In this file it carries the minimum and maximum
 * repeat counts parsed from an interval expression such as <code>{m,n}</code>.
 */
class IntPair implements Serializable {
  public int first, second;
}

/**
 * One logical pattern character as produced by <code>RE.getCharUnit</code>:
 * the character itself plus a flag saying whether it was preceded by a
 * backslash in the pattern.
 */
class CharUnit implements Serializable {
  public char ch;
  public boolean bk;
}

/**
 * A compiled regular expression, represented as a chain of {@link REToken}s.
 * An <code>RE</code> is itself a token, so subexpressions and alternation
 * branches are nested <code>RE</code> instances.
 *
 * <p>Patterns and match inputs are passed as <code>Object</code>; see
 * {@link #initialize} and the private <code>makeCharIndexed</code> helper for
 * the concrete types that are recognised.
 */
public class RE extends REToken {
  private static final String VERSION = "1.1.4-dev";

  /** Localized error messages used when building {@link REException}s. */
  private static ResourceBundle messages =
      PropertyResourceBundle.getBundle("gnu/regexp/MessagesBundle", Locale.getDefault());

  /** Head and tail of this expression's token chain; both null while empty. */
  private REToken firstToken, lastToken;

  /** Number of capturing subexpressions found in the pattern. */
  private int numSubs;

  /** Sum of the minimum lengths of the tokens added so far. */
  private int minimumLength;

  /** Compilation flag: tokens are created case-insensitive. */
  public static final int REG_ICASE = 2;

  /** Compilation flag: '.' is created with newline matching enabled. */
  public static final int REG_DOT_NEWLINE = 4;

  /**
   * Compilation flag: '^' and '$' are created with the syntax's line
   * separator instead of <code>null</code>.
   */
  public static final int REG_MULTILINE = 8;

  /**
   * Execution flag. Not interpreted in this class; it is handed to
   * {@link REMatch} as part of <code>eflags</code>.
   */
  public static final int REG_NOTBOL = 16;

  /**
   * Execution flag. Not interpreted in this class; it is handed to
   * {@link REMatch} as part of <code>eflags</code>.
   */
  public static final int REG_NOTEOL = 32;

  /**
   * Execution flag. Not interpreted in this class; it is handed to
   * {@link REMatch} as part of <code>eflags</code>.
   */
  public static final int REG_ANCHORINDEX = 64;

  /**
   * Execution flag for the substitute methods: the replacement text is
   * appended literally instead of going through
   * <code>REMatch.substituteInto</code>.
   */
  public static final int REG_NO_INTERPOLATE = 128;

  /** Returns the version string of this package. */
  public static final String version() {
    return VERSION;
  }

  /** Looks up <code>key</code> in the message bundle. */
  static final String getLocalizedMessage(String key) {
    return messages.getString(key);
  }

  /**
   * Compiles <code>pattern</code> with no flags and the
   * <code>RESyntax.RE_SYNTAX_PERL5</code> syntax.
   *
   * @param pattern the pattern; see {@link #initialize} for accepted types
   * @throws REException if the pattern is malformed
   */
  public RE(Object pattern) throws REException {
    this(pattern, 0, RESyntax.RE_SYNTAX_PERL5, 0, 0);
  }

  /**
   * Compiles <code>pattern</code> with the given flags and the
   * <code>RESyntax.RE_SYNTAX_PERL5</code> syntax.
   *
   * @param pattern the pattern; see {@link #initialize} for accepted types
   * @param cflags bitwise OR of compilation flags (e.g. {@link #REG_ICASE})
   * @throws REException if the pattern is malformed
   */
  public RE(Object pattern, int cflags) throws REException {
    this(pattern, cflags, RESyntax.RE_SYNTAX_PERL5, 0, 0);
  }

  /**
   * Compiles <code>pattern</code> with the given flags and syntax.
   *
   * @param pattern the pattern; see {@link #initialize} for accepted types
   * @param cflags bitwise OR of compilation flags (e.g. {@link #REG_ICASE})
   * @param syntax the syntax rules that decide which operators are recognised
   * @throws REException if the pattern is malformed
   */
  public RE(Object pattern, int cflags, RESyntax syntax) throws REException {
    this(pattern, cflags, syntax, 0, 0);
  }

  /**
   * Wraps an already-built token chain as one alternation branch and
   * terminates it with an end-of-subexpression token.
   */
  private RE(REToken first, REToken last, int subs, int subIndex, int minLength) {
    super(subIndex);
    firstToken = first;
    lastToken = last;
    numSubs = subs;
    minimumLength = minLength;
    addToken(new RETokenEndSub(subIndex));
  }

  /** Compiles a (sub)pattern that has subexpression index <code>myIndex</code>. */
  private RE(Object patternObj, int cflags, RESyntax syntax, int myIndex, int nextSub)
      throws REException {
    super(myIndex);
    initialize(patternObj, cflags, syntax, myIndex, nextSub);
  }

  /** For subclasses, which are expected to call {@link #initialize} themselves. */
  protected RE() {
    super(0);
  }

  /**
   * Parses the pattern and builds this expression's token chain.
   *
   * @param patternObj the pattern: a <code>String</code>, <code>char[]</code>
   *     or <code>StringBuffer</code>; any other object is converted with
   *     <code>toString()</code>
   * @param cflags bitwise OR of compilation flags
   * @param syntax the syntax rules that decide which operators are recognised
   * @param myIndex subexpression index of this expression (not read in this
   *     method body; the inherited <code>subIndex</code> field is used)
   * @param nextSub offset added to the local subexpression count when
   *     numbering nested subexpressions
   * @throws REException if the pattern is malformed
   */
  protected void initialize(Object patternObj, int cflags, RESyntax syntax, int myIndex,
      int nextSub) throws REException {
    char[] pattern;
    if (patternObj instanceof String) {
      pattern = ((String) patternObj).toCharArray();
    } else if (patternObj instanceof char[]) {
      pattern = (char[]) patternObj;
    } else if (patternObj instanceof StringBuffer) {
      pattern = new char[((StringBuffer) patternObj).length()];
      ((StringBuffer) patternObj).getChars(0, pattern.length, pattern, 0);
    } else {
      pattern = patternObj.toString().toCharArray();
    }

    int pLength = pattern.length;

    numSubs = 0;
    // Completed alternation branches; stays null until the first '|' is seen.
    Vector branches = null;

    firstToken = lastToken = null;

    boolean insens = ((cflags & REG_ICASE) > 0);

    int index = 0;
    CharUnit unit = new CharUnit();
    IntPair minMax = new IntPair();

    // The most recently parsed token. It is held back from the chain until
    // the next unit is read so that a following repeat operator can wrap it.
    REToken currentToken = null;
    char ch;

    while (index < pLength) {
      index = getCharUnit(pattern, index, unit);

      // ALTERNATION: close the current chain as a branch and start a new one.
      if (((unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ unit.bk))
              || (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !unit.bk))
          && !syntax.get(RESyntax.RE_LIMITED_OPS)) {
        addToken(currentToken);
        RE theBranch = new RE(firstToken, lastToken, numSubs, subIndex, minimumLength);
        minimumLength = 0;
        if (branches == null) {
          branches = new Vector();
        }
        branches.addElement(theBranch);
        firstToken = lastToken = currentToken = null;
      }

      // INTERVAL: {m}, {m,} or {m,n}
      else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS)
          && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)) {
        int newIndex = getMinMax(pattern, index, minMax, syntax);
        if (newIndex > index) {
          if (minMax.first > minMax.second) {
            throw new REException(getLocalizedMessage("interval.order"),
                REException.REG_BADRPT, newIndex);
          }
          checkRepeatable(currentToken, minMax.second == Integer.MAX_VALUE, newIndex);
          index = newIndex;
          currentToken = setRepeated(currentToken, minMax.first, minMax.second, index);
        } else {
          // getMinMax did not consume an interval: treat '{' as a literal.
          addToken(currentToken);
          currentToken = new RETokenChar(subIndex, unit.ch, insens);
        }
      }

      // LIST: [...] or [^...]
      else if ((unit.ch == '[') && !unit.bk) {
        Vector options = new Vector();
        boolean negative = false;
        // Pending literal that might still become the low end of a range;
        // 0 means "nothing pending".
        char lastChar = 0;
        if (index == pLength) {
          throw new REException(getLocalizedMessage("unmatched.bracket"),
              REException.REG_EBRACK, index);
        }

        // A leading '^' negates the list.
        if ((ch = pattern[index]) == '^') {
          negative = true;
          if (++index == pLength) {
            throw new REException(getLocalizedMessage("class.no.end"),
                REException.REG_EBRACK, index);
          }
          ch = pattern[index];
        }

        // A ']' in first position is a literal, not the terminator.
        if (ch == ']') {
          lastChar = ch;
          if (++index == pLength) {
            throw new REException(getLocalizedMessage("class.no.end"),
                REException.REG_EBRACK, index);
          }
        }

        while ((ch = pattern[index++]) != ']') {
          if ((ch == '-') && (lastChar != 0)) {
            if (index == pLength) {
              throw new REException(getLocalizedMessage("class.no.end"),
                  REException.REG_EBRACK, index);
            }
            if ((ch = pattern[index]) == ']') {
              // Trailing '-' is a literal.
              options.addElement(new RETokenChar(subIndex, lastChar, insens));
              lastChar = '-';
            } else {
              options.addElement(new RETokenRange(subIndex, lastChar, ch, insens));
              lastChar = 0;
              index++;
            }
          } else if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) {
            if (index == pLength) {
              throw new REException(getLocalizedMessage("class.no.end"),
                  REException.REG_EBRACK, index);
            }
            int posixID = -1;
            boolean negate = false;
            char asciiEsc = 0;
            if (("dswDSW".indexOf(pattern[index]) != -1)
                && syntax.get(RESyntax.RE_CHAR_CLASS_ESC_IN_LISTS)) {
              switch (pattern[index]) {
                case 'D':
                  negate = true;
                  // fall through
                case 'd':
                  posixID = RETokenPOSIX.DIGIT;
                  break;
                case 'S':
                  negate = true;
                  // fall through
                case 's':
                  posixID = RETokenPOSIX.SPACE;
                  break;
                case 'W':
                  negate = true;
                  // fall through
                case 'w':
                  posixID = RETokenPOSIX.ALNUM;
                  break;
              }
            } else if ("nrt".indexOf(pattern[index]) != -1) {
              switch (pattern[index]) {
                case 'n':
                  asciiEsc = '\n';
                  break;
                case 't':
                  asciiEsc = '\t';
                  break;
                case 'r':
                  asciiEsc = '\r';
                  break;
              }
            }
            // Flush the pending literal and clear it. Without the reset a
            // class escape left the old character pending, so "[a\d-z]"
            // produced the range a-z and the literal was added twice.
            if (lastChar != 0) {
              options.addElement(new RETokenChar(subIndex, lastChar, insens));
              lastChar = 0;
            }

            if (posixID != -1) {
              options.addElement(new RETokenPOSIX(subIndex, posixID, insens, negate));
            } else if (asciiEsc != 0) {
              lastChar = asciiEsc;
            } else {
              lastChar = pattern[index];
            }
            ++index;
          } else if ((ch == '[') && (syntax.get(RESyntax.RE_CHAR_CLASSES))
              && (index < pLength) && (pattern[index] == ':')) {
            StringBuffer posixSet = new StringBuffer();
            index = getPosixSet(pattern, index + 1, posixSet);
            int posixId = RETokenPOSIX.intValue(posixSet.toString());
            if (posixId != -1) {
              // Same reasoning as for class escapes: a character class must
              // not leave a stale literal that a following '-' could turn
              // into a range.
              if (lastChar != 0) {
                options.addElement(new RETokenChar(subIndex, lastChar, insens));
                lastChar = 0;
              }
              options.addElement(new RETokenPOSIX(subIndex, posixId, insens, false));
            }
          } else {
            if (lastChar != 0) {
              options.addElement(new RETokenChar(subIndex, lastChar, insens));
            }
            lastChar = ch;
          }
          if (index == pLength) {
            throw new REException(getLocalizedMessage("class.no.end"),
                REException.REG_EBRACK, index);
          }
        }
        if (lastChar != 0) {
          options.addElement(new RETokenChar(subIndex, lastChar, insens));
        }

        addToken(currentToken);
        options.trimToSize();
        currentToken = new RETokenOneOf(subIndex, options, negative);
      }

      // SUBEXPRESSION: ( ... ), plus the (?: (?= (?! (?# forms
      else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) {
        boolean pure = false;
        boolean comment = false;
        boolean lookAhead = false;
        boolean negativelh = false;
        if ((index + 1 < pLength) && (pattern[index] == '?')) {
          switch (pattern[index + 1]) {
            case '!':
              if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
                pure = true;
                negativelh = true;
                lookAhead = true;
                index += 2;
              }
              break;
            case '=':
              if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
                pure = true;
                lookAhead = true;
                index += 2;
              }
              break;
            case ':':
              if (syntax.get(RESyntax.RE_PURE_GROUPING)) {
                pure = true;
                index += 2;
              }
              break;
            case '#':
              if (syntax.get(RESyntax.RE_COMMENTS)) {
                comment = true;
              }
              break;
            default:
              throw new REException(getLocalizedMessage("repeat.no.token"),
                  REException.REG_BADRPT, index);
          }
        }

        if (index >= pLength) {
          throw new REException(getLocalizedMessage("unmatched.paren"),
              REException.REG_ESUBREG, index);
        }

        // Scan forward to the matching close paren, tracking nesting depth.
        // On exit endIndex is the index of the ')' unit and nextIndex is
        // just past it.
        int endIndex = index;
        int nextIndex = index;
        int nested = 0;

        while (((nextIndex = getCharUnit(pattern, endIndex, unit)) > 0)
            && !(nested == 0 && (unit.ch == ')')
                && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))) {
          if ((endIndex = nextIndex) >= pLength) {
            throw new REException(getLocalizedMessage("subexpr.no.end"),
                REException.REG_ESUBREG, nextIndex);
          } else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) {
            nested++;
          } else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) {
            nested--;
          }
        }

        if (comment) {
          // Comments produce no token; just skip past them.
          index = nextIndex;
        } else {
          addToken(currentToken);
          if (!pure) {
            numSubs++;
          }

          int useIndex = (pure || lookAhead) ? 0 : nextSub + numSubs;
          currentToken = new RE(String.valueOf(pattern, index, endIndex - index).toCharArray(),
              cflags, syntax, useIndex, nextSub + numSubs);
          numSubs += ((RE) currentToken).getNumSubs();

          if (lookAhead) {
            currentToken = new RETokenLookAhead(currentToken, negativelh);
          }

          index = nextIndex;
        }
      }

      // UNMATCHED RIGHT PAREN
      else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD)
          && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))) {
        throw new REException(getLocalizedMessage("unmatched.paren"),
            REException.REG_EPAREN, index);
      }

      // START OF LINE: ^
      else if ((unit.ch == '^') && !unit.bk) {
        addToken(currentToken);
        currentToken = null;
        addToken(new RETokenStart(subIndex,
            ((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
      }

      // END OF LINE: $
      else if ((unit.ch == '$') && !unit.bk) {
        addToken(currentToken);
        currentToken = null;
        addToken(new RETokenEnd(subIndex,
            ((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
      }

      // MATCH-ANY-CHARACTER: .
      else if ((unit.ch == '.') && !unit.bk) {
        addToken(currentToken);
        currentToken = new RETokenAny(subIndex,
            syntax.get(RESyntax.RE_DOT_NEWLINE) || ((cflags & REG_DOT_NEWLINE) > 0),
            syntax.get(RESyntax.RE_DOT_NOT_NULL));
      }

      // ZERO-OR-MORE REPEAT: *
      else if ((unit.ch == '*') && !unit.bk) {
        checkRepeatable(currentToken, true, index);
        currentToken = setRepeated(currentToken, 0, Integer.MAX_VALUE, index);
      }

      // ONE-OR-MORE REPEAT: +
      else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS)
          && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) {
        checkRepeatable(currentToken, true, index);
        currentToken = setRepeated(currentToken, 1, Integer.MAX_VALUE, index);
      }

      // ZERO-OR-ONE REPEAT, or stingy modifier after another repeat: ?
      else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS)
          && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) {
        if (currentToken == null) {
          throw new REException(getLocalizedMessage("repeat.no.token"),
              REException.REG_BADRPT, index);
        }

        if (currentToken instanceof RETokenRepeated) {
          if (syntax.get(RESyntax.RE_STINGY_OPS)
              && !((RETokenRepeated) currentToken).isStingy()) {
            ((RETokenRepeated) currentToken).makeStingy();
          } else {
            throw new REException(getLocalizedMessage("repeat.chained"),
                REException.REG_BADRPT, index);
          }
        } else if (currentToken instanceof RETokenWordBoundary) {
          // NOTE(review): see checkRepeatable about the duplicated test
          // that used to be here.
          throw new REException(getLocalizedMessage("repeat.assertion"),
              REException.REG_BADRPT, index);
        } else {
          currentToken = setRepeated(currentToken, 0, 1, index);
        }
      }

      // BACKREFERENCE: \1 .. \9
      else if (unit.bk && Character.isDigit(unit.ch)
          && !syntax.get(RESyntax.RE_NO_BK_REFS)) {
        addToken(currentToken);
        currentToken = new RETokenBackRef(subIndex, Character.digit(unit.ch, 10), insens);
      }

      // START OF STRING: \A
      else if (unit.bk && (unit.ch == 'A') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
        addToken(currentToken);
        currentToken = new RETokenStart(subIndex, null);
      }

      // WORD BOUNDARY (either side): \b
      else if (unit.bk && (unit.ch == 'b') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
        addToken(currentToken);
        currentToken = new RETokenWordBoundary(subIndex,
            RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, false);
      }

      // WORD BEGINNING: \<
      else if (unit.bk && (unit.ch == '<')) {
        addToken(currentToken);
        currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN, false);
      }

      // WORD END: \>
      else if (unit.bk && (unit.ch == '>')) {
        addToken(currentToken);
        currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.END, false);
      }

      // NEGATED WORD BOUNDARY: \B
      else if (unit.bk && (unit.ch == 'B') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
        addToken(currentToken);
        currentToken = new RETokenWordBoundary(subIndex,
            RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, true);
      }

      // DIGIT: \d
      else if (unit.bk && (unit.ch == 'd') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
        addToken(currentToken);
        currentToken = new RETokenPOSIX(subIndex, RETokenPOSIX.DIGIT, insens, false);
      }

      // NON-DIGIT: \D
      else if (unit.bk && (unit.ch == 'D') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
        addToken(currentToken);
        currentToken = new RETokenPOSIX(subIndex, RETokenPOSIX.DIGIT, insens, true);
      }

      // NEWLINE ESCAPE: \n
      else if (unit.bk && (unit.ch == 'n')) {
        addToken(currentToken);
        currentToken = new RETokenChar(subIndex, '\n', false);
      }

      // CARRIAGE-RETURN ESCAPE: \r
      else if (unit.bk && (unit.ch == 'r')) {
        addToken(currentToken);
        currentToken = new RETokenChar(subIndex, '\r', false);
      }

      // WHITESPACE: \s
      else if (unit.bk && (unit.ch == 's') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
        addToken(currentToken);
        currentToken = new RETokenPOSIX(subIndex, RETokenPOSIX.SPACE, insens, false);
      }

      // NON-WHITESPACE: \S
      else if (unit.bk && (unit.ch == 'S') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
        addToken(currentToken);
        currentToken = new RETokenPOSIX(subIndex, RETokenPOSIX.SPACE, insens, true);
      }

      // TAB ESCAPE: \t
      else if (unit.bk && (unit.ch == 't')) {
        addToken(currentToken);
        currentToken = new RETokenChar(subIndex, '\t', false);
      }

      // ALPHANUMERIC: \w
      else if (unit.bk && (unit.ch == 'w') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
        addToken(currentToken);
        currentToken = new RETokenPOSIX(subIndex, RETokenPOSIX.ALNUM, insens, false);
      }

      // NON-ALPHANUMERIC: \W
      else if (unit.bk && (unit.ch == 'W') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
        addToken(currentToken);
        currentToken = new RETokenPOSIX(subIndex, RETokenPOSIX.ALNUM, insens, true);
      }

      // END OF STRING: \Z
      else if (unit.bk && (unit.ch == 'Z') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
        addToken(currentToken);
        currentToken = new RETokenEnd(subIndex, null);
      }

      // Anything else is an ordinary literal character.
      else {
        addToken(currentToken);
        currentToken = new RETokenChar(subIndex, unit.ch, insens);
      }
    }

    // Flush the token that was still being held back.
    addToken(currentToken);

    if (branches != null) {
      // Alternation was used: the trailing chain is the last branch, and
      // the whole expression collapses to one "one of these branches" token.
      branches.addElement(new RE(firstToken, lastToken, numSubs, subIndex, minimumLength));
      branches.trimToSize();
      minimumLength = 0;
      firstToken = lastToken = null;
      addToken(new RETokenOneOf(subIndex, branches, false));
    } else {
      addToken(new RETokenEndSub(subIndex));
    }
  }

  /**
   * Validates that <code>token</code> may be wrapped by a repeat operator.
   * The checks run in the same order as the original inline code so that
   * the same error wins when several apply.
   *
   * @param token the token about to be repeated; may be null
   * @param unbounded whether the repeat has no upper limit
   * @param index pattern position reported in the exception
   * @throws REException with code <code>REG_BADRPT</code> if the token
   *     cannot be repeated
   */
  private static void checkRepeatable(REToken token, boolean unbounded, int index)
      throws REException {
    if (token == null) {
      throw new REException(getLocalizedMessage("repeat.no.token"),
          REException.REG_BADRPT, index);
    }
    if (token instanceof RETokenRepeated) {
      throw new REException(getLocalizedMessage("repeat.chained"),
          REException.REG_BADRPT, index);
    }
    // NOTE(review): the original condition tested RETokenWordBoundary twice
    // ("a || a"). The redundant operand is dropped here; a different
    // assertion token type may have been intended - confirm upstream.
    if (token instanceof RETokenWordBoundary) {
      throw new REException(getLocalizedMessage("repeat.assertion"),
          REException.REG_BADRPT, index);
    }
    if ((token.getMinimumLength() == 0) && unbounded) {
      throw new REException(getLocalizedMessage("repeat.empty.token"),
          REException.REG_BADRPT, index);
    }
  }

  /**
   * Reads one logical character from <code>input</code> starting at
   * <code>index</code>, consuming a leading backslash together with the
   * character that follows it.
   *
   * @param unit receives the character and the backslash flag
   * @return the index just past the consumed characters
   * @throws REException if the input ends right after a backslash
   */
  private static int getCharUnit(char[] input, int index, CharUnit unit) throws REException {
    unit.ch = input[index++];
    unit.bk = (unit.ch == '\\');
    if (unit.bk) {
      if (index < input.length) {
        unit.ch = input[index++];
      } else {
        throw new REException(getLocalizedMessage("ends.with.backslash"),
            REException.REG_ESCAPE, index);
      }
    }
    return index;
  }

  /**
   * Checks whether the whole input matches, starting at index 0 with no
   * execution flags.
   *
   * @param input the text; see <code>makeCharIndexed</code> for accepted types
   */
  public boolean isMatch(Object input) {
    return isMatch(input, 0, 0);
  }

  /**
   * Checks whether the input from <code>index</code> to its end matches,
   * with no execution flags.
   *
   * @param input the text; see <code>makeCharIndexed</code> for accepted types
   * @param index offset in the input at which matching starts
   */
  public boolean isMatch(Object input, int index) {
    return isMatch(input, index, 0);
  }

  /**
   * Checks whether the input from <code>index</code> to its end matches.
   *
   * @param input the text; see <code>makeCharIndexed</code> for accepted types
   * @param index offset in the input at which matching starts
   * @param eflags bitwise OR of execution flags, handed to {@link REMatch}
   */
  public boolean isMatch(Object input, int index, int eflags) {
    return isMatchImpl(makeCharIndexed(input, index), index, eflags);
  }

  /**
   * Returns true if any candidate match ends exactly where the input ends.
   * An empty expression matches only empty input.
   */
  private boolean isMatchImpl(CharIndexed input, int index, int eflags) {
    if (firstToken == null) {
      return (input.charAt(0) == CharIndexed.OUT_OF_BOUNDS);
    }
    REMatch m = new REMatch(numSubs, index, eflags);
    if (firstToken.match(input, m)) {
      // Walk the linked candidates looking for one that consumed everything.
      while (m != null) {
        if (input.charAt(m.index) == CharIndexed.OUT_OF_BOUNDS) {
          return true;
        }
        m = m.next;
      }
    }
    return false;
  }

  /** Returns the number of capturing subexpressions counted at compile time. */
  public int getNumSubs() {
    return numSubs;
  }

  /** Forwards to the last token of the chain, or to the superclass when empty. */
  void setUncle(REToken uncle) {
    if (lastToken != null) {
      lastToken.setUncle(uncle);
    } else {
      super.setUncle(uncle);
    }
  }

  /**
   * Chains <code>next</code> after this token and also registers it as the
   * uncle of this expression's inner chain. Always returns true.
   */
  boolean chain(REToken next) {
    super.chain(next);
    setUncle(next);
    return true;
  }

  /** Returns the minimum length accumulated while the tokens were added. */
  public int getMinimumLength() {
    return minimumLength;
  }

  /**
   * Returns all successive matches in the input, from index 0 with no
   * execution flags.
   *
   * @param input the text; see <code>makeCharIndexed</code> for accepted types
   * @return the matches in the order found; empty if there are none
   */
  public REMatch[] getAllMatches(Object input) {
    return getAllMatches(input, 0, 0);
  }

  /**
   * Returns all successive matches in the input from <code>index</code>,
   * with no execution flags.
   *
   * @param input the text; see <code>makeCharIndexed</code> for accepted types
   * @param index offset in the input at which searching starts
   * @return the matches in the order found; empty if there are none
   */
  public REMatch[] getAllMatches(Object input, int index) {
    return getAllMatches(input, index, 0);
  }

  /**
   * Returns all successive matches in the input from <code>index</code>.
   *
   * @param input the text; see <code>makeCharIndexed</code> for accepted types
   * @param index offset in the input at which searching starts
   * @param eflags bitwise OR of execution flags, handed to {@link REMatch}
   * @return the matches in the order found; empty if there are none
   */
  public REMatch[] getAllMatches(Object input, int index, int eflags) {
    return getAllMatchesImpl(makeCharIndexed(input, index), index, eflags);
  }

  /** Repeatedly searches, advancing the input past each match found. */
  private REMatch[] getAllMatchesImpl(CharIndexed input, int index, int eflags) {
    Vector all = new Vector();
    REMatch m = null;
    while ((m = getMatchImpl(input, index, eflags, null)) != null) {
      all.addElement(m);
      index = m.getEndIndex();
      if (m.end[0] == 0) {
        // Empty match: step one character so the loop makes progress.
        index++;
        input.move(1);
      } else {
        input.move(m.end[0]);
      }
      if (!input.isValid()) {
        break;
      }
    }
    REMatch[] mset = new REMatch[all.size()];
    all.copyInto(mset);
    return mset;
  }

  /**
   * Token-level match: records where this subexpression starts and
   * delegates to the first token of the chain, or straight to
   * <code>next</code> when the chain is empty.
   */
  boolean match(CharIndexed input, REMatch mymatch) {
    if (firstToken == null) {
      return next(input, mymatch);
    }

    mymatch.start[subIndex] = mymatch.index;

    return firstToken.match(input, mymatch);
  }

  /**
   * Returns the first match in the input, from index 0 with no execution
   * flags.
   *
   * @param input the text; see <code>makeCharIndexed</code> for accepted types
   * @return the match, or null if there is none
   */
  public REMatch getMatch(Object input) {
    return getMatch(input, 0, 0);
  }

  /**
   * Returns the first match in the input from <code>index</code>, with no
   * execution flags.
   *
   * @param input the text; see <code>makeCharIndexed</code> for accepted types
   * @param index offset in the input at which searching starts
   * @return the match, or null if there is none
   */
  public REMatch getMatch(Object input, int index) {
    return getMatch(input, index, 0);
  }

  /**
   * Returns the first match in the input from <code>index</code>.
   *
   * @param input the text; see <code>makeCharIndexed</code> for accepted types
   * @param index offset in the input at which searching starts
   * @param eflags bitwise OR of execution flags, handed to {@link REMatch}
   * @return the match, or null if there is none
   */
  public REMatch getMatch(Object input, int index, int eflags) {
    return getMatch(input, index, eflags, null);
  }

  /**
   * Returns the first match in the input from <code>index</code>. Every
   * character skipped while searching is appended to <code>buffer</code>.
   *
   * @param input the text; see <code>makeCharIndexed</code> for accepted types
   * @param index offset in the input at which searching starts
   * @param eflags bitwise OR of execution flags, handed to {@link REMatch}
   * @param buffer receives the skipped characters; may be null
   * @return the match, or null if there is none
   */
  public REMatch getMatch(Object input, int index, int eflags, StringBuffer buffer) {
    return getMatchImpl(makeCharIndexed(input, index), index, eflags, buffer);
  }

  /**
   * Tries to match at the current input position and, on failure, moves one
   * character forward and tries again until the input is exhausted. Among
   * the candidates found at one position, the one with the largest end
   * index is returned.
   */
  REMatch getMatchImpl(CharIndexed input, int anchor, int eflags, StringBuffer buffer) {
    REMatch mymatch = new REMatch(numSubs, anchor, eflags);
    do {
      // Skip the attempt when fewer than minimumLength characters remain.
      if (minimumLength == 0
          || input.charAt(minimumLength - 1) != CharIndexed.OUT_OF_BOUNDS) {
        if (match(input, mymatch)) {
          REMatch longest = mymatch;
          while ((mymatch = mymatch.next) != null) {
            if (mymatch.index > longest.index) {
              longest = mymatch;
            }
          }

          longest.end[0] = longest.index;
          longest.finish(input);
          return longest;
        }
      }
      mymatch.clear(++anchor);
      // Record the character being skipped, if the caller wants it.
      if (buffer != null && input.charAt(0) != CharIndexed.OUT_OF_BOUNDS) {
        buffer.append(input.charAt(0));
      }
    } while (input.move(1));

    return null;
  }

  /**
   * Returns an enumeration over the matches in the input, from index 0 with
   * no execution flags.
   *
   * @param input the text; see <code>makeCharIndexed</code> for accepted types
   */
  public REMatchEnumeration getMatchEnumeration(Object input) {
    return getMatchEnumeration(input, 0, 0);
  }

  /**
   * Returns an enumeration over the matches in the input from
   * <code>index</code>, with no execution flags.
   *
   * @param input the text; see <code>makeCharIndexed</code> for accepted types
   * @param index offset in the input at which searching starts
   */
  public REMatchEnumeration getMatchEnumeration(Object input, int index) {
    return getMatchEnumeration(input, index, 0);
  }

  /**
   * Returns an enumeration over the matches in the input from
   * <code>index</code>.
   *
   * @param input the text; see <code>makeCharIndexed</code> for accepted types
   * @param index offset in the input at which searching starts
   * @param eflags bitwise OR of execution flags
   */
  public REMatchEnumeration getMatchEnumeration(Object input, int index, int eflags) {
    return new REMatchEnumeration(this, makeCharIndexed(input, index), index, eflags);
  }

  /**
   * Replaces the first match in the input, from index 0 with no execution
   * flags.
   *
   * @param input the text; see <code>makeCharIndexed</code> for accepted types
   * @param replace the replacement text
   * @return the input with the first match replaced
   */
  public String substitute(Object input, String replace) {
    return substitute(input, replace, 0, 0);
  }

  /**
   * Replaces the first match in the input from <code>index</code>, with no
   * execution flags. The result is built from <code>index</code> onward.
   *
   * @param input the text; see <code>makeCharIndexed</code> for accepted types
   * @param replace the replacement text
   * @param index offset in the input at which searching starts
   */
  public String substitute(Object input, String replace, int index) {
    return substitute(input, replace, index, 0);
  }

  /**
   * Replaces the first match in the input from <code>index</code>. The
   * result is built from <code>index</code> onward.
   *
   * @param input the text; see <code>makeCharIndexed</code> for accepted types
   * @param replace the replacement text; used literally when
   *     {@link #REG_NO_INTERPOLATE} is set
   * @param index offset in the input at which searching starts
   * @param eflags bitwise OR of execution flags
   */
  public String substitute(Object input, String replace, int index, int eflags) {
    return substituteImpl(makeCharIndexed(input, index), replace, index, eflags);
  }

  /** Copies skipped text, then the replacement, then whatever follows the match. */
  private String substituteImpl(CharIndexed input, String replace, int index, int eflags) {
    StringBuffer buffer = new StringBuffer();
    REMatch m = getMatchImpl(input, index, eflags, buffer);
    if (m == null) {
      return buffer.toString();
    }
    buffer.append(((eflags & REG_NO_INTERPOLATE) > 0) ? replace : m.substituteInto(replace));
    if (input.move(m.end[0])) {
      do {
        buffer.append(input.charAt(0));
      } while (input.move(1));
    }
    return buffer.toString();
  }

  /**
   * Replaces every match in the input, from index 0 with no execution flags.
   *
   * @param input the text; see <code>makeCharIndexed</code> for accepted types
   * @param replace the replacement text
   */
  public String substituteAll(Object input, String replace) {
    return substituteAll(input, replace, 0, 0);
  }

  /**
   * Replaces every match in the input from <code>index</code>, with no
   * execution flags. The result is built from <code>index</code> onward.
   *
   * @param input the text; see <code>makeCharIndexed</code> for accepted types
   * @param replace the replacement text
   * @param index offset in the input at which searching starts
   */
  public String substituteAll(Object input, String replace, int index) {
    return substituteAll(input, replace, index, 0);
  }

  /**
   * Replaces every match in the input from <code>index</code>. The result
   * is built from <code>index</code> onward.
   *
   * @param input the text; see <code>makeCharIndexed</code> for accepted types
   * @param replace the replacement text; used literally when
   *     {@link #REG_NO_INTERPOLATE} is set
   * @param index offset in the input at which searching starts
   * @param eflags bitwise OR of execution flags
   */
  public String substituteAll(Object input, String replace, int index, int eflags) {
    return substituteAllImpl(makeCharIndexed(input, index), replace, index, eflags);
  }

  /** Repeats search-and-replace, advancing the input past each match. */
  private String substituteAllImpl(CharIndexed input, String replace, int index, int eflags) {
    StringBuffer buffer = new StringBuffer();
    REMatch m;
    while ((m = getMatchImpl(input, index, eflags, buffer)) != null) {
      buffer.append(((eflags & REG_NO_INTERPOLATE) > 0) ? replace : m.substituteInto(replace));
      index = m.getEndIndex();
      if (m.end[0] == 0) {
        // Empty match: copy one character through and step past it so the
        // loop makes progress.
        char ch = input.charAt(0);
        if (ch != CharIndexed.OUT_OF_BOUNDS) {
          buffer.append(ch);
        }
        input.move(1);
      } else {
        input.move(m.end[0]);
      }

      if (!input.isValid()) {
        break;
      }
    }
    return buffer.toString();
  }

  /**
   * Appends <code>next</code> to the token chain and adds its minimum
   * length to the running total. A null argument is ignored.
   */
  private void addToken(REToken next) {
    if (next == null) {
      return;
    }
    minimumLength += next.getMinimumLength();
    if (firstToken == null) {
      lastToken = firstToken = next;
    } else {
      // The tail only advances when the current last token accepts the chain.
      if (lastToken.chain(next)) {
        lastToken = next;
      }
    }
  }

  /**
   * Wraps <code>current</code> in a repeat token with the given bounds.
   *
   * @throws REException if <code>current</code> is null
   */
  private static REToken setRepeated(REToken current, int min, int max, int index)
      throws REException {
    if (current == null) {
      throw new REException(getLocalizedMessage("repeat.no.token"),
          REException.REG_BADRPT, index);
    }
    return new RETokenRepeated(current.subIndex, current, min, max);
  }

  /**
   * Copies the class name of a <code>[:name:]</code> item into
   * <code>buf</code>.
   *
   * @param index position of the first character after "[:"
   * @return the index just past the closing ":]", or <code>index</code>
   *     unchanged when no terminator is found (<code>buf</code> then holds
   *     whatever was scanned)
   */
  private static int getPosixSet(char[] pattern, int index, StringBuffer buf) {
    int i;
    for (i = index; i < (pattern.length - 1); i++) {
      if ((pattern[i] == ':') && (pattern[i + 1] == ']')) {
        return i + 2;
      }
      buf.append(pattern[i]);
    }
    return index;
  }

  /**
   * Parses an interval body (<code>m}</code>, <code>m,}</code> or
   * <code>m,n}</code>) that starts at <code>index</code>, just after the
   * opening brace.
   *
   * <p>When the syntax does not set <code>RE_NO_BK_BRACES</code>, a
   * malformed interval is an error. Otherwise a malformed interval is
   * reported by returning the start index unchanged, which lets the caller
   * treat the brace as a literal.
   *
   * @param minMax receives the bounds; an omitted maximum is stored as
   *     <code>Integer.MAX_VALUE</code>
   * @return the index just past the closing brace, or the start index if
   *     nothing was consumed
   * @throws REException if the interval is malformed and must match, or if
   *     a bound does not fit in an <code>int</code>
   */
  private int getMinMax(char[] input, int index, IntPair minMax, RESyntax syntax)
      throws REException {
    boolean mustMatch = !syntax.get(RESyntax.RE_NO_BK_BRACES);
    int startIndex = index;
    if (index == input.length) {
      if (mustMatch) {
        throw new REException(getLocalizedMessage("unmatched.brace"),
            REException.REG_EBRACE, index);
      } else {
        return startIndex;
      }
    }

    int min, max = 0;
    CharUnit unit = new CharUnit();
    StringBuffer buf = new StringBuffer();

    // Read the digits of the minimum; the loop stops on the first non-digit
    // unit, which is left in 'unit' for the checks below.
    do {
      index = getCharUnit(input, index, unit);
      if (Character.isDigit(unit.ch)) {
        buf.append(unit.ch);
      }
    } while ((index != input.length) && Character.isDigit(unit.ch));

    if (buf.length() == 0) {
      if (mustMatch) {
        throw new REException(getLocalizedMessage("interval.error"),
            REException.REG_EBRACE, index);
      } else {
        return startIndex;
      }
    }

    min = parseBound(buf.toString(), index);

    if ((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)) {
      max = min;
    } else if (index == input.length) {
      if (mustMatch) {
        throw new REException(getLocalizedMessage("interval.no.end"),
            REException.REG_EBRACE, index);
      } else {
        return startIndex;
      }
    } else if ((unit.ch == ',') && !unit.bk) {
      buf = new StringBuffer();
      // Read the digits of the optional maximum.
      while (((index = getCharUnit(input, index, unit)) != input.length)
          && Character.isDigit(unit.ch)) {
        buf.append(unit.ch);
      }

      if (!((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk))) {
        if (mustMatch) {
          throw new REException(getLocalizedMessage("interval.error"),
              REException.REG_EBRACE, index);
        } else {
          return startIndex;
        }
      }

      if (buf.length() == 0) {
        max = Integer.MAX_VALUE;
      } else {
        max = parseBound(buf.toString(), index);
      }
    } else {
      if (mustMatch) {
        throw new REException(getLocalizedMessage("interval.error"),
            REException.REG_EBRACE, index);
      } else {
        return startIndex;
      }
    }

    minMax.first = min;
    minMax.second = max;

    return index;
  }

  /**
   * Parses one interval bound. A digit run too large for an
   * <code>int</code> is reported as a pattern error instead of escaping as
   * an unchecked <code>NumberFormatException</code>.
   *
   * @param digits the digit characters of the bound
   * @param index pattern position reported in the exception
   * @throws REException with code <code>REG_EBRACE</code> on overflow
   */
  private static int parseBound(String digits, int index) throws REException {
    try {
      return Integer.parseInt(digits);
    } catch (NumberFormatException e) {
      // The REException constructor used throughout this file takes no
      // cause, so the original exception cannot be attached.
      throw new REException(getLocalizedMessage("interval.error"),
          REException.REG_EBRACE, index);
    }
  }

  /** Returns the textual dump of this expression produced by {@link #dump}. */
  public String toString() {
    StringBuffer sb = new StringBuffer();
    dump(sb);
    return sb.toString();
  }

  /**
   * Appends this expression in parentheses, prefixed with "?:" when its
   * subexpression index is 0.
   */
  void dump(StringBuffer os) {
    os.append('(');
    if (subIndex == 0) {
      os.append("?:");
    }
    if (firstToken != null) {
      firstToken.dumpAll(os);
    }
    os.append(')');
  }

  /**
   * Adapts a match input to {@link CharIndexed}. Strings, char arrays,
   * StringBuffers, InputStreams and Readers get a matching wrapper, a
   * <code>CharIndexed</code> is returned as is, and any other object is
   * wrapped through its <code>toString()</code>.
   */
  private static CharIndexed makeCharIndexed(Object input, int index) {
    if (input instanceof String) {
      return new CharIndexedString((String) input, index);
    } else if (input instanceof char[]) {
      return new CharIndexedCharArray((char[]) input, index);
    } else if (input instanceof StringBuffer) {
      return new CharIndexedStringBuffer((StringBuffer) input, index);
    } else if (input instanceof InputStream) {
      return new CharIndexedInputStream((InputStream) input, index);
    } else if (input instanceof Reader) {
      return new CharIndexedReader((Reader) input, index);
    } else if (input instanceof CharIndexed) {
      return (CharIndexed) input;
    } else {
      return new CharIndexedString(input.toString(), index);
    }
  }
}