package org.eclipse.emf.ecore.xml.type.internal;


import java.text.CharacterIterator;
import java.util.Hashtable;
import java.util.Locale;
import java.util.ResourceBundle;
import java.util.Vector;

import org.eclipse.emf.ecore.plugin.EcorePlugin;

/**
 * Holder for the nested regular-expression support types: the fixed-string
 * searcher {@link BMPattern}, the match-result container {@link Match} and
 * the static helper collection {@link REUtil}.
 */
public final class RegEx
{
  /**
   * Fixed-string searcher. The pattern is compared right-to-left against the
   * text and, on a mismatch, the search position is advanced using a shift
   * table indexed by the text character (modulo the table size).
   */
  static class BMPattern
  {
    /** The characters of the string being searched for. */
    char[] pattern;

    /**
     * For each slot {@code c % shiftTable.length}, the smallest distance from
     * an occurrence of such a character in the pattern to the pattern's end
     * (or the pattern length when no pattern character maps to the slot).
     */
    int[] shiftTable;

    /** Whether comparisons fold upper/lower case. */
    boolean ignoreCase;

    /**
     * Creates a searcher for {@code pat} with a 256-entry shift table.
     *
     * @param pat        the literal string to search for
     * @param ignoreCase {@code true} to match case-insensitively
     */
    public BMPattern(String pat, boolean ignoreCase)
    {
      this(pat, 256, ignoreCase);
    }

    /**
     * Creates a searcher for {@code pat}.
     *
     * @param pat        the literal string to search for
     * @param tableSize  number of entries in the shift table; characters are
     *                   mapped to entries by {@code ch % tableSize}
     * @param ignoreCase {@code true} to match case-insensitively
     */
    public BMPattern(String pat, int tableSize, boolean ignoreCase)
    {
      this.pattern = pat.toCharArray();
      this.shiftTable = new int[tableSize];
      this.ignoreCase = ignoreCase;
      int length = pattern.length;
      // Default shift: the whole pattern length.
      for (int i = 0; i < this.shiftTable.length; i++)
        this.shiftTable[i] = length;
      for (int i = 0; i < length; i++)
      {
        char ch = this.pattern[i];
        int diff = length - i - 1;
        int index = ch % this.shiftTable.length;
        if (diff < this.shiftTable[index])
          this.shiftTable[index] = diff;
        if (this.ignoreCase)
        {
          // Register the same distance for both case variants of the character.
          ch = Character.toUpperCase(ch);
          index = ch % this.shiftTable.length;
          if (diff < this.shiftTable[index])
            this.shiftTable[index] = diff;
          ch = Character.toLowerCase(ch);
          index = ch % this.shiftTable.length;
          if (diff < this.shiftTable[index])
            this.shiftTable[index] = diff;
        }
      }
    }

    /**
     * Searches {@code iterator} for the pattern. Note that the iterator's
     * position is changed via {@code setIndex}.
     *
     * @param iterator the text to search
     * @param start    index at which the search begins
     * @param limit    index just past the last character that may take part in a match
     * @return the index of the first character of the match, {@code start}
     *         when the pattern is empty, or -1 when there is no match
     */
    public int matches(CharacterIterator iterator, int start, int limit)
    {
      if (this.ignoreCase)
        return this.matchesIgnoreCase(iterator, start, limit);
      int plength = this.pattern.length;
      if (plength == 0)
        return start;
      int index = start + plength;
      while (index <= limit)
      {
        int pindex = plength;
        // Guarantees forward progress: the next attempt ends at least one character later.
        int nindex = index + 1;
        char ch;
        do
        {
          if ((ch = iterator.setIndex(--index)) != this.pattern[--pindex])
            break;
          if (pindex == 0)
            return index;
        }
        while (pindex > 0);
        index += this.shiftTable[ch % this.shiftTable.length] + 1;
        if (index < nindex)
          index = nindex;
      }
      return -1;
    }

    /**
     * Searches {@code str} for the pattern.
     *
     * @param str   the text to search
     * @param start index at which the search begins
     * @param limit index just past the last character that may take part in a match
     * @return the index of the first character of the match, {@code start}
     *         when the pattern is empty, or -1 when there is no match
     */
    public int matches(String str, int start, int limit)
    {
      if (this.ignoreCase)
        return this.matchesIgnoreCase(str, start, limit);
      int plength = this.pattern.length;
      if (plength == 0)
        return start;
      int index = start + plength;
      while (index <= limit)
      {
        int pindex = plength;
        int nindex = index + 1;
        char ch;
        do
        {
          if ((ch = str.charAt(--index)) != this.pattern[--pindex])
            break;
          if (pindex == 0)
            return index;
        }
        while (pindex > 0);
        index += this.shiftTable[ch % this.shiftTable.length] + 1;
        if (index < nindex)
          index = nindex;
      }
      return -1;
    }

    /**
     * Searches {@code chars} for the pattern.
     *
     * @param chars the text to search
     * @param start index at which the search begins
     * @param limit index just past the last character that may take part in a match
     * @return the index of the first character of the match, {@code start}
     *         when the pattern is empty, or -1 when there is no match
     */
    public int matches(char[] chars, int start, int limit)
    {
      if (this.ignoreCase)
        return this.matchesIgnoreCase(chars, start, limit);
      int plength = this.pattern.length;
      if (plength == 0)
        return start;
      int index = start + plength;
      while (index <= limit)
      {
        int pindex = plength;
        int nindex = index + 1;
        char ch;
        do
        {
          if ((ch = chars[--index]) != this.pattern[--pindex])
            break;
          if (pindex == 0)
            return index;
        }
        while (pindex > 0);
        index += this.shiftTable[ch % this.shiftTable.length] + 1;
        if (index < nindex)
          index = nindex;
      }
      return -1;
    }

    /**
     * Case-insensitive variant of {@link #matches(CharacterIterator, int, int)}.
     * Two characters are treated as equal when they are identical, or when
     * their upper-case forms, or the lower-case forms of those, are identical.
     */
    int matchesIgnoreCase(CharacterIterator iterator, int start, int limit)
    {
      int plength = this.pattern.length;
      if (plength == 0)
        return start;
      int index = start + plength;
      while (index <= limit)
      {
        int pindex = plength;
        int nindex = index + 1;
        char ch;
        do
        {
          char ch1 = ch = iterator.setIndex(--index);
          char ch2 = this.pattern[--pindex];
          if (ch1 != ch2)
          {
            ch1 = Character.toUpperCase(ch1);
            ch2 = Character.toUpperCase(ch2);
            if (ch1 != ch2 && Character.toLowerCase(ch1) != Character.toLowerCase(ch2))
              break;
          }
          if (pindex == 0)
            return index;
        }
        while (pindex > 0);
        // The shift is looked up with the original (unfolded) text character.
        index += this.shiftTable[ch % this.shiftTable.length] + 1;
        if (index < nindex)
          index = nindex;
      }
      return -1;
    }

    /**
     * Case-insensitive variant of {@link #matches(String, int, int)}.
     * Two characters are treated as equal when they are identical, or when
     * their upper-case forms, or the lower-case forms of those, are identical.
     */
    int matchesIgnoreCase(String text, int start, int limit)
    {
      int plength = this.pattern.length;
      if (plength == 0)
        return start;
      int index = start + plength;
      while (index <= limit)
      {
        int pindex = plength;
        int nindex = index + 1;
        char ch;
        do
        {
          char ch1 = ch = text.charAt(--index);
          char ch2 = this.pattern[--pindex];
          if (ch1 != ch2)
          {
            ch1 = Character.toUpperCase(ch1);
            ch2 = Character.toUpperCase(ch2);
            if (ch1 != ch2 && Character.toLowerCase(ch1) != Character.toLowerCase(ch2))
              break;
          }
          if (pindex == 0)
            return index;
        }
        while (pindex > 0);
        index += this.shiftTable[ch % this.shiftTable.length] + 1;
        if (index < nindex)
          index = nindex;
      }
      return -1;
    }

    /**
     * Case-insensitive variant of {@link #matches(char[], int, int)}.
     * Two characters are treated as equal when they are identical, or when
     * their upper-case forms, or the lower-case forms of those, are identical.
     */
    int matchesIgnoreCase(char[] chars, int start, int limit)
    {
      int plength = this.pattern.length;
      if (plength == 0)
        return start;
      int index = start + plength;
      while (index <= limit)
      {
        int pindex = plength;
        int nindex = index + 1;
        char ch;
        do
        {
          char ch1 = ch = chars[--index];
          char ch2 = this.pattern[--pindex];
          if (ch1 != ch2)
          {
            ch1 = Character.toUpperCase(ch1);
            ch2 = Character.toUpperCase(ch2);
            if (ch1 != ch2 && Character.toLowerCase(ch1) != Character.toLowerCase(ch2))
              break;
          }
          if (pindex == 0)
            return index;
        }
        while (pindex > 0);
        index += this.shiftTable[ch % this.shiftTable.length] + 1;
        if (index < nindex)
          index = nindex;
      }
      return -1;
    }
  }

  /**
   * Holds the begin/end offsets of each group of a match together with a
   * reference to the text they refer to. Exactly one of the three source
   * fields is non-null once a source has been set.
   */
  public static class Match implements Cloneable
  {
    /** Start offset per group; -1 when the group has no value. */
    int[] beginpos = null;
    /** End offset per group; -1 when the group has no value. */
    int[] endpos = null;
    /** Number of groups currently in use; 0 until {@link #setNumberOfGroups(int)} is called. */
    int nofgroups = 0;

    CharacterIterator ciSource = null;
    String strSource = null;
    char[] charSource = null;

    /**
     * Creates an empty result with no groups and no source.
     */
    public Match()
    {
    }

    /**
     * Returns a copy of this result: the group count, the source reference
     * (shared, not duplicated) and every group's begin/end offsets. When no
     * groups have been set, an empty {@code Match} is returned.
     */
    public synchronized Object clone()
    {
      Match ma = new Match();
      if (this.nofgroups > 0)
      {
        ma.setNumberOfGroups(this.nofgroups);
        if (this.ciSource != null)
          ma.setSource(this.ciSource);
        if (this.strSource != null)
          ma.setSource(this.strSource);
        // A char[] source must be carried over as well; otherwise
        // getCapturedText() on the copy dereferences a null source.
        if (this.charSource != null)
          ma.setSource(this.charSource);
        for (int i = 0; i < this.nofgroups; i++)
        {
          ma.setBeginning(i, this.getBeginning(i));
          ma.setEnd(i, this.getEnd(i));
        }
      }
      return ma;
    }

    /**
     * Sets the number of groups and resets every group's offsets to -1.
     * The offset arrays are reallocated when none exist yet, when they are
     * too small, or when fewer than half of their entries would be used.
     *
     * @param n the new number of groups
     */
    protected void setNumberOfGroups(int n)
    {
      int oldn = this.nofgroups;
      this.nofgroups = n;
      if (oldn <= 0 || oldn < n || n * 2 < oldn)
      {
        this.beginpos = new int[n];
        this.endpos = new int[n];
      }
      for (int i = 0; i < n; i++)
      {
        this.beginpos[i] = -1;
        this.endpos[i] = -1;
      }
    }

    /**
     * Makes {@code ci} the text source and clears the other two sources.
     */
    protected void setSource(CharacterIterator ci)
    {
      this.ciSource = ci;
      this.strSource = null;
      this.charSource = null;
    }

    /**
     * Makes {@code str} the text source and clears the other two sources.
     */
    protected void setSource(String str)
    {
      this.ciSource = null;
      this.strSource = str;
      this.charSource = null;
    }

    /**
     * Makes {@code chars} the text source and clears the other two sources.
     */
    protected void setSource(char[] chars)
    {
      this.ciSource = null;
      this.strSource = null;
      this.charSource = chars;
    }

    /**
     * Stores the start offset of group {@code index}.
     */
    protected void setBeginning(int index, int v)
    {
      this.beginpos[index] = v;
    }

    /**
     * Stores the end offset of group {@code index}.
     */
    protected void setEnd(int index, int v)
    {
      this.endpos[index] = v;
    }

    /**
     * Returns the number of groups.
     *
     * @throws IllegalStateException when no group count has been set
     */
    public int getNumberOfGroups()
    {
      if (this.nofgroups <= 0)
        throw new IllegalStateException("A result is not set.");
      return this.nofgroups;
    }

    /**
     * Returns the start offset of group {@code index}, or -1 when unset.
     *
     * @throws IllegalStateException    when no result has been stored
     * @throws IllegalArgumentException when {@code index} is negative or not
     *                                  less than the number of groups
     */
    public int getBeginning(int index)
    {
      if (this.beginpos == null)
        throw new IllegalStateException("A result is not set.");
      if (index < 0 || this.nofgroups <= index)
        throw new IllegalArgumentException("The parameter must be less than "
                                           + this.nofgroups + ": " + index);
      return this.beginpos[index];
    }

    /**
     * Returns the end offset of group {@code index}, or -1 when unset.
     *
     * @throws IllegalStateException    when no result has been stored
     * @throws IllegalArgumentException when {@code index} is negative or not
     *                                  less than the number of groups
     */
    public int getEnd(int index)
    {
      if (this.endpos == null)
        throw new IllegalStateException("A result is not set.");
      if (index < 0 || this.nofgroups <= index)
        throw new IllegalArgumentException("The parameter must be less than "
                                           + this.nofgroups + ": " + index);
      return this.endpos[index];
    }

    /**
     * Returns the text between the begin and end offsets of group
     * {@code index}, read from whichever source is set, or {@code null} when
     * either offset is negative.
     *
     * @throws IllegalStateException    when no result has been stored
     * @throws IllegalArgumentException when {@code index} is negative or not
     *                                  less than the number of groups
     */
    public String getCapturedText(int index)
    {
      if (this.beginpos == null)
        throw new IllegalStateException("match() has never been called.");
      if (index < 0 || this.nofgroups <= index)
        throw new IllegalArgumentException("The parameter must be less than "
                                           + this.nofgroups + ": " + index);
      String ret;
      int begin = this.beginpos[index], end = this.endpos[index];
      if (begin < 0 || end < 0)
        return null;
      if (this.ciSource != null)
      {
        ret = REUtil.substring(this.ciSource, begin, end);
      }
      else if (this.strSource != null)
      {
        ret = this.strSource.substring(begin, end);
      }
      else
      {
        ret = new String(this.charSource, begin, end - begin);
      }
      return ret;
    }
  }

  /**
   * Static helpers: surrogate arithmetic, option-string conversion,
   * extended-comment stripping, meta-character quoting and a small cache of
   * compiled {@code RegularExpression} objects.
   */
  public final static class REUtil
  {
    private REUtil()
    {
    }

    /**
     * Combines a high and a low surrogate code unit into one code point.
     */
    static final int composeFromSurrogates(int high, int low)
    {
      return 0x10000 + ((high - 0xd800) << 10) + low - 0xdc00;
    }

    /**
     * Returns whether {@code ch} lies in the range 0xDC00-0xDFFF.
     */
    static final boolean isLowSurrogate(int ch)
    {
      return (ch & 0xfc00) == 0xdc00;
    }

    /**
     * Returns whether {@code ch} lies in the range 0xD800-0xDBFF.
     */
    static final boolean isHighSurrogate(int ch)
    {
      return (ch & 0xfc00) == 0xd800;
    }

    /**
     * Splits a code point at or above 0x10000 into a two-character string
     * holding its high and low surrogate.
     */
    static final String decomposeToSurrogates(int ch)
    {
      char[] chs = new char[2];
      ch -= 0x10000;
      chs[0] = (char)((ch >> 10) + 0xd800);
      chs[1] = (char)((ch & 0x3ff) + 0xdc00);
      return new String(chs);
    }

    /**
     * Copies the characters at indices {@code begin} (inclusive) to
     * {@code end} (exclusive) out of {@code iterator}. The iterator's
     * position is changed via {@code setIndex}.
     */
    static final String substring(CharacterIterator iterator, int begin, int end)
    {
      char[] src = new char[end - begin];
      for (int i = 0; i < src.length; i++)
        src[i] = iterator.setIndex(i + begin);
      return new String(src);
    }

    /**
     * Maps a single option character to the corresponding
     * {@code RegularExpression} option constant.
     *
     * @return the option constant, or 0 when {@code ch} is not an option character
     */
    static final int getOptionValue(int ch)
    {
      int ret = 0;
      switch (ch)
      {
        case 'i':
          ret = RegularExpression.IGNORE_CASE;
          break;
        case 'm':
          ret = RegularExpression.MULTIPLE_LINES;
          break;
        case 's':
          ret = RegularExpression.SINGLE_LINE;
          break;
        case 'x':
          ret = RegularExpression.EXTENDED_COMMENT;
          break;
        case 'u':
          ret = RegularExpression.USE_UNICODE_CATEGORY;
          break;
        case 'w':
          ret = RegularExpression.UNICODE_WORD_BOUNDARY;
          break;
        case 'F':
          ret = RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION;
          break;
        case 'H':
          ret = RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION;
          break;
        case 'X':
          ret = RegularExpression.XMLSCHEMA_MODE;
          break;
        case ',':
          ret = RegularExpression.SPECIAL_COMMA;
          break;
        default:
          // Unknown option character: leave ret at 0.
          break;
      }
      return ret;
    }

    /**
     * Converts an option string into the bitwise OR of its option constants.
     *
     * @param opts option characters, or {@code null} for no options
     * @return the combined option bits; 0 when {@code opts} is {@code null}
     * @throws ParseException when a character is not a known option
     */
    static final int parseOptions(String opts) throws ParseException
    {
      if (opts == null)
        return 0;
      int options = 0;
      for (int i = 0; i < opts.length(); i++)
      {
        int v = getOptionValue(opts.charAt(i));
        if (v == 0)
          throw new ParseException("Unknown Option: " + opts.substring(i), -1);
        options |= v;
      }
      return options;
    }

    /**
     * Builds the option string for the given option bits. Characters are
     * emitted in the fixed order {@code F H X i m s u w x ,} and the result
     * is interned.
     */
    static final String createOptionString(int options)
    {
      StringBuffer sb = new StringBuffer(9);
      if ((options & RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION) != 0)
        sb.append('F');
      if ((options & RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION) != 0)
        sb.append('H');
      if ((options & RegularExpression.XMLSCHEMA_MODE) != 0)
        sb.append('X');
      if ((options & RegularExpression.IGNORE_CASE) != 0)
        sb.append('i');
      if ((options & RegularExpression.MULTIPLE_LINES) != 0)
        sb.append('m');
      if ((options & RegularExpression.SINGLE_LINE) != 0)
        sb.append('s');
      if ((options & RegularExpression.USE_UNICODE_CATEGORY) != 0)
        sb.append('u');
      if ((options & RegularExpression.UNICODE_WORD_BOUNDARY) != 0)
        sb.append('w');
      if ((options & RegularExpression.EXTENDED_COMMENT) != 0)
        sb.append('x');
      if ((options & RegularExpression.SPECIAL_COMMA) != 0)
        sb.append(',');
      return sb.toString().intern();
    }

    /**
     * Removes unescaped white space (tab, LF, FF, CR, space) and
     * {@code #}-to-end-of-line comments from {@code regex}. A backslash
     * followed by {@code #} or a white-space character yields that character
     * alone; any other backslash pair is copied unchanged.
     */
    static String stripExtendedComment(String regex)
    {
      int len = regex.length();
      StringBuffer buffer = new StringBuffer(len);
      int offset = 0;
      while (offset < len)
      {
        int ch = regex.charAt(offset++);
        // Skip white space.
        if (ch == '\t' || ch == '\n' || ch == '\f' || ch == '\r' || ch == ' ')
          continue;

        if (ch == '#')
        {
          // Skip everything up to and including the line terminator.
          while (offset < len)
          {
            ch = regex.charAt(offset++);
            if (ch == '\r' || ch == '\n')
              break;
          }
          continue;
        }

        int next;
        if (ch == '\\' && offset < len)
        {
          if ((next = regex.charAt(offset)) == '#'
              || next == '\t' || next == '\n' || next == '\f'
              || next == '\r' || next == ' ')
          {
            // Escaped '#' or white space: keep the character, drop the backslash.
            buffer.append((char)next);
            offset++;
          }
          else
          {
            // Any other escape is preserved as written.
            buffer.append('\\');
            buffer.append((char)next);
            offset++;
          }
        }
        else
          buffer.append((char)ch);
      }
      return buffer.toString();
    }

    /**
     * Command-line debugging aid: compiles the first non-option argument as a
     * pattern, matches it against the second one and prints each group's
     * range and captured text. Recognized flags are
     * {@code -i -m -s -u -w -X}.
     */
    public static void main(String[] argv)
    {
      String pattern = null;
      try
      {
        String options = "";
        String target = null;
        if( argv.length == 0 )
        {
          System.out.println( "Error:Usage: java REUtil -i|-m|-s|-u|-w|-X regularExpression String" );
          System.exit( 0 );
        }
        for (int i = 0; i < argv.length; i++)
        {
          if (argv[i].length() == 0 || argv[i].charAt(0) != '-')
          {
            if (pattern == null)
              pattern = argv[i];
            else if (target == null)
              target = argv[i];
            else
              System.err.println("Unnecessary: " + argv[i]);
          }
          else if (argv[i].equals("-i"))
          {
            options += "i";
          }
          else if (argv[i].equals("-m"))
          {
            options += "m";
          }
          else if (argv[i].equals("-s"))
          {
            options += "s";
          }
          else if (argv[i].equals("-u"))
          {
            options += "u";
          }
          else if (argv[i].equals("-w"))
          {
            options += "w";
          }
          else if (argv[i].equals("-X"))
          {
            options += "X";
          }
          else
          {
            System.err.println("Unknown option: " + argv[i]);
          }
        }
        // Both a pattern and a target are required; without them there is
        // nothing meaningful to compile or match, so report usage instead of
        // passing null on.
        if (pattern == null || target == null)
        {
          System.out.println( "Error:Usage: java REUtil -i|-m|-s|-u|-w|-X regularExpression String" );
          return;
        }
        RegularExpression reg = new RegularExpression(pattern, options);
        System.out.println("RegularExpression: " + reg);
        Match match = new Match();
        reg.matches(target, match);
        for (int i = 0; i < match.getNumberOfGroups(); i++)
        {
          if (i == 0 )
            System.out.print("Matched range for the whole pattern: ");
          else
            System.out.print("[" + i + "]: ");
          if (match.getBeginning(i) < 0)
            System.out.println("-1");
          else
          {
            System.out.print(match.getBeginning(i) + ", " + match.getEnd(i) + ", ");
            System.out.println("\"" + match.getCapturedText(i) + "\"");
          }
        }
      }
      catch (ParseException pe)
      {
        if (pattern == null)
        {
          pe.printStackTrace();
        }
        else
        {
          System.err.println("org.apache.xerces.utils.regex.ParseException: " + pe.getMessage());
          String indent = " ";
          System.err.println(indent + pattern);
          int loc = pe.getLocation();
          if (loc >= 0)
          {
            // Draw a marker under the reported error location.
            System.err.print(indent);
            for (int i = 0; i < loc; i++)
              System.err.print("-");
            System.err.println("^");
          }
        }
      }
      catch (Exception e)
      {
        e.printStackTrace();
      }
    }

    /** Number of slots in {@link #regexCache}. */
    static final int CACHESIZE = 20;
    /** Recently used expressions, most recent first; also the lock for cache access. */
    static final RegularExpression[] regexCache = new RegularExpression[CACHESIZE];

    /**
     * Returns a {@code RegularExpression} for the given pattern and options,
     * reusing a cached instance when one with the same pattern and options
     * exists. The returned instance is moved (or inserted) to the front of
     * the cache; inserting into a full cache drops the last entry.
     *
     * @param pattern the regular expression source
     * @param options option characters, or {@code null}
     * @throws ParseException when the options or the pattern cannot be parsed
     */
    public static RegularExpression createRegex(String pattern, String options)
      throws ParseException
    {
      RegularExpression re = null;
      int intOptions = REUtil.parseOptions(options);
      synchronized (REUtil.regexCache)
      {
        int i;
        for (i = 0; i < REUtil.CACHESIZE; i++)
        {
          RegularExpression cached = REUtil.regexCache[i];
          if (cached == null)
          {
            // Entries are packed at the front, so the first empty slot ends the search.
            i = -1;
            break;
          }
          if (cached.equals(pattern, intOptions))
          {
            re = cached;
            break;
          }
        }
        if (re != null)
        {
          if (i != 0)
          {
            // Move the hit to the front, shifting the entries before it down by one.
            System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, i);
            REUtil.regexCache[0] = re;
          }
        }
        else
        {
          re = new RegularExpression(pattern, options);
          System.arraycopy(REUtil.regexCache, 0, REUtil.regexCache, 1, REUtil.CACHESIZE - 1);
          REUtil.regexCache[0] = re;
        }
      }
      return re;
    }

    /**
     * Matches {@code target} against {@code regex} with no options, using the
     * cache of {@link #createRegex(String, String)}.
     *
     * @throws ParseException when {@code regex} cannot be parsed
     */
    public static boolean matches(String regex, String target) throws ParseException
    {
      return REUtil.createRegex(regex, null).matches(target);
    }

    /**
     * Matches {@code target} against {@code regex} with the given options,
     * using the cache of {@link #createRegex(String, String)}.
     *
     * @throws ParseException when {@code regex} or {@code options} cannot be parsed
     */
    public static boolean matches(String regex, String options, String target) throws ParseException
    {
      return REUtil.createRegex(regex, options).matches(target);
    }

    /**
     * Prefixes each of the characters {@code . * + ? { [ ( ) | \ ^ $} in
     * {@code literal} with a backslash.
     *
     * @return the quoted string, or {@code literal} itself when it contains
     *         none of those characters
     */
    public static String quoteMeta(String literal)
    {
      int len = literal.length();
      // Allocated lazily, on the first character that needs quoting.
      StringBuffer buffer = null;
      for (int i = 0; i < len; i++)
      {
        int ch = literal.charAt(i);
        if (".*+?{[()|\\^$".indexOf(ch) >= 0)
        {
          if (buffer == null)
          {
            buffer = new StringBuffer(i + (len - i) * 2);
            if (i > 0)
              buffer.append(literal.substring(0, i));
          }
          buffer.append('\\');
          buffer.append((char)ch);
        }
        else if (buffer != null)
          buffer.append((char)ch);
      }
      return buffer != null ? buffer.toString() : literal;
    }

    /**
     * Prints the hexadecimal value of each character of {@code v} to standard
     * output, separated by spaces and followed by a line break.
     */
    static void dumpString(String v)
    {
      for (int i = 0; i < v.length(); i++)
      {
        System.out.print(Integer.toHexString(v.charAt(i)));
        System.out.print(" ");
      }
      System.out.println();
    }
  }
}