| 1 57 58 package org.enhydra.apache.xerces.utils.regex; 59 60 61 import java.text.CharacterIterator ; 62 63 522 public class RegularExpression implements java.io.Serializable { 523 static final boolean DEBUG = false; 524 525 528 private synchronized void compile(Token tok) { 529 if (this.operations != null) 530 return; 531 this.numberOfClosures = 0; 532 this.operations = this.compile(tok, null, false); 533 } 534 535 538 private Op compile(Token tok, Op next, boolean reverse) { 539 Op ret; 540 switch (tok.type) { 541 case Token.DOT: 542 ret = Op.createDot(); 543 ret.next = next; 544 break; 545 546 case Token.CHAR: 547 ret = Op.createChar(tok.getChar()); 548 ret.next = next; 549 break; 550 551 case Token.ANCHOR: 552 ret = Op.createAnchor(tok.getChar()); 553 ret.next = next; 554 break; 555 556 case Token.RANGE: 557 case Token.NRANGE: 558 ret = Op.createRange(tok); 559 ret.next = next; 560 break; 561 562 case Token.CONCAT: 563 ret = next; 564 if (!reverse) { 565 for (int i = tok.size()-1; i >= 0; i --) { 566 ret = compile(tok.getChild(i), ret, false); 567 } 568 } else { 569 for (int i = 0; i < tok.size(); i ++) { 570 ret = compile(tok.getChild(i), ret, true); 571 } 572 } 573 break; 574 575 case Token.UNION: 576 Op.UnionOp uni = Op.createUnion(tok.size()); 577 for (int i = 0; i < tok.size(); i ++) { 578 uni.addElement(compile(tok.getChild(i), next, reverse)); 579 } 580 ret = uni; break; 582 583 case Token.CLOSURE: 584 case Token.NONGREEDYCLOSURE: 585 Token child = tok.getChild(0); 586 int min = tok.getMin(); 587 int max = tok.getMax(); 588 if (min >= 0 && min == max) { ret = next; 590 for (int i = 0; i < min; i ++) { 591 ret = compile(child, ret, reverse); 592 } 593 break; 594 } 595 if (min > 0 && max > 0) 596 max -= min; 597 if (max > 0) { 598 ret = next; 600 for (int i = 0; i < max; i ++) { 601 Op.ChildOp q = Op.createQuestion(tok.type == Token.NONGREEDYCLOSURE); 602 q.next = next; 603 q.setChild(compile(child, ret, reverse)); 604 ret = q; 605 } 606 } else { 607 Op.ChildOp op; 608 if (tok.type == Token.NONGREEDYCLOSURE) { 609 op = Op.createNonGreedyClosure(); 610 } else { if (child.getMinLength() == 0) 612 op = Op.createClosure(this.numberOfClosures++); 613 else 614 op = Op.createClosure(-1); 615 } 616 op.next = next; 617 op.setChild(compile(child, op, reverse)); 618 ret = op; 619 } 620 if (min > 0) { 621 for (int i = 0; i < min; i ++) { 622 ret = compile(child, ret, reverse); 623 } 624 } 625 break; 626 627 case Token.EMPTY: 628 ret = next; 629 break; 630 631 case Token.STRING: 632 ret = Op.createString(tok.getString()); 633 ret.next = next; 634 break; 635 636 case Token.BACKREFERENCE: 637 ret = Op.createBackReference(tok.getReferenceNumber()); 638 ret.next = next; 639 break; 640 641 case Token.PAREN: 642 if (tok.getParenNumber() == 0) { 643 ret = compile(tok.getChild(0), next, reverse); 644 } else if (reverse) { 645 next = Op.createCapture(tok.getParenNumber(), next); 646 next = compile(tok.getChild(0), next, reverse); 647 ret = Op.createCapture(-tok.getParenNumber(), next); 648 } else { 649 next = Op.createCapture(-tok.getParenNumber(), next); 650 next = compile(tok.getChild(0), next, reverse); 651 ret = Op.createCapture(tok.getParenNumber(), next); 652 } 653 break; 654 655 case Token.LOOKAHEAD: 656 ret = Op.createLook(Op.LOOKAHEAD, next, compile(tok.getChild(0), null, false)); 657 break; 658 case Token.NEGATIVELOOKAHEAD: 659 ret = Op.createLook(Op.NEGATIVELOOKAHEAD, next, compile(tok.getChild(0), null, false)); 660 break; 661 case Token.LOOKBEHIND: 662 ret = Op.createLook(Op.LOOKBEHIND, next, compile(tok.getChild(0), null, true)); 663 break; 664 case Token.NEGATIVELOOKBEHIND: 665 ret = Op.createLook(Op.NEGATIVELOOKBEHIND, next, compile(tok.getChild(0), null, true)); 666 break; 667 668 case Token.INDEPENDENT: 669 ret = Op.createIndependent(next, compile(tok.getChild(0), null, reverse)); 670 break; 671 672 case Token.MODIFIERGROUP: 673 ret = Op.createModifier(next, compile(tok.getChild(0), null, reverse), 674 ((Token.ModifierToken)tok).getOptions(), 675 ((Token.ModifierToken)tok).getOptionsMask()); 676 break; 677 678 case Token.CONDITION: 679 Token.ConditionToken ctok = (Token.ConditionToken)tok; 680 int ref = ctok.refNumber; 681 Op condition = ctok.condition == null ? null : compile(ctok.condition, null, reverse); 682 Op yes = compile(ctok.yes, next, reverse); 683 Op no = ctok.no == null ? null : compile(ctok.no, next, reverse); 684 ret = Op.createCondition(next, ref, condition, yes, no); 685 break; 686 687 default: 688 throw new RuntimeException ("Unknown token type: "+tok.type); 689 } return ret; 691 } 692 693 694 696 701 public boolean matches(char[] target) { 702 return this.matches(target, 0, target .length , (Match)null); 703 } 704 705 713 public boolean matches(char[] target, int start, int end) { 714 return this.matches(target, start, end, (Match)null); 715 } 716 717 723 public boolean matches(char[] target, Match match) { 724 return this.matches(target, 0, target .length , match); 725 } 726 727 728 737 public boolean matches(char[] target, int start, int end, Match match) { 738 739 synchronized (this) { 740 if (this.operations == null) 741 this.prepare(); 742 if (this.context == null) 743 this.context = new Context(); 744 } 745 Context con = null; 746 synchronized (this.context) { 747 con = this.context.inuse ? new Context() : this.context; 748 con.reset(target, start, end, this.numberOfClosures); 749 } 750 if (match != null) { 751 match.setNumberOfGroups(this.nofparen); 752 match.setSource(target); 753 } else if (this.hasBackReferences) { 754 match = new Match(); 755 match.setNumberOfGroups(this.nofparen); 756 } 759 con.match = match; 760 761 if (isSet(this.options, XMLSCHEMA_MODE)) { 762 int matchEnd = this. matchCharArray (con, this.operations, con.start, 1, this.options); 763 if (matchEnd == con.limit) { 765 if (con.match != null) { 766 con.match.setBeginning(0, con.start); 767 con.match.setEnd(0, matchEnd); 768 } 769 con.inuse = false; 770 return true; 771 } 772 return false; 773 } 774 775 779 if (this.fixedStringOnly) { 780 int o = this.fixedStringTable.matches(target, con.start, con.limit); 782 if (o >= 0) { 783 if (con.match != null) { 784 con.match.setBeginning(0, o); 785 con.match.setEnd(0, o+this.fixedString.length()); 786 } 787 con.inuse = false; 788 return true; 789 } 790 con.inuse = false; 791 return false; 792 } 793 794 799 if (this.fixedString != null) { 800 int o = this.fixedStringTable.matches(target, con.start, con.limit); 801 if (o < 0) { 802 con.inuse = false; 804 return false; 805 } 806 } 807 808 int limit = con.limit-this.minlength; 809 int matchStart; 810 int matchEnd = -1; 811 812 815 if (this.operations != null 816 && this.operations.type == Op.CLOSURE && this.operations.getChild().type == Op.DOT) { 817 if (isSet(this.options, SINGLE_LINE)) { 818 matchStart = con.start; 819 matchEnd = this. matchCharArray (con, this.operations, con.start, 1, this.options); 820 } else { 821 boolean previousIsEOL = true; 822 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 823 int ch = target [ matchStart ] ; 824 if (isEOLChar(ch)) { 825 previousIsEOL = true; 826 } else { 827 if (previousIsEOL) { 828 if (0 <= (matchEnd = this. matchCharArray (con, this.operations, 829 matchStart, 1, this.options))) 830 break; 831 } 832 previousIsEOL = false; 833 } 834 } 835 } 836 } 837 838 841 else if (this.firstChar != null) { 842 RangeToken range = this.firstChar; 844 if (isSet(this.options, IGNORE_CASE)) { 845 range = this.firstChar.getCaseInsensitiveToken(); 846 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 847 int ch = target [ matchStart ] ; 848 if (REUtil.isHighSurrogate(ch) && matchStart+1 < con.limit) { 849 ch = REUtil.composeFromSurrogates(ch, target [ matchStart+1 ] ); 850 if (!range.match(ch)) continue; 851 } else { 852 if (!range.match(ch)) { 853 char ch1 = Character.toUpperCase((char)ch); 854 if (!range.match(ch1)) 855 if (!range.match(Character.toLowerCase(ch1))) 856 continue; 857 } 858 } 859 if (0 <= (matchEnd = this. matchCharArray (con, this.operations, 860 matchStart, 1, this.options))) 861 break; 862 } 863 } else { 864 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 865 int ch = target [ matchStart ] ; 866 if (REUtil.isHighSurrogate(ch) && matchStart+1 < con.limit) 867 ch = REUtil.composeFromSurrogates(ch, target [ matchStart+1 ] ); 868 if (!range.match(ch)) continue; 869 if (0 <= (matchEnd = this. matchCharArray (con, this.operations, 870 matchStart, 1, this.options))) 871 break; 872 } 873 } 874 } 875 876 879 else { 880 for (matchStart = con.start; matchStart <= limit; matchStart ++) { 881 if (0 <= (matchEnd = this. matchCharArray (con, this.operations, matchStart, 1, this.options))) 882 break; 883 } 884 } 885 886 if (matchEnd >= 0) { 887 if (con.match != null) { 888 con.match.setBeginning(0, matchStart); 889 con.match.setEnd(0, matchEnd); 890 } 891 con.inuse = false; 892 return true; 893 } else { 894 con.inuse = false; 895 return false; 896 } 897 } 898 899 902 private int matchCharArray (Context con, Op op, int offset, int dx, int opts) { 903 904 char[] target = con.charTarget; 905 906 907 while (true) { 908 if (op == null) 909 return offset; 910 if (offset > con.limit || offset < con.start) 911 return -1; 912 switch (op.type) { 913 case Op.CHAR: 914 &
|