1 16 17 package org.apache.xerces.impl.xpath.regex; 18 19 import java.util.Vector ; 20 import java.util.Hashtable ; 21 22 29 class Token implements java.io.Serializable { 30 31 private static final long serialVersionUID = 4049923761862293040L; 32 33 static final boolean COUNTTOKENS = true; 34 static int tokens = 0; 35 36 static final int CHAR = 0; static final int DOT = 11; static final int CONCAT = 1; static final int UNION = 2; static final int CLOSURE = 3; static final int RANGE = 4; static final int NRANGE = 5; static final int PAREN = 6; static final int EMPTY = 7; static final int ANCHOR = 8; static final int NONGREEDYCLOSURE = 9; static final int STRING = 10; static final int BACKREFERENCE = 12; static final int LOOKAHEAD = 20; static final int NEGATIVELOOKAHEAD = 21; static final int LOOKBEHIND = 22; static final int NEGATIVELOOKBEHIND = 23; static final int INDEPENDENT = 24; static final int MODIFIERGROUP = 25; static final int CONDITION = 26; 57 static final int UTF16_MAX = 0x10ffff; 58 59 int type; 60 61 static Token token_dot; 62 static Token token_0to9; 63 static Token token_wordchars; 64 static Token token_not_0to9; 65 static Token token_not_wordchars; 66 static Token token_spaces; 67 static Token token_not_spaces; 68 static Token token_empty; 69 static Token token_linebeginning; 70 static Token token_linebeginning2; 71 static Token token_lineend; 72 static Token token_stringbeginning; 73 static Token token_stringend; 74 static Token token_stringend2; 75 static Token token_wordedge; 76 static Token token_not_wordedge; 77 static Token token_wordbeginning; 78 static Token token_wordend; 79 static { 80 Token.token_empty = new Token(Token.EMPTY); 81 82 Token.token_linebeginning = Token.createAnchor('^'); 83 Token.token_linebeginning2 = Token.createAnchor('@'); 84 Token.token_lineend = Token.createAnchor('$'); 85 Token.token_stringbeginning = Token.createAnchor('A'); 86 Token.token_stringend = Token.createAnchor('z'); 87 Token.token_stringend2 = Token.createAnchor('Z'); 88 Token.token_wordedge = Token.createAnchor('b'); 89 Token.token_not_wordedge = Token.createAnchor('B'); 90 Token.token_wordbeginning = Token.createAnchor('<'); 91 Token.token_wordend = Token.createAnchor('>'); 92 93 Token.token_dot = new Token(Token.DOT); 94 95 Token.token_0to9 = Token.createRange(); 96 Token.token_0to9.addRange('0', '9'); 97 Token.token_wordchars = Token.createRange(); 98 Token.token_wordchars.addRange('0', '9'); 99 Token.token_wordchars.addRange('A', 'Z'); 100 Token.token_wordchars.addRange('_', '_'); 101 Token.token_wordchars.addRange('a', 'z'); 102 Token.token_spaces = Token.createRange(); 103 Token.token_spaces.addRange('\t', '\t'); 104 Token.token_spaces.addRange('\n', '\n'); 105 Token.token_spaces.addRange('\f', '\f'); 106 Token.token_spaces.addRange('\r', '\r'); 107 Token.token_spaces.addRange(' ', ' '); 108 109 Token.token_not_0to9 = Token.complementRanges(Token.token_0to9); 110 Token.token_not_wordchars = Token.complementRanges(Token.token_wordchars); 111 Token.token_not_spaces = Token.complementRanges(Token.token_spaces); 112 } 113 114 static Token.ParenToken createLook(int type, Token child) { 115 if (COUNTTOKENS) Token.tokens ++; 116 return new Token.ParenToken(type, child, 0); 117 } 118 static Token.ParenToken createParen(Token child, int pnumber) { 119 if (COUNTTOKENS) Token.tokens ++; 120 return new Token.ParenToken(Token.PAREN, child, pnumber); 121 } 122 static Token.ClosureToken createClosure(Token tok) { 123 if (COUNTTOKENS) Token.tokens ++; 124 return new Token.ClosureToken(Token.CLOSURE, tok); 125 } 126 static Token.ClosureToken createNGClosure(Token tok) { 127 if (COUNTTOKENS) Token.tokens ++; 128 return new Token.ClosureToken(Token.NONGREEDYCLOSURE, tok); 129 } 130 static Token.ConcatToken createConcat(Token tok1, Token tok2) { 131 if (COUNTTOKENS) Token.tokens ++; 132 return new Token.ConcatToken(tok1, tok2); 133 } 134 static Token.UnionToken createConcat() { 135 if (COUNTTOKENS) Token.tokens ++; 136 return new Token.UnionToken(Token.CONCAT); } 138 static Token.UnionToken createUnion() { 139 if (COUNTTOKENS) Token.tokens ++; 140 return new Token.UnionToken(Token.UNION); 141 } 142 static Token createEmpty() { 143 return Token.token_empty; 144 } 145 static RangeToken createRange() { 146 if (COUNTTOKENS) Token.tokens ++; 147 return new RangeToken(Token.RANGE); 148 } 149 static RangeToken createNRange() { 150 if (COUNTTOKENS) Token.tokens ++; 151 return new RangeToken(Token.NRANGE); 152 } 153 static Token.CharToken createChar(int ch) { 154 if (COUNTTOKENS) Token.tokens ++; 155 return new Token.CharToken(Token.CHAR, ch); 156 } 157 static private Token.CharToken createAnchor(int ch) { 158 if (COUNTTOKENS) Token.tokens ++; 159 return new Token.CharToken(Token.ANCHOR, ch); 160 } 161 static Token.StringToken createBackReference(int refno) { 162 if (COUNTTOKENS) Token.tokens ++; 163 return new Token.StringToken(Token.BACKREFERENCE, null, refno); 164 } 165 static Token.StringToken createString(String str) { 166 if (COUNTTOKENS) Token.tokens ++; 167 return new Token.StringToken(Token.STRING, str, 0); 168 } 169 static Token.ModifierToken createModifierGroup(Token child, int add, int mask) { 170 if (COUNTTOKENS) Token.tokens ++; 171 return new Token.ModifierToken(child, add, mask); 172 } 173 static Token.ConditionToken createCondition(int refno, Token condition, 174 Token yespat, Token nopat) { 175 if (COUNTTOKENS) Token.tokens ++; 176 return new Token.ConditionToken(refno, condition, yespat, nopat); 177 } 178 179 protected Token(int type) { 180 this.type = type; 181 } 182 183 186 int size() { 187 return 0; 188 } 189 Token getChild(int index) { 190 return null; 191 } 192 void addChild(Token tok) { 193 throw new RuntimeException ("Not supported."); 194 } 195 196 protected void addRange(int start, int end) { 198 throw new RuntimeException ("Not supported."); 199 } 200 protected void sortRanges() { 201 throw new RuntimeException ("Not supported."); 202 } 203 protected void compactRanges() { 204 throw new RuntimeException ("Not supported."); 205 } 206 protected void mergeRanges(Token tok) { 207 throw new RuntimeException ("Not supported."); 208 } 209 protected void subtractRanges(Token tok) { 210 throw new RuntimeException ("Not supported."); 211 } 212 protected void intersectRanges(Token tok) { 213 throw new RuntimeException ("Not supported."); 214 } 215 static Token complementRanges(Token tok) { 216 return RangeToken.complementRanges(tok); 217 } 218 219 220 void setMin(int min) { } 222 void setMax(int max) { } 224 int getMin() { return -1; 226 } 227 int getMax() { return -1; 229 } 230 int getReferenceNumber() { return 0; 232 } 233 String getString() { return null; 235 } 236 237 int getParenNumber() { 238 return 0; 239 } 240 int getChar() { 241 return -1; 242 } 243 244 public String toString() { 245 return this.toString(0); 246 } 247 public String toString(int options) { 248 return this.type == Token.DOT ? "." : ""; 249 } 250 251 254 final int getMinLength() { 255 switch (this.type) { 256 case CONCAT: 257 int sum = 0; 258 for (int i = 0; i < this.size(); i ++) 259 sum += this.getChild(i).getMinLength(); 260 return sum; 261 262 case CONDITION: 263 case UNION: 264 if (this.size() == 0) 265 return 0; 266 int ret = this.getChild(0).getMinLength(); 267 for (int i = 1; i < this.size(); i ++) { 268 int min = this.getChild(i).getMinLength(); 269 if (min < ret) ret = min; 270 } 271 return ret; 272 273 case CLOSURE: 274 case NONGREEDYCLOSURE: 275 if (this.getMin() >= 0) 276 return this.getMin() * this.getChild(0).getMinLength(); 277 return 0; 278 279 case EMPTY: 280 case ANCHOR: 281 return 0; 282 283 case DOT: 284 case CHAR: 285 case RANGE: 286 case NRANGE: 287 return 1; 288 289 case INDEPENDENT: 290 case PAREN: 291 case MODIFIERGROUP: 292 return this.getChild(0).getMinLength(); 293 294 case BACKREFERENCE: 295 return 0; 297 case STRING: 298 return this.getString().length(); 299 300 case LOOKAHEAD: 301 case NEGATIVELOOKAHEAD: 302 case LOOKBEHIND: 303 case NEGATIVELOOKBEHIND: 304 return 0; 306 default: 307 throw new RuntimeException ("Token#getMinLength(): Invalid Type: "+this.type); 308 } 309 } 310 311 final int getMaxLength() { 312 switch (this.type) { 313 case CONCAT: 314 int sum = 0; 315 for (int i = 0; i < this.size(); i ++) { 316 int d = this.getChild(i).getMaxLength(); 317 if (d < 0) return -1; 318 sum += d; 319 } 320 return sum; 321 322 case CONDITION: 323 case UNION: 324 if (this.size() == 0) 325 return 0; 326 int ret = this.getChild(0).getMaxLength(); 327 for (int i = 1; ret >= 0 && i < this.size(); i ++) { 328 int max = this.getChild(i).getMaxLength(); 329 if (max < 0) { ret = -1; 331 break; 332 } 333 if (max > ret) ret = max; 334 } 335 return ret; 336 337 case CLOSURE: 338 case NONGREEDYCLOSURE: 339 if (this.getMax() >= 0) 340 return this.getMax() * this.getChild(0).getMaxLength(); 343 return -1; 344 345 case EMPTY: 346 case ANCHOR: 347 return 0; 348 349 case CHAR: 350 return 1; 351 case DOT: 352 case RANGE: 353 case NRANGE: 354 return 2; 355 356 case INDEPENDENT: 357 case PAREN: 358 case MODIFIERGROUP: 359 return this.getChild(0).getMaxLength(); 360 361 case BACKREFERENCE: 362 return -1; 364 case STRING: 365 return this.getString().length(); 366 367 case LOOKAHEAD: 368 case NEGATIVELOOKAHEAD: 369 case LOOKBEHIND: 370 case NEGATIVELOOKBEHIND: 371 return 0; 373 default: 374 throw new RuntimeException ("Token#getMaxLength(): Invalid Type: "+this.type); 375 } 376 } 377 378 static final int FC_CONTINUE = 0; 379 static final int FC_TERMINAL = 1; 380 static final int FC_ANY = 2; 381 private static final boolean isSet(int options, int flag) { 382 return (options & flag) == flag; 383 } 384 final int analyzeFirstCharacter(RangeToken result, int options) { 385 switch (this.type) { 386 case CONCAT: 387 int ret = FC_CONTINUE; 388 for (int i = 0; i < this.size(); i ++) 389 if ((ret = this.getChild(i).analyzeFirstCharacter(result, options)) != FC_CONTINUE) 390 break; 391 return ret; 392 393 case UNION: 394 if (this.size() == 0) 395 return FC_CONTINUE; 396 401 int ret2 = FC_CONTINUE; 402 boolean hasEmpty = false; 403 for (int i = 0; i < this.size(); i ++) { 404 ret2 = this.getChild(i).analyzeFirstCharacter(result, options); 405 if (ret2 == FC_ANY) 406 break; 407 else if (ret2 == FC_CONTINUE) 408 hasEmpty = true; 409 } 410 return hasEmpty ? FC_CONTINUE : ret2; 411 412 case CONDITION: 413 int ret3 = this.getChild(0).analyzeFirstCharacter(result, options); 414 if (this.size() == 1) return FC_CONTINUE; 415 if (ret3 == FC_ANY) return ret3; 416 int ret4 = this.getChild(1).analyzeFirstCharacter(result, options); 417 if (ret4 == FC_ANY) return ret4; 418 return ret3 == FC_CONTINUE || ret4 == FC_CONTINUE ? FC_CONTINUE : FC_TERMINAL; 419 420 case CLOSURE: 421 case NONGREEDYCLOSURE: 422 this.getChild(0).analyzeFirstCharacter(result, options); 423 return FC_CONTINUE; 424 425 case EMPTY: 426 case ANCHOR: 427 return FC_CONTINUE; 428 429 case CHAR: 430 int ch = this.getChar(); 431 result.addRange(ch, ch); 432 if (ch < 0x10000 && isSet(options, RegularExpression.IGNORE_CASE)) { 433 ch = Character.toUpperCase((char)ch); 434 result.addRange(ch, ch); 435 ch = Character.toLowerCase((char)ch); 436 result.addRange(ch, ch); 437 } 438 return FC_TERMINAL; 439 440 case DOT: if (isSet(options, RegularExpression.SINGLE_LINE)) { 442 return FC_CONTINUE; } else { 444 return FC_CONTINUE; 445 453 } 454 455 case RANGE: 456 if (isSet(options, RegularExpression.IGNORE_CASE)) { 457 result.mergeRanges(((RangeToken)this).getCaseInsensitiveToken()); 458 } else { 459 result.mergeRanges(this); 460 } 461 return FC_TERMINAL; 462 463 case NRANGE: if (isSet(options, RegularExpression.IGNORE_CASE)) { 465 result.mergeRanges(Token.complementRanges(((RangeToken)this).getCaseInsensitiveToken())); 466 } else { 467 result.mergeRanges(Token.complementRanges(this)); 468 } 469 return FC_TERMINAL; 470 471 case INDEPENDENT: 472 case PAREN: 473 return this.getChild(0).analyzeFirstCharacter(result, options); 474 475 case MODIFIERGROUP: 476 options |= ((ModifierToken)this).getOptions(); 477 options &= ~((ModifierToken)this).getOptionsMask(); 478 return this.getChild(0).analyzeFirstCharacter(result, options); 479 480 case BACKREFERENCE: 481 result.addRange(0, UTF16_MAX); return FC_ANY; 483 484 case STRING: 485 int cha = this.getString().charAt(0); 486 int ch2; 487 if (REUtil.isHighSurrogate(cha) 488 && this.getString().length() >= 2 489 && REUtil.isLowSurrogate((ch2 = this.getString().charAt(1)))) 490 cha = REUtil.composeFromSurrogates(cha, ch2); 491 result.addRange(cha, cha); 492 if (cha < 0x10000 && isSet(options, RegularExpression.IGNORE_CASE)) { 493 cha = Character.toUpperCase((char)cha); 494 result.addRange(cha, cha); 495 cha = Character.toLowerCase((char)cha); 496 result.addRange(cha, cha); 497 } 498 return FC_TERMINAL; 499 500 case LOOKAHEAD: 501 case NEGATIVELOOKAHEAD: 502 case LOOKBEHIND: 503 case NEGATIVELOOKBEHIND: 504 return FC_CONTINUE; 505 506 default: 507 throw new RuntimeException ("Token#analyzeHeadCharacter(): Invalid Type: "+this.type); 508 } 509 } 510 511 private final boolean isShorterThan(Token tok) { 512 if (tok == null) return false; 513 523 int mylength; 524 if (this.type == STRING) mylength = this.getString().length(); 525 else throw new RuntimeException ("Internal Error: Illegal type: "+this.type); 526 int otherlength; 527 if (tok.type == STRING) otherlength = tok.getString().length(); 528 else throw new RuntimeException ("Internal Error: Illegal type: "+tok.type); 529 return mylength < otherlength; 530 } 531 532 static class FixedStringContainer { 533 Token token = null; 534 int options = 0; 535 FixedStringContainer() { 536 } 537 } 538 539 final void findFixedString(FixedStringContainer container, int options) { 540 switch (this.type) { 541 case CONCAT: 542 Token prevToken = null; 543 int prevOptions = 0; 544 for (int i = 0; i < this.size(); i ++) { 545 this.getChild(i).findFixedString(container, options); 546 if (prevToken == null || prevToken.isShorterThan(container.token)) { 547 prevToken = container.token; 548 prevOptions = container.options; 549 } 550 } 551 container.token = prevToken; 552 container.options = prevOptions; 553 return; 554 555 case UNION: 556 case CLOSURE: 557 case NONGREEDYCLOSURE: 558 case EMPTY: 559 case ANCHOR: 560 case RANGE: 561 case DOT: 562 case NRANGE: 563 case BACKREFERENCE: 564 case LOOKAHEAD: 565 case NEGATIVELOOKAHEAD: 566 case LOOKBEHIND: 567 case NEGATIVELOOKBEHIND: 568 case CONDITION: 569 container.token = null; 570 return; 571 572 case CHAR: container.token = null; return; 576 case STRING: 577 container.token = this; 578 container.options = options; 579 return; 580 581 case INDEPENDENT: 582 case PAREN: 583 this.getChild(0).findFixedString(container, options); 584 return; 585 586 case MODIFIERGROUP: 587 options |= ((ModifierToken)this).getOptions(); 588 options &= ~((ModifierToken)this).getOptionsMask(); 589 this.getChild(0).findFixedString(container, options); 590 return; 591 592 default: 593 throw new RuntimeException ("Token#findFixedString(): Invalid Type: "+this.type); 594 } 595 } 596 597 boolean match(int ch) { 598 throw new RuntimeException ("NFAArrow#match(): Internal error: "+this.type); 599 } 600 601 private final static Hashtable categories = new Hashtable (); 603 private final static Hashtable categories2 = new Hashtable (); 604 private static final String [] categoryNames = { 605 "Cn", "Lu", "Ll", "Lt", "Lm", "Lo", "Mn", "Me", "Mc", "Nd", 606 "Nl", "No", "Zs", "Zl", "Zp", "Cc", "Cf", null, "Co", "Cs", 607 "Pd", "Ps", "Pe", "Pc", "Po", "Sm", "Sc", "Sk", "So", "Pi", "Pf", "L", "M", "N", "Z", "C", "P", "S", }; 611 612 static final int CHAR_INIT_QUOTE = 29; static final int CHAR_FINAL_QUOTE = 30; static final int CHAR_LETTER = 31; 616 static final int CHAR_MARK = 32; 617 static final int CHAR_NUMBER = 33; 618 static final int CHAR_SEPARATOR = 34; 619 static final int CHAR_OTHER = 35; 620 static final int CHAR_PUNCTUATION = 36; 621 static final int CHAR_SYMBOL = 37; 622 623 private static final String [] blockNames = { 625 "Basic Latin", 626 "Latin-1 Supplement", 627 "Latin Extended-A", 628 "Latin Extended-B", 629 "IPA Extensions", 630 "Spacing Modifier Letters", 631 "Combining Diacritical Marks", 632 "Greek", 633 "Cyrillic", 634 "Armenian", 635 "Hebrew", 636 "Arabic", 637 "Syriac", 638 "Thaana", 639 "Devanagari", 640 "Bengali", 641 "Gurmukhi", 642 "Gujarati", 643 "Oriya", 644 "Tamil", 645 "Telugu", 646 "Kannada", 647 "Malayalam", 648 "Sinhala", 649 "Thai", 650 "Lao", 651 "Tibetan", 652 "Myanmar", 653 "Georgian", 654 "Hangul Jamo", 655 "Ethiopic", 656 "Cherokee", 657 "Unified Canadian Aboriginal Syllabics", 658 "Ogham", 659 "Runic", 660 "Khmer", 661 "Mongolian", 662 "Latin Extended Additional", 663 "Greek Extended", 664 "General Punctuation", 665 "Superscripts and Subscripts", 666 "Currency Symbols", 667 "Combining Marks for Symbols", 668 "Letterlike Symbols", 669 "Number Forms", 670 "Arrows", 671 "Mathematical Operators", 672 "Miscellaneous Technical", 673 "Control Pictures", 674 "Optical Character Recognition", 675 "Enclosed Alphanumerics", 676 "Box Drawing", 677 "Block Elements", 678 "Geometric Shapes", 679 "Miscellaneous Symbols", 680 "Dingbats", 681 "Braille Patterns", 682 "CJK Radicals Supplement", 683 "Kangxi Radicals", 684 "Ideographic Description Characters", 685 "CJK Symbols and Punctuation", 686 "Hiragana", 687 "Katakana", 688 "Bopomofo", 689 "Hangul Compatibility Jamo", 690 "Kanbun", 691 "Bopomofo Extended", 692 "Enclosed CJK Letters and Months", 693 "CJK Compatibility", 694 "CJK Unified Ideographs Extension A", 695 "CJK Unified Ideographs", 696 "Yi Syllables", 697 "Yi Radicals", 698 "Hangul Syllables", 699 "Private Use", 700 "CJK Compatibility Ideographs", 701 "Alphabetic Presentation Forms", 702 "Arabic Presentation Forms-A", 703 "Combining Half Marks", 704 "CJK Compatibility Forms", 705 "Small Form Variants", 706 "Arabic Presentation Forms-B", 707 "Specials", 708 "Halfwidth and Fullwidth Forms", 709 "Old Italic", "Gothic", 712 "Deseret", 713 "Byzantine Musical Symbols", 714 "Musical Symbols", 715 "Mathematical Alphanumeric Symbols", 716 "CJK Unified Ideographs Extension B", 717 "CJK Compatibility Ideographs Supplement", 718 "Tags", 719 721 }; 722 static final String blockRanges = 727 "\u0000\u007F\u0080\u00FF\u0100\u017F\u0180\u024F\u0250\u02AF\u02B0\u02FF\u0300\u036F" 728 +"\u0370\u03FF\u0400\u04FF\u0530\u058F\u0590\u05FF\u0600\u06FF\u0700\u074F\u0780\u07BF" 729 +"\u0900\u097F\u0980\u09FF\u0A00\u0A7F\u0A80\u0AFF\u0B00\u0B7F\u0B80\u0BFF\u0C00\u0C7F\u0C80\u0CFF" 730 +"\u0D00\u0D7F\u0D80\u0DFF\u0E00\u0E7F\u0E80\u0EFF\u0F00\u0FFF\u1000\u109F\u10A0\u10FF\u1100\u11FF" 731 +"\u1200\u137F\u13A0\u13FF\u1400\u167F\u1680\u169F\u16A0\u16FF\u1780\u17FF\u1800\u18AF\u1E00\u1EFF" 732 +"\u1F00\u1FFF\u2000\u206F\u2070\u209F\u20A0\u20CF\u20D0\u20FF\u2100\u214F\u2150\u218F\u2190\u21FF\u2200\u22FF" 733 +"\u2300\u23FF\u2400\u243F\u2440\u245F\u2460\u24FF\u2500\u257F\u2580\u259F\u25A0\u25FF\u2600\u26FF\u2700\u27BF" 734 +"\u2800\u28FF\u2E80\u2EFF\u2F00\u2FDF\u2FF0\u2FFF\u3000\u303F\u3040\u309F\u30A0\u30FF\u3100\u312F\u3130\u318F" 735 +"\u3190\u319F\u31A0\u31BF\u3200\u32FF\u3300\u33FF\u3400\u4DB5\u4E00\u9FFF\uA000\uA48F\uA490\uA4CF" 736 +"\uAC00\uD7A3\uE000\uF8FF\uF900\uFAFF\uFB00\uFB4F\uFB50\uFDFF" 737 +"\uFE20\uFE2F\uFE30\uFE4F\uFE50\uFE6F\uFE70\uFEFE\uFEFF\uFEFF\uFF00\uFFEF"; 738 static final int[] nonBMPBlockRanges = { 739 0x10300, 0x1032F, 0x10330, 0x1034F, 741 0x10400, 0x1044F, 742 0x1D000, 0x1D0FF, 743 0x1D100, 0x1D1FF, 744 0x1D400, 0x1D7FF, 745 0x20000, 0x2A6D6, 746 0x2F800, 0x2FA1F, 747 0xE0000, 0xE007F 748 }; 749 private static final int NONBMP_BLOCK_START = 84; 750 751 static protected RangeToken getRange(String name, boolean positive) { 752 if (Token.categories.size() == 0) { 753 synchronized (Token.categories) { 754 Token[] ranges = new Token[Token.categoryNames.length]; 755 for (int i = 0; i < ranges.length; i ++) { 756 ranges[i] = Token.createRange(); 757 } 758 int type; 759 for (int i = 0; i < 0x10000; i ++) { 760 type = Character.getType((char)i); 761 if (type == Character.START_PUNCTUATION || 762 type == Character.END_PUNCTUATION) { 763 if (i == 0x00AB || i == 0x2018 || i == 0x201B || i == 0x201C || 765 i == 0x201F || i == 0x2039) { 766 type = CHAR_INIT_QUOTE; 767 } 768 if (i == 0x00BB || i == 0x2019 || i == 0x201D || i == 0x203A ) { 770 type = CHAR_FINAL_QUOTE; 771 } 772 } 773 ranges[type].addRange(i, i); 774 switch (type) { 775 case Character.UPPERCASE_LETTER: 776 case Character.LOWERCASE_LETTER: 777 case Character.TITLECASE_LETTER: 778 case Character.MODIFIER_LETTER: 779 case Character.OTHER_LETTER: 780 type = CHAR_LETTER; 781 break; 782 case Character.NON_SPACING_MARK: 783 case Character.COMBINING_SPACING_MARK: 784 case Character.ENCLOSING_MARK: 785 type = CHAR_MARK; 786 break; 787 case Character.DECIMAL_DIGIT_NUMBER: 788 case Character.LETTER_NUMBER: 789 case Character.OTHER_NUMBER: 790 type = CHAR_NUMBER; 791 break; 792 case Character.SPACE_SEPARATOR: 793 case Character.LINE_SEPARATOR: 794 case Character.PARAGRAPH_SEPARATOR: 795 type = CHAR_SEPARATOR; 796 break; 797 case Character.CONTROL: 798 case Character.FORMAT: 799 case Character.SURROGATE: 800 case Character.PRIVATE_USE: 801 case Character.UNASSIGNED: 802 type = CHAR_OTHER; 803 break; 804 case Character.CONNECTOR_PUNCTUATION: 805 case Character.DASH_PUNCTUATION: 806 case Character.START_PUNCTUATION: 807 case Character.END_PUNCTUATION: 808 case CHAR_INIT_QUOTE: 809 case CHAR_FINAL_QUOTE: 810 case Character.OTHER_PUNCTUATION: 811 type = CHAR_PUNCTUATION; 812 break; 813 case Character.MATH_SYMBOL: 814 case Character.CURRENCY_SYMBOL: 815 case Character.MODIFIER_SYMBOL: 816 case Character.OTHER_SYMBOL: 817 type = CHAR_SYMBOL; 818 break; 819 default: 820 throw new RuntimeException ("org.apache.xerces.utils.regex.Token#getRange(): Unknown Unicode category: "+type); 821 } 822 ranges[type].addRange(i, i); 823 } ranges[Character.UNASSIGNED].addRange(0x10000, Token.UTF16_MAX); 825 826 for (int i = 0; i < ranges.length; i ++) { 827 if (Token.categoryNames[i] != null) { 828 if (i == Character.UNASSIGNED) { ranges[i].addRange(0x10000, Token.UTF16_MAX); 830 } 831 Token.categories.put(Token.categoryNames[i], ranges[i]); 832 Token.categories2.put(Token.categoryNames[i], 833 Token.complementRanges(ranges[i])); 834 } 835 } 836 StringBuffer buffer = new StringBuffer (50); 840 for (int i = 0; i < Token.blockNames.length; i ++) { 841 Token r1 = Token.createRange(); 842 int location; 843 if (i < NONBMP_BLOCK_START) { 844 location = i*2; 845 int rstart = Token.blockRanges.charAt(location); 846 int rend = Token.blockRanges.charAt(location+1); 847 r1.addRange(rstart, rend); 851 } else { 852 location = (i - NONBMP_BLOCK_START) * 2; 853 r1.addRange(Token.nonBMPBlockRanges[location], 854 Token.nonBMPBlockRanges[location + 1]); 855 } 856 String n = Token.blockNames[i]; 857 if (n.equals("Specials")) 858 r1.addRange(0xfff0, 0xfffd); 859 if (n.equals("Private Use")) { 860 r1.addRange(0xF0000,0xFFFFD); 861 r1.addRange(0x100000,0x10FFFD); 862 } 863 Token.categories.put(n, r1); 864 Token.categories2.put(n, Token.complementRanges(r1)); 865 buffer.setLength(0); 866 buffer.append("Is"); 867 if (n.indexOf(' ') >= 0) { 868 for (int ci = 0; ci < n.length(); ci ++) 869 if (n.charAt(ci) != ' ') buffer.append((char)n.charAt(ci)); 870 } 871 else { 872 buffer.append(n); 873 } 874 Token.setAlias(buffer.toString(), n, true); 875 } 876 877 Token.setAlias("ASSIGNED", "Cn", false); 879 Token.setAlias("UNASSIGNED", "Cn", true); 880 Token all = Token.createRange(); 881 all.addRange(0, Token.UTF16_MAX); 882 Token.categories.put("ALL", all); 883 Token.categories2.put("ALL", Token.complementRanges(all)); 884 Token.registerNonXS("ASSIGNED"); 885 Token.registerNonXS("UNASSIGNED"); 886 Token.registerNonXS("ALL"); 887 888 Token isalpha = Token.createRange(); 889 isalpha.mergeRanges(ranges[Character.UPPERCASE_LETTER]); isalpha.mergeRanges(ranges[Character.LOWERCASE_LETTER]); isalpha.mergeRanges(ranges[Character.OTHER_LETTER]); Token.categories.put("IsAlpha", isalpha); 893 Token.categories2.put("IsAlpha", Token.complementRanges(isalpha)); 894 Token.registerNonXS("IsAlpha"); 895 896 Token isalnum = Token.createRange(); 897 isalnum.mergeRanges(isalpha); isalnum.mergeRanges(ranges[Character.DECIMAL_DIGIT_NUMBER]); Token.categories.put("IsAlnum", isalnum); 900 Token.categories2.put("IsAlnum", Token.complementRanges(isalnum)); 901 Token.registerNonXS("IsAlnum"); 902 903 Token isspace = Token.createRange(); 904 isspace.mergeRanges(Token.token_spaces); 905 isspace.mergeRanges(ranges[CHAR_SEPARATOR]); Token.categories.put("IsSpace", isspace); 907 Token.categories2.put("IsSpace", Token.complementRanges(isspace)); 908 Token.registerNonXS("IsSpace"); 909 910 Token isword = Token.createRange(); 911 isword.mergeRanges(isalnum); isword.addRange('_', '_'); 913 Token.categories.put("IsWord", isword); 914 Token.categories2.put("IsWord", Token.complementRanges(isword)); 915 Token.registerNonXS("IsWord"); 916 917 Token isascii = Token.createRange(); 918 isascii.addRange(0, 127); 919 Token.categories.put("IsASCII", isascii); 920 Token.categories2.put("IsASCII", Token.complementRanges(isascii)); 921 Token.registerNonXS("IsASCII"); 922 923 Token isnotgraph = Token.createRange(); 924 isnotgraph.mergeRanges(ranges[CHAR_OTHER]); 925 isnotgraph.addRange(' ', ' '); 926 Token.categories.put("IsGraph", Token.complementRanges(isnotgraph)); 927 Token.categories2.put("IsGraph", isnotgraph); 928 Token.registerNonXS("IsGraph"); 929 930 Token isxdigit = Token.createRange(); 931 isxdigit.addRange('0', '9'); 932 isxdigit.addRange('A', 'F'); 933 isxdigit.addRange('a', 'f'); 934 Token.categories.put("IsXDigit", Token.complementRanges(isxdigit)); 935 Token.categories2.put("IsXDigit", isxdigit); 936 Token.registerNonXS("IsXDigit"); 937 938 Token.setAlias("IsDigit", "Nd", true); 939 Token.setAlias("IsUpper", "Lu", true); 940 Token.setAlias("IsLower", "Ll", true); 941 Token.setAlias("IsCntrl", "C", true); 942 Token.setAlias("IsPrint", "C", false); 943 Token.setAlias("IsPunct", "P", true); 944 Token.registerNonXS("IsDigit"); 945 Token.registerNonXS("IsUpper"); 946 Token.registerNonXS("IsLower"); 947 Token.registerNonXS("IsCntrl"); 948 Token.registerNonXS("IsPrint"); 949 Token.registerNonXS("IsPunct"); 950 951 Token.setAlias("alpha", "IsAlpha", true); 952 Token.setAlias("alnum", "IsAlnum", true); 953 Token.setAlias("ascii", "IsASCII", true); 954 Token.setAlias("cntrl", "IsCntrl", true); 955 Token.setAlias("digit", "IsDigit", true); 956 Token.setAlias("graph", "IsGraph", true); 957 Token.setAlias("lower", "IsLower", true); 958 Token.setAlias("print", "IsPrint", true); 959 Token.setAlias("punct", "IsPunct", true); 960 Token.setAlias("space", "IsSpace", true); 961 Token.setAlias("upper", "IsUpper", true); 962 Token.setAlias("word", "IsWord", true); Token.setAlias("xdigit", "IsXDigit", true); 964 Token.registerNonXS("alpha"); 965 Token.registerNonXS("alnum"); 966 Token.registerNonXS("ascii"); 967 Token.registerNonXS("cntrl"); 968 Token.registerNonXS("digit"); 969 Token.registerNonXS("graph"); 970 Token.registerNonXS("lower"); 971 Token.registerNonXS("print"); 972 Token.registerNonXS("punct"); 973 Token.registerNonXS("space"); 974 Token.registerNonXS("upper"); 975 Token.registerNonXS("word"); 976 Token.registerNonXS("xdigit"); 977 } } RangeToken tok = positive ? (RangeToken)Token.categories.get(name) 980 : (RangeToken)Token.categories2.get(name); 981 return tok; 983 } 984 static protected RangeToken getRange(String name, boolean positive, boolean xs) { 985 RangeToken range = Token.getRange(name, positive); 986 if (xs && range != null && Token.isRegisterNonXS(name)) 987 range = null; 988 return range; 989 } 990 991 static Hashtable nonxs = null; 992 996 static protected void registerNonXS(String name) { 997 if (Token.nonxs == null) 998 Token.nonxs = new Hashtable (); 999 Token.nonxs.put(name, name); 1000 } 1001 static protected boolean isRegisterNonXS(String name) { 1002 if (Token.nonxs == null) 1003 return false; 1004 return Token.nonxs.containsKey(name); 1007 } 1008 1009 private static void setAlias(String newName, String name, boolean positive) { 1010 Token t1 = (Token)Token.categories.get(name); 1011 Token t2 = (Token)Token.categories2.get(name); 1012 if (positive) { 1013 Token.categories.put(newName, t1); 1014 Token.categories2.put(newName, t2); 1015 } else { 1016 Token.categories2.put(newName, t1); 1017 Token.categories.put(newName, t2); 1018 } 1019 } 1020 1021 1023 static final String viramaString = 1024 "\u094D" +"\u09CD" +"\u0A4D" +"\u0ACD" +"\u0B4D" +"\u0BCD" +"\u0C4D" +"\u0CCD" +"\u0D4D" +"\u0E3A" +"\u0F84"; 1036 static private Token token_grapheme = null; 1037 static synchronized Token getGraphemePattern() { 1038 if (Token.token_grapheme != null) 1039 return Token.token_grapheme; 1040 1041 Token base_char = Token.createRange(); base_char.mergeRanges(Token.getRange("ASSIGNED", true)); 1043 base_char.subtractRanges(Token.getRange("M", true)); 1044 base_char.subtractRanges(Token.getRange("C", true)); 1045 1046 Token virama = Token.createRange(); 1047 for (int i = 0; i < Token.viramaString.length(); i ++) { 1048 int ch = viramaString.charAt(i); 1049 virama.addRange(i, i); 1050 } 1051 1052 Token combiner_wo_virama = Token.createRange(); 1053 combiner_wo_virama.mergeRanges(Token.getRange("M", true)); 1054 combiner_wo_virama.addRange(0x1160, 0x11ff); combiner_wo_virama.addRange(0xff9e, 0xff9f); 1057 Token left = Token.createUnion(); left.addChild(base_char); 1059 left.addChild(Token.token_empty); 1060 1061 Token foo = Token.createUnion(); 1062 foo.addChild(Token.createConcat(virama, Token.getRange("L", true))); 1063 foo.addChild(combiner_wo_virama); 1064 1065 foo = Token.createClosure(foo); 1066 1067 foo = Token.createConcat(left, foo); 1068 1069 Token.token_grapheme = foo; 1070 return Token.token_grapheme; 1071 } 1072 1073 1076 static private Token token_ccs = null; 1077 static synchronized Token getCombiningCharacterSequence() { 1078 if (Token.token_ccs != null) 1079 return Token.token_ccs; 1080 1081 Token foo = Token.createClosure(Token.getRange("M", true)); foo = Token.createConcat(Token.getRange("M", false), foo); Token.token_ccs = foo; 1084 return Token.token_ccs; 1085 } 1086 1087 1089 1093 static class StringToken extends Token implements java.io.Serializable { 1094 1095 private static final long serialVersionUID = 3257288015452780086L; 1096 1097 String string; 1098 int refNumber; 1099 1100 StringToken(int type, String str, int n) { 1101 super(type); 1102 this.string = str; 1103 this.refNumber = n; 1104 } 1105 1106 int getReferenceNumber() { return this.refNumber; 1108 } 1109 String getString() { return this.string; 1111 } 1112 1113 public String toString(int options) { 1114 if (this.type == BACKREFERENCE) 1115 return "\\"+this.refNumber; 1116 else 1117 return REUtil.quoteMeta(this.string); 1118 } 1119 } 1120 1121 1124 static class ConcatToken extends Token implements java.io.Serializable { 1125 1126 private static final long serialVersionUID = 4050760502994940212L; 1127 1128 Token child; 1129 Token child2; 1130 1131 ConcatToken(Token t1, Token t2) { 1132 super(Token.CONCAT); 1133 this.child = t1; 1134 this.child2 = t2; 1135 } 1136 1137 int size() { 1138 return 2; 1139 } 1140 Token getChild(int index) { 1141 return index == 0 ? this.child : this.child2; 1142 } 1143 1144 public String toString(int options) { 1145 String ret; 1146 if (this.child2.type == CLOSURE && this.child2.getChild(0) == this.child) { 1147 ret = this.child.toString(options)+"+"; 1148 } else if (this.child2.type == NONGREEDYCLOSURE && this.child2.getChild(0) == this.child) { 1149 ret = this.child.toString(options)+"+?"; 1150 } else 1151 ret = this.child.toString(options)+this.child2.toString(options); 1152 return ret; 1153 } 1154 } 1155 1156 1159 static class CharToken extends Token implements java.io.Serializable { 1160 1161 private static final long serialVersionUID = 3257284751277569842L; 1162 1163 int chardata; 1164 1165 CharToken(int type, int ch) { 1166 super(type); 1167 this.chardata = ch; 1168 } 1169 1170 int getChar() { 1171 return this.chardata; 1172 } 1173 1174 public String toString(int options) { 1175 String ret; 1176 switch (this.type) { 1177 case CHAR: 1178 switch (this.chardata) { 1179 case '|': case '*': case '+': case '?': 1180 case '(': case ')': case '.': case '[': 1181 case '{': case '\\': 1182 ret = "\\"+(char)this.chardata; 1183 break; 1184 case '\f': ret = "\\f"; break; 1185 case '\n': ret = "\\n"; break; 1186 case '\r': ret = "\\r"; break; 1187 case '\t': ret = "\\t"; break; 1188 case 0x1b: ret = "\\e"; break; 1189 default: 1191 if (this.chardata >= 0x10000) { 1192 String pre = "0"+Integer.toHexString(this.chardata); 1193 ret = "\\v"+pre.substring(pre.length()-6, pre.length()); 1194 } else 1195 ret = ""+(char)this.chardata; 1196 } 1197 break; 1198 1199 case ANCHOR: 1200 if (this == Token.token_linebeginning || this == Token.token_lineend) 1201 ret = ""+(char)this.chardata; 1202 else 1203 ret = "\\"+(char)this.chardata; 1204 break; 1205 1206 default: 1207 ret = null; 1208 } 1209 return ret; 1210 } 1211 1212 boolean match(int ch) { 1213 if (this.type == CHAR) { 1214 return ch == this.chardata; 1215 } else 1216 throw new RuntimeException ("NFAArrow#match(): Internal error: "+this.type); 1217 } 1218 } 1219 1220 1223 static class ClosureToken extends Token implements java.io.Serializable { 1224 1225 private static final long serialVersionUID = 3545230349706932537L; 1226 1227 int min; 1228 int max; 1229 Token child; 1230 1231 ClosureToken(int type, Token tok) { 1232 super(type); 1233 this.child = tok; 1234 this.setMin(-1); 1235 this.setMax(-1); 1236 } 1237 1238 int size() { 1239 return 1; 1240 } 1241 Token getChild(int index) { 1242 return this.child; 1243 } 1244 1245 final void setMin(int min) { 1246 this.min = min; 1247 } 1248 final void setMax(int max) { 1249 this.max = max; 1250 } 1251 final int getMin() { 1252 return this.min; 1253 } 1254 final int getMax() { 1255 return this.max; 1256 } 1257 1258 public String toString(int options) { 1259 String ret; 1260 if (this.type == CLOSURE) { 1261 if (this.getMin() < 0 && this.getMax() < 0) { 1262 ret = this.child.toString(options)+"*"; 1263 } else if (this.getMin() == this.getMax()) { 1264 ret = this.child.toString(options)+"{"+this.getMin()+"}"; 1265 } else if (this.getMin() >= 0 && this.getMax() >= 0) { 1266 ret = this.child.toString(options)+"{"+this.getMin()+","+this.getMax()+"}"; 1267 } else if (this.getMin() >= 0 && this.getMax() < 0) { 1268 ret = this.child.toString(options)+"{"+this.getMin()+",}"; 1269 } else 1270 throw new RuntimeException ("Token#toString(): CLOSURE " 1271 +this.getMin()+", "+this.getMax()); 1272 } else { 1273 if (this.getMin() < 0 && this.getMax() < 0) { 1274 ret = this.child.toString(options)+"*?"; 1275 } else if (this.getMin() == this.getMax()) { 1276 ret = this.child.toString(options)+"{"+this.getMin()+"}?"; 1277 } else if (this.getMin() >= 0 && this.getMax() >= 0) { 1278 ret = this.child.toString(options)+"{"+this.getMin()+","+this.getMax()+"}?"; 1279 } else if (this.getMin() >= 0 && this.getMax() < 0) { 1280 ret = this.child.toString(options)+"{"+this.getMin()+",}?"; 1281 } else 1282 throw new RuntimeException ("Token#toString(): NONGREEDYCLOSURE " 1283 +this.getMin()+", "+this.getMax()); 1284 } 1285 return ret; 1286 } 1287 } 1288 1289 1292 static class ParenToken extends Token implements java.io.Serializable { 1293 1294 private static final long serialVersionUID = 3257572797621219636L; 1295 1296 Token child; 1297 int parennumber; 1298 1299 ParenToken(int type, Token tok, int paren) { 1300 super(type); 1301 this.child = tok; 1302 this.parennumber = paren; 1303 } 1304 1305 int size() { 1306 return 1; 1307 } 1308 Token getChild(int index) { 1309 return this.child; 1310 } 1311 1312 int getParenNumber() { 1313 return this.parennumber; 1314 } 1315 1316 public String toString(int options) { 1317 String ret = null; 1318 switch (this.type) { 1319 case PAREN: 1320 if (this.parennumber == 0) { 1321 ret = "(?:"+this.child.toString(options)+")"; 1322 } else { 1323 ret = "("+this.child.toString(options)+")"; 1324 } 1325 break; 1326 1327 case LOOKAHEAD: 1328 ret = "(?="+this.child.toString(options)+")"; 1329 break; 1330 case NEGATIVELOOKAHEAD: 1331 ret = "(?!"+this.child.toString(options)+")"; 1332 break; 1333 case LOOKBEHIND: 1334 ret = "(?<="+this.child.toString(options)+")"; 1335 break; 1336 case NEGATIVELOOKBEHIND: 1337 ret = "(?<!"+this.child.toString(options)+")"; 1338 break; 1339 case INDEPENDENT: 1340 ret = "(?>"+this.child.toString(options)+")"; 1341 break; 1342 } 1343 return ret; 1344 } 1345 } 1346 1347 1350 static class ConditionToken extends Token implements java.io.Serializable { 1351 1352 private static final long serialVersionUID = 3761408607870399794L; 1353 1354 int refNumber; 1355 Token condition; 1356 Token yes; 1357 Token no; 1358 ConditionToken(int refno, Token cond, Token yespat, Token nopat) { 1359 super(Token.CONDITION); 1360 this.refNumber = refno; 1361 this.condition = cond; 1362 this.yes = yespat; 1363 this.no = nopat; 1364 } 1365 int size() { 1366 return this.no == null ? 1 : 2; 1367 } 1368 Token getChild(int index) { 1369 if (index == 0) return this.yes; 1370 if (index == 1) return this.no; 1371 throw new RuntimeException ("Internal Error: "+index); 1372 } 1373 1374 public String toString(int options) { 1375 String ret; 1376 if (refNumber > 0) { 1377 ret = "(?("+refNumber+")"; 1378 } else if (this.condition.type == Token.ANCHOR) { 1379 ret = "(?("+this.condition+")"; 1380 } else { 1381 ret = "(?"+this.condition; 1382 } 1383 1384 if (this.no == null) { 1385 ret += this.yes+")"; 1386 } else { 1387 ret += this.yes+"|"+this.no+")"; 1388 } 1389 return ret; 1390 } 1391 } 1392 1393 1396 static class ModifierToken extends Token implements java.io.Serializable { 1397 1398 private static final long serialVersionUID = 3258689892778324790L; 1399 1400 Token child; 1401 int add; 1402 int mask; 1403 1404 ModifierToken(Token tok, int add, int mask) { 1405 super(Token.MODIFIERGROUP); 1406 this.child = tok; 1407 this.add = add; 1408 this.mask = mask; 1409 } 1410 1411 int size() { 1412 return 1; 1413 } 1414 Token getChild(int index) { 1415 return this.child; 1416 } 1417 1418 int getOptions() { 1419 return this.add; 1420 } 1421 int getOptionsMask() { 1422 return this.mask; 1423 } 1424 1425 public String toString(int options) { 1426 return "(?" 1427 +(this.add == 0 ? "" : REUtil.createOptionString(this.add)) 1428 +(this.mask == 0 ? "" : REUtil.createOptionString(this.mask)) 1429 +":" 1430 +this.child.toString(options) 1431 +")"; 1432 } 1433 } 1434 1435 1439 static class UnionToken extends Token implements java.io.Serializable { 1440 1441 private static final long serialVersionUID = 3256723987530003507L; 1442 1443 Vector children; 1444 1445 UnionToken(int type) { 1446 super(type); 1447 } 1448 1449 void addChild(Token tok) { 1450 if (tok == null) return; 1451 if (this.children == null) this.children = new Vector (); 1452 if (this.type == UNION) { 1453 this.children.addElement(tok); 1454 return; 1455 } 1456 if (tok.type == CONCAT) { 1458 for (int i = 0; i < tok.size(); i ++) 1459 this.addChild(tok.getChild(i)); return; 1461 } 1462 int size = this.children.size(); 1463 if (size == 0) { 1464 this.children.addElement(tok); 1465 return; 1466 } 1467 Token previous = (Token)this.children.elementAt(size-1); 1468 if (!((previous.type == CHAR || previous.type == STRING) 1469 && (tok.type == CHAR || tok.type == STRING))) { 1470 this.children.addElement(tok); 1471 return; 1472 } 1473 1474 1476 StringBuffer buffer; 1477 int nextMaxLength = (tok.type == CHAR ? 2 : tok.getString().length()); 1478 if (previous.type == CHAR) { buffer = new StringBuffer (2 + nextMaxLength); 1480 int ch = previous.getChar(); 1481 if (ch >= 0x10000) 1482 buffer.append(REUtil.decomposeToSurrogates(ch)); 1483 else 1484 buffer.append((char)ch); 1485 previous = Token.createString(null); 1486 this.children.setElementAt(previous, size-1); 1487 } else { buffer = new StringBuffer (previous.getString().length() + nextMaxLength); 1489 buffer.append(previous.getString()); 1490 } 1491 1492 if (tok.type == CHAR) { 1493 int ch = tok.getChar(); 1494 if (ch >= 0x10000) 1495 buffer.append(REUtil.decomposeToSurrogates(ch)); 1496 else 1497 buffer.append((char)ch); 1498 } else { 1499 buffer.append(tok.getString()); 1500 } 1501 1502 ((StringToken)previous).string = new String (buffer); 1503 } 1504 1505 int size() { 1506 return this.children == null ? 0 : this.children.size(); 1507 } 1508 Token getChild(int index) { 1509 return (Token)this.children.elementAt(index); 1510 } 1511 1512 public String toString(int options) { 1513 String ret; 1514 if (this.type == CONCAT) { 1515 if (this.children.size() == 2) { 1516 Token ch = this.getChild(0); 1517 Token ch2 = this.getChild(1); 1518 if (ch2.type == CLOSURE && ch2.getChild(0) == ch) { 1519 ret = ch.toString(options)+"+"; 1520 } else if (ch2.type == NONGREEDYCLOSURE && ch2.getChild(0) == ch) { 1521 ret = ch.toString(options)+"+?"; 1522 } else 1523 ret = ch.toString(options)+ch2.toString(options); 1524 } else { 1525 StringBuffer sb = new StringBuffer (); 1526 for (int i = 0; i < this.children.size(); i ++) { 1527 sb.append(((Token)this.children.elementAt(i)).toString(options)); 1528 } 1529 ret = new String (sb); 1530 } 1531 return ret; 1532 } 1533 if (this.children.size() == 2 && this.getChild(1).type == EMPTY) { 1534 ret = this.getChild(0).toString(options)+"?"; 1535 } else if (this.children.size() == 2 1536 && this.getChild(0).type == EMPTY) { 1537 ret = this.getChild(1).toString(options)+"??"; 1538 } else { 1539 StringBuffer sb = new StringBuffer (); 1540 sb.append(((Token)this.children.elementAt(0)).toString(options)); 1541 for (int i = 1; i < this.children.size(); i ++) { 1542 sb.append((char)'|'); 1543 sb.append(((Token)this.children.elementAt(i)).toString(options)); 1544 } 1545 ret = new String (sb); 1546 } 1547 return ret; 1548 } 1549 } 1550} 1551 | Popular Tags |