|                                                                                                              1
 57
 58  package com.sun.org.apache.xerces.internal.impl.xpath.regex;
 59
 60  import java.util.Vector
  ; 61  import java.util.Hashtable
  ; 62
 63
 68  class Token implements java.io.Serializable
  { 69      static final boolean COUNTTOKENS = true;
 70      static int tokens = 0;
 71
 72      static final int CHAR = 0;                      static final int DOT = 11;                      static final int CONCAT = 1;                    static final int UNION = 2;                     static final int CLOSURE = 3;                   static final int RANGE = 4;                     static final int NRANGE = 5;                    static final int PAREN = 6;                     static final int EMPTY = 7;                     static final int ANCHOR = 8;                    static final int NONGREEDYCLOSURE = 9;          static final int STRING = 10;                   static final int BACKREFERENCE = 12;            static final int LOOKAHEAD = 20;                static final int NEGATIVELOOKAHEAD = 21;        static final int LOOKBEHIND = 22;               static final int NEGATIVELOOKBEHIND = 23;       static final int INDEPENDENT = 24;              static final int MODIFIERGROUP = 25;            static final int CONDITION = 26;
 93      static final int UTF16_MAX = 0x10ffff;
 94
 95      int type;
 96
 97      static Token token_dot;
 98      static Token token_0to9;
 99      static Token token_wordchars;
 100     static Token token_not_0to9;
 101     static Token token_not_wordchars;
 102     static Token token_spaces;
 103     static Token token_not_spaces;
 104     static Token token_empty;
 105     static Token token_linebeginning;
 106     static Token token_linebeginning2;
 107     static Token token_lineend;
 108     static Token token_stringbeginning;
 109     static Token token_stringend;
 110     static Token token_stringend2;
 111     static Token token_wordedge;
 112     static Token token_not_wordedge;
 113     static Token token_wordbeginning;
 114     static Token token_wordend;
 115     static {
 116         Token.token_empty = new Token(Token.EMPTY);
 117
 118         Token.token_linebeginning = Token.createAnchor('^');
 119         Token.token_linebeginning2 = Token.createAnchor('@');
 120         Token.token_lineend = Token.createAnchor('$');
 121         Token.token_stringbeginning = Token.createAnchor('A');
 122         Token.token_stringend = Token.createAnchor('z');
 123         Token.token_stringend2 = Token.createAnchor('Z');
 124         Token.token_wordedge = Token.createAnchor('b');
 125         Token.token_not_wordedge = Token.createAnchor('B');
 126         Token.token_wordbeginning = Token.createAnchor('<');
 127         Token.token_wordend = Token.createAnchor('>');
 128
 129         Token.token_dot = new Token(Token.DOT);
 130
 131         Token.token_0to9 = Token.createRange();
 132         Token.token_0to9.addRange('0', '9');
 133         Token.token_wordchars = Token.createRange();
 134         Token.token_wordchars.addRange('0', '9');
 135         Token.token_wordchars.addRange('A', 'Z');
 136         Token.token_wordchars.addRange('_', '_');
 137         Token.token_wordchars.addRange('a', 'z');
 138         Token.token_spaces = Token.createRange();
 139         Token.token_spaces.addRange('\t', '\t');
 140         Token.token_spaces.addRange('\n', '\n');
 141         Token.token_spaces.addRange('\f', '\f');
 142         Token.token_spaces.addRange('\r', '\r');
 143         Token.token_spaces.addRange(' ', ' ');
 144
 145         Token.token_not_0to9 = Token.complementRanges(Token.token_0to9);
 146         Token.token_not_wordchars = Token.complementRanges(Token.token_wordchars);
 147         Token.token_not_spaces = Token.complementRanges(Token.token_spaces);
 148     }
 149
 150     static Token.ParenToken createLook(int type, Token child) {
 151         if (COUNTTOKENS)  Token.tokens ++;
 152         return new Token.ParenToken(type, child, 0);
 153     }
 154     static Token.ParenToken createParen(Token child, int pnumber) {
 155         if (COUNTTOKENS)  Token.tokens ++;
 156         return new Token.ParenToken(Token.PAREN, child, pnumber);
 157     }
 158     static Token.ClosureToken createClosure(Token tok) {
 159         if (COUNTTOKENS)  Token.tokens ++;
 160         return new Token.ClosureToken(Token.CLOSURE, tok);
 161     }
 162     static Token.ClosureToken createNGClosure(Token tok) {
 163         if (COUNTTOKENS)  Token.tokens ++;
 164         return new Token.ClosureToken(Token.NONGREEDYCLOSURE, tok);
 165     }
 166     static Token.ConcatToken createConcat(Token tok1, Token tok2) {
 167         if (COUNTTOKENS)  Token.tokens ++;
 168         return new Token.ConcatToken(tok1, tok2);
 169     }
 170     static Token.UnionToken createConcat() {
 171         if (COUNTTOKENS)  Token.tokens ++;
 172         return new Token.UnionToken(Token.CONCAT);     }
 174     static Token.UnionToken createUnion() {
 175         if (COUNTTOKENS)  Token.tokens ++;
 176         return new Token.UnionToken(Token.UNION);
 177     }
 178     static Token createEmpty() {
 179         return Token.token_empty;
 180     }
 181     static RangeToken createRange() {
 182         if (COUNTTOKENS)  Token.tokens ++;
 183         return new RangeToken(Token.RANGE);
 184     }
 185     static RangeToken createNRange() {
 186         if (COUNTTOKENS)  Token.tokens ++;
 187         return new RangeToken(Token.NRANGE);
 188     }
 189     static Token.CharToken createChar(int ch) {
 190         if (COUNTTOKENS)  Token.tokens ++;
 191         return new Token.CharToken(Token.CHAR, ch);
 192     }
 193     static private Token.CharToken createAnchor(int ch) {
 194         if (COUNTTOKENS)  Token.tokens ++;
 195         return new Token.CharToken(Token.ANCHOR, ch);
 196     }
 197     static Token.StringToken createBackReference(int refno) {
 198         if (COUNTTOKENS)  Token.tokens ++;
 199         return new Token.StringToken(Token.BACKREFERENCE, null, refno);
 200     }
 201     static Token.StringToken createString(String
  str) { 202         if (COUNTTOKENS)  Token.tokens ++;
 203         return new Token.StringToken(Token.STRING, str, 0);
 204     }
 205     static Token.ModifierToken createModifierGroup(Token child, int add, int mask) {
 206         if (COUNTTOKENS)  Token.tokens ++;
 207         return new Token.ModifierToken(child, add, mask);
 208     }
 209     static Token.ConditionToken createCondition(int refno, Token condition,
 210                                                 Token yespat, Token nopat) {
 211         if (COUNTTOKENS)  Token.tokens ++;
 212         return new Token.ConditionToken(refno, condition, yespat, nopat);
 213     }
 214
 215     protected Token(int type) {
 216         this.type = type;
 217     }
 218
 219
 222     int size() {
 223         return 0;
 224     }
 225     Token getChild(int index) {
 226         return null;
 227     }
 228     void addChild(Token tok) {
 229         throw new RuntimeException
  ("Not supported."); 230     }
 231
 232                                                     protected void addRange(int start, int end) {
 234         throw new RuntimeException
  ("Not supported."); 235     }
 236     protected void sortRanges() {
 237         throw new RuntimeException
  ("Not supported."); 238     }
 239     protected void compactRanges() {
 240         throw new RuntimeException
  ("Not supported."); 241     }
 242     protected void mergeRanges(Token tok) {
 243         throw new RuntimeException
  ("Not supported."); 244     }
 245     protected void subtractRanges(Token tok) {
 246         throw new RuntimeException
  ("Not supported."); 247     }
 248     protected void intersectRanges(Token tok) {
 249         throw new RuntimeException
  ("Not supported."); 250     }
 251     static Token complementRanges(Token tok) {
 252         return RangeToken.complementRanges(tok);
 253     }
 254
 255
 256     void setMin(int min) {                          }
 258     void setMax(int max) {                          }
 260     int getMin() {                                      return -1;
 262     }
 263     int getMax() {                                      return -1;
 265     }
 266     int getReferenceNumber() {                          return 0;
 268     }
 269     String
  getString() {                                return null; 271     }
 272
 273     int getParenNumber() {
 274         return 0;
 275     }
 276     int getChar() {
 277         return -1;
 278     }
 279
 280     public String
  toString() { 281         return this.toString(0);
 282     }
 283     public String
  toString(int options) { 284         return this.type == Token.DOT ? "." : "";
 285     }
 286
 287
 290     final int getMinLength() {
 291         switch (this.type) {
 292           case CONCAT:
 293             int sum = 0;
 294             for (int i = 0;  i < this.size();  i ++)
 295                 sum += this.getChild(i).getMinLength();
 296             return sum;
 297
 298           case CONDITION:
 299           case UNION:
 300             if (this.size() == 0)
 301                 return 0;
 302             int ret = this.getChild(0).getMinLength();
 303             for (int i = 1;  i < this.size();  i ++) {
 304                 int min = this.getChild(i).getMinLength();
 305                 if (min < ret)  ret = min;
 306             }
 307             return ret;
 308
 309           case CLOSURE:
 310           case NONGREEDYCLOSURE:
 311             if (this.getMin() >= 0)
 312                 return this.getMin() * this.getChild(0).getMinLength();
 313             return 0;
 314
 315           case EMPTY:
 316           case ANCHOR:
 317             return 0;
 318
 319           case DOT:
 320           case CHAR:
 321           case RANGE:
 322           case NRANGE:
 323             return 1;
 324
 325           case INDEPENDENT:
 326           case PAREN:
 327           case MODIFIERGROUP:
 328             return this.getChild(0).getMinLength();
 329
 330           case BACKREFERENCE:
 331             return 0;
 333           case STRING:
 334             return this.getString().length();
 335
 336           case LOOKAHEAD:
 337           case NEGATIVELOOKAHEAD:
 338           case LOOKBEHIND:
 339           case NEGATIVELOOKBEHIND:
 340             return 0;
 342           default:
 343             throw new RuntimeException
  ("Token#getMinLength(): Invalid Type: "+this.type); 344         }
 345     }
 346
 347     final int getMaxLength() {
 348         switch (this.type) {
 349           case CONCAT:
 350             int sum = 0;
 351             for (int i = 0;  i < this.size();  i ++) {
 352                 int d = this.getChild(i).getMaxLength();
 353                 if (d < 0)  return -1;
 354                 sum += d;
 355             }
 356             return sum;
 357
 358           case CONDITION:
 359           case UNION:
 360             if (this.size() == 0)
 361                 return 0;
 362             int ret = this.getChild(0).getMaxLength();
 363             for (int i = 1;  ret >= 0 && i < this.size();  i ++) {
 364                 int max = this.getChild(i).getMaxLength();
 365                 if (max < 0) {                                      ret = -1;
 367                     break;
 368                 }
 369                 if (max > ret)  ret = max;
 370             }
 371             return ret;
 372
 373           case CLOSURE:
 374           case NONGREEDYCLOSURE:
 375             if (this.getMax() >= 0)
 376                                                                                                                 return this.getMax() * this.getChild(0).getMaxLength();
 379             return -1;
 380
 381           case EMPTY:
 382           case ANCHOR:
 383             return 0;
 384
 385           case CHAR:
 386             return 1;
 387           case DOT:
 388           case RANGE:
 389           case NRANGE:
 390             return 2;
 391
 392           case INDEPENDENT:
 393           case PAREN:
 394           case MODIFIERGROUP:
 395             return this.getChild(0).getMaxLength();
 396
 397           case BACKREFERENCE:
 398             return -1;
 400           case STRING:
 401             return this.getString().length();
 402
 403           case LOOKAHEAD:
 404           case NEGATIVELOOKAHEAD:
 405           case LOOKBEHIND:
 406           case NEGATIVELOOKBEHIND:
 407             return 0;
 409           default:
 410             throw new RuntimeException
  ("Token#getMaxLength(): Invalid Type: "+this.type); 411         }
 412     }
 413
 414     static final int FC_CONTINUE = 0;
 415     static final int FC_TERMINAL = 1;
 416     static final int FC_ANY = 2;
 417     private static final boolean isSet(int options, int flag) {
 418         return (options & flag) == flag;
 419     }
 420     final int analyzeFirstCharacter(RangeToken result, int options) {
 421         switch (this.type) {
 422           case CONCAT:
 423             int ret = FC_CONTINUE;
 424             for (int i = 0;  i < this.size();  i ++)
 425                 if ((ret = this.getChild(i).analyzeFirstCharacter(result, options)) != FC_CONTINUE)
 426                     break;
 427             return ret;
 428
 429           case UNION:
 430             if (this.size() == 0)
 431                 return FC_CONTINUE;
 432
 437             int ret2 = FC_CONTINUE;
 438             boolean hasEmpty = false;
 439             for (int i = 0;  i < this.size();  i ++) {
 440                 ret2 = this.getChild(i).analyzeFirstCharacter(result, options);
 441                 if (ret2 == FC_ANY)
 442                     break;
 443                 else if (ret2 == FC_CONTINUE)
 444                     hasEmpty = true;
 445             }
 446             return hasEmpty ? FC_CONTINUE : ret2;
 447
 448           case CONDITION:
 449             int ret3 = this.getChild(0).analyzeFirstCharacter(result, options);
 450             if (this.size() == 1)  return FC_CONTINUE;
 451             if (ret3 == FC_ANY)  return ret3;
 452             int ret4 = this.getChild(1).analyzeFirstCharacter(result, options);
 453             if (ret4 == FC_ANY)  return ret4;
 454             return ret3 == FC_CONTINUE || ret4 == FC_CONTINUE ? FC_CONTINUE : FC_TERMINAL;
 455
 456           case CLOSURE:
 457           case NONGREEDYCLOSURE:
 458             this.getChild(0).analyzeFirstCharacter(result, options);
 459             return FC_CONTINUE;
 460
 461           case EMPTY:
 462           case ANCHOR:
 463             return FC_CONTINUE;
 464
 465           case CHAR:
 466             int ch = this.getChar();
 467             result.addRange(ch, ch);
 468             if (ch < 0x10000 && isSet(options, RegularExpression.IGNORE_CASE)) {
 469                 ch = Character.toUpperCase((char)ch);
 470                 result.addRange(ch, ch);
 471                 ch = Character.toLowerCase((char)ch);
 472                 result.addRange(ch, ch);
 473             }
 474             return FC_TERMINAL;
 475
 476           case DOT:                                         if (isSet(options, RegularExpression.SINGLE_LINE)) {
 478                 return FC_CONTINUE;                         } else {
 480                 return FC_CONTINUE;
 481
 489             }
 490
 491           case RANGE:
 492             if (isSet(options, RegularExpression.IGNORE_CASE)) {
 493                 result.mergeRanges(((RangeToken)this).getCaseInsensitiveToken());
 494             } else {
 495                 result.mergeRanges(this);
 496             }
 497             return FC_TERMINAL;
 498
 499           case NRANGE:                                      if (isSet(options, RegularExpression.IGNORE_CASE)) {
 501                 result.mergeRanges(Token.complementRanges(((RangeToken)this).getCaseInsensitiveToken()));
 502             } else {
 503                 result.mergeRanges(Token.complementRanges(this));
 504             }
 505             return FC_TERMINAL;
 506
 507           case INDEPENDENT:
 508           case PAREN:
 509             return this.getChild(0).analyzeFirstCharacter(result, options);
 510
 511           case MODIFIERGROUP:
 512             options |= ((ModifierToken)this).getOptions();
 513             options &= ~((ModifierToken)this).getOptionsMask();
 514             return this.getChild(0).analyzeFirstCharacter(result, options);
 515
 516           case BACKREFERENCE:
 517             result.addRange(0, UTF16_MAX);              return FC_ANY;
 519
 520           case STRING:
 521             int cha = this.getString().charAt(0);
 522             int ch2;
 523             if (REUtil.isHighSurrogate(cha)
 524                 && this.getString().length() >= 2
 525                 && REUtil.isLowSurrogate((ch2 = this.getString().charAt(1))))
 526                 cha = REUtil.composeFromSurrogates(cha, ch2);
 527             result.addRange(cha, cha);
 528             if (cha < 0x10000 && isSet(options, RegularExpression.IGNORE_CASE)) {
 529                 cha = Character.toUpperCase((char)cha);
 530                 result.addRange(cha, cha);
 531                 cha = Character.toLowerCase((char)cha);
 532                 result.addRange(cha, cha);
 533             }
 534             return FC_TERMINAL;
 535
 536           case LOOKAHEAD:
 537           case NEGATIVELOOKAHEAD:
 538           case LOOKBEHIND:
 539           case NEGATIVELOOKBEHIND:
 540             return FC_CONTINUE;
 541
 542           default:
 543             throw new RuntimeException
  ("Token#analyzeHeadCharacter(): Invalid Type: "+this.type); 544         }
 545     }
 546
 547     private final boolean isShorterThan(Token tok) {
 548         if (tok == null)  return false;
 549
 559         int mylength;
 560         if (this.type == STRING)  mylength = this.getString().length();
 561         else throw new RuntimeException
  ("Internal Error: Illegal type: "+this.type); 562         int otherlength;
 563         if (tok.type == STRING)  otherlength = tok.getString().length();
 564         else throw new RuntimeException
  ("Internal Error: Illegal type: "+tok.type); 565         return mylength < otherlength;
 566     }
 567
 568     static class FixedStringContainer {
 569         Token token = null;
 570         int options = 0;
 571         FixedStringContainer() {
 572         }
 573     }
 574
 575     final void findFixedString(FixedStringContainer container, int options) {
 576         switch (this.type) {
 577           case CONCAT:
 578             Token prevToken = null;
 579             int prevOptions = 0;
 580             for (int i = 0;  i < this.size();  i ++) {
 581                 this.getChild(i).findFixedString(container, options);
 582                 if (prevToken == null || prevToken.isShorterThan(container.token)) {
 583                     prevToken = container.token;
 584                     prevOptions = container.options;
 585                 }
 586             }
 587             container.token = prevToken;
 588             container.options = prevOptions;
 589             return;
 590
 591           case UNION:
 592           case CLOSURE:
 593           case NONGREEDYCLOSURE:
 594           case EMPTY:
 595           case ANCHOR:
 596           case RANGE:
 597           case DOT:
 598           case NRANGE:
 599           case BACKREFERENCE:
 600           case LOOKAHEAD:
 601           case NEGATIVELOOKAHEAD:
 602           case LOOKBEHIND:
 603           case NEGATIVELOOKBEHIND:
 604           case CONDITION:
 605             container.token = null;
 606             return;
 607
 608           case CHAR:                                        container.token = null;                         return;
 612           case STRING:
 613             container.token = this;
 614             container.options = options;
 615             return;
 616
 617           case INDEPENDENT:
 618           case PAREN:
 619             this.getChild(0).findFixedString(container, options);
 620             return;
 621
 622           case MODIFIERGROUP:
 623             options |= ((ModifierToken)this).getOptions();
 624             options &= ~((ModifierToken)this).getOptionsMask();
 625             this.getChild(0).findFixedString(container, options);
 626             return;
 627
 628           default:
 629             throw new RuntimeException
  ("Token#findFixedString(): Invalid Type: "+this.type); 630         }
 631     }
 632
 633     boolean match(int ch) {
 634         throw new RuntimeException
  ("NFAArrow#match(): Internal error: "+this.type); 635     }
 636
 637         private final static Hashtable
  categories = new Hashtable  (); 639     private final static Hashtable
  categories2 = new Hashtable  (); 640     private static final String
  [] categoryNames = { 641         "Cn", "Lu", "Ll", "Lt", "Lm", "Lo", "Mn", "Me", "Mc", "Nd",
 642         "Nl", "No", "Zs", "Zl", "Zp", "Cc", "Cf", null, "Co", "Cs",
 643         "Pd", "Ps", "Pe", "Pc", "Po", "Sm", "Sc", "Sk", "So",         "Pi", "Pf",          "L", "M", "N", "Z", "C", "P", "S",          };
 647
 648         static final int CHAR_INIT_QUOTE  = 29;         static final int CHAR_FINAL_QUOTE = 30;         static final int CHAR_LETTER = 31;
 652     static final int CHAR_MARK = 32;
 653     static final int CHAR_NUMBER = 33;
 654     static final int CHAR_SEPARATOR = 34;
 655     static final int CHAR_OTHER = 35;
 656     static final int CHAR_PUNCTUATION = 36;
 657     static final int CHAR_SYMBOL = 37;
 658
 659         private static final String
  [] blockNames = { 661          "Basic Latin",
 662          "Latin-1 Supplement",
 663          "Latin Extended-A",
 664          "Latin Extended-B",
 665          "IPA Extensions",
 666          "Spacing Modifier Letters",
 667          "Combining Diacritical Marks",
 668          "Greek",
 669          "Cyrillic",
 670          "Armenian",
 671          "Hebrew",
 672          "Arabic",
 673          "Syriac",
 674          "Thaana",
 675          "Devanagari",
 676          "Bengali",
 677          "Gurmukhi",
 678          "Gujarati",
 679          "Oriya",
 680          "Tamil",
 681          "Telugu",
 682          "Kannada",
 683          "Malayalam",
 684          "Sinhala",
 685          "Thai",
 686          "Lao",
 687          "Tibetan",
 688          "Myanmar",
 689          "Georgian",
 690          "Hangul Jamo",
 691          "Ethiopic",
 692          "Cherokee",
 693          "Unified Canadian Aboriginal Syllabics",
 694          "Ogham",
 695          "Runic",
 696          "Khmer",
 697          "Mongolian",
 698          "Latin Extended Additional",
 699          "Greek Extended",
 700          "General Punctuation",
 701          "Superscripts and Subscripts",
 702          "Currency Symbols",
 703          "Combining Marks for Symbols",
 704          "Letterlike Symbols",
 705          "Number Forms",
 706          "Arrows",
 707          "Mathematical Operators",
 708          "Miscellaneous Technical",
 709          "Control Pictures",
 710          "Optical Character Recognition",
 711          "Enclosed Alphanumerics",
 712          "Box Drawing",
 713          "Block Elements",
 714          "Geometric Shapes",
 715          "Miscellaneous Symbols",
 716          "Dingbats",
 717          "Braille Patterns",
 718          "CJK Radicals Supplement",
 719          "Kangxi Radicals",
 720          "Ideographic Description Characters",
 721          "CJK Symbols and Punctuation",
 722          "Hiragana",
 723          "Katakana",
 724          "Bopomofo",
 725          "Hangul Compatibility Jamo",
 726          "Kanbun",
 727          "Bopomofo Extended",
 728          "Enclosed CJK Letters and Months",
 729          "CJK Compatibility",
 730          "CJK Unified Ideographs Extension A",
 731          "CJK Unified Ideographs",
 732          "Yi Syllables",
 733          "Yi Radicals",
 734          "Hangul Syllables",
 735          "Private Use",
 736          "CJK Compatibility Ideographs",
 737          "Alphabetic Presentation Forms",
 738          "Arabic Presentation Forms-A",
 739          "Combining Half Marks",
 740          "CJK Compatibility Forms",
 741          "Small Form Variants",
 742          "Arabic Presentation Forms-B",
 743          "Specials",
 744          "Halfwidth and Fullwidth Forms",
 745                   "Old Italic",              "Gothic",
 748          "Deseret",
 749          "Byzantine Musical Symbols",
 750          "Musical Symbols",
 751          "Mathematical Alphanumeric Symbols",
 752          "CJK Unified Ideographs Extension B",
 753          "CJK Compatibility Ideographs Supplement",
 754          "Tags",
 755
 757     };
 758                     static final String
  blockRanges = 763        "\u0000\u007F\u0080\u00FF\u0100\u017F\u0180\u024F\u0250\u02AF\u02B0\u02FF\u0300\u036F"
 764         +"\u0370\u03FF\u0400\u04FF\u0530\u058F\u0590\u05FF\u0600\u06FF\u0700\u074F\u0780\u07BF"
 765         +"\u0900\u097F\u0980\u09FF\u0A00\u0A7F\u0A80\u0AFF\u0B00\u0B7F\u0B80\u0BFF\u0C00\u0C7F\u0C80\u0CFF"
 766         +"\u0D00\u0D7F\u0D80\u0DFF\u0E00\u0E7F\u0E80\u0EFF\u0F00\u0FFF\u1000\u109F\u10A0\u10FF\u1100\u11FF"
 767         +"\u1200\u137F\u13A0\u13FF\u1400\u167F\u1680\u169F\u16A0\u16FF\u1780\u17FF\u1800\u18AF\u1E00\u1EFF"
 768         +"\u1F00\u1FFF\u2000\u206F\u2070\u209F\u20A0\u20CF\u20D0\u20FF\u2100\u214F\u2150\u218F\u2190\u21FF\u2200\u22FF"
 769         +"\u2300\u23FF\u2400\u243F\u2440\u245F\u2460\u24FF\u2500\u257F\u2580\u259F\u25A0\u25FF\u2600\u26FF\u2700\u27BF"
 770         +"\u2800\u28FF\u2E80\u2EFF\u2F00\u2FDF\u2FF0\u2FFF\u3000\u303F\u3040\u309F\u30A0\u30FF\u3100\u312F\u3130\u318F"
 771         +"\u3190\u319F\u31A0\u31BF\u3200\u32FF\u3300\u33FF\u3400\u4DB5\u4E00\u9FFF\uA000\uA48F\uA490\uA4CF"
 772         +"\uAC00\uD7A3\uE000\uF8FF\uF900\uFAFF\uFB00\uFB4F\uFB50\uFDFF"
 773         +"\uFE20\uFE2F\uFE30\uFE4F\uFE50\uFE6F\uFE70\uFEFE\uFEFF\uFEFF\uFF00\uFFEF";
 774     static final int[] nonBMPBlockRanges = {
 775         0x10300, 0x1032F,               0x10330, 0x1034F,
 777         0x10400, 0x1044F,
 778         0x1D000, 0x1D0FF,
 779         0x1D100, 0x1D1FF,
 780         0x1D400, 0x1D7FF,
 781         0x20000, 0x2A6D6,
 782         0x2F800, 0x2FA1F,
 783         0xE0000, 0xE007F
 784     };
 785     private static final int NONBMP_BLOCK_START = 84;
 786
 787     static protected RangeToken getRange(String
  name, boolean positive) { 788         if (Token.categories.size() == 0) {
 789             synchronized (Token.categories) {
 790                 Token[] ranges = new Token[Token.categoryNames.length];
 791                 for (int i = 0;  i < ranges.length;  i ++) {
 792                     ranges[i] = Token.createRange();
 793                 }
 794                 int type;
 795                 for (int i = 0;  i < 0x10000;  i ++) {
 796                     type = Character.getType((char)i);
 797                     if (type == Character.START_PUNCTUATION ||
 798                         type == Character.END_PUNCTUATION) {
 799                                                 if (i == 0x00AB || i == 0x2018 || i == 0x201B || i == 0x201C ||
 801                             i == 0x201F || i == 0x2039) {
 802                             type = CHAR_INIT_QUOTE;
 803                         }
 804                                                 if (i == 0x00BB || i == 0x2019 || i == 0x201D || i == 0x203A ) {
 806                             type = CHAR_FINAL_QUOTE;
 807                         }
 808                     }
 809                     ranges[type].addRange(i, i);
 810                     switch (type) {
 811                       case Character.UPPERCASE_LETTER:
 812                       case Character.LOWERCASE_LETTER:
 813                       case Character.TITLECASE_LETTER:
 814                       case Character.MODIFIER_LETTER:
 815                       case Character.OTHER_LETTER:
 816                         type = CHAR_LETTER;
 817                         break;
 818                       case Character.NON_SPACING_MARK:
 819                       case Character.COMBINING_SPACING_MARK:
 820                       case Character.ENCLOSING_MARK:
 821                         type = CHAR_MARK;
 822                         break;
 823                       case Character.DECIMAL_DIGIT_NUMBER:
 824                       case Character.LETTER_NUMBER:
 825                       case Character.OTHER_NUMBER:
 826                         type = CHAR_NUMBER;
 827                         break;
 828                       case Character.SPACE_SEPARATOR:
 829                       case Character.LINE_SEPARATOR:
 830                       case Character.PARAGRAPH_SEPARATOR:
 831                         type = CHAR_SEPARATOR;
 832                         break;
 833                       case Character.CONTROL:
 834                       case Character.FORMAT:
 835                       case Character.SURROGATE:
 836                       case Character.PRIVATE_USE:
 837                       case Character.UNASSIGNED:
 838                         type = CHAR_OTHER;
 839                         break;
 840                       case Character.CONNECTOR_PUNCTUATION:
 841                       case Character.DASH_PUNCTUATION:
 842                       case Character.START_PUNCTUATION:
 843                       case Character.END_PUNCTUATION:
 844                       case CHAR_INIT_QUOTE:
 845                       case CHAR_FINAL_QUOTE:
 846                       case Character.OTHER_PUNCTUATION:
 847                         type = CHAR_PUNCTUATION;
 848                         break;
 849                       case Character.MATH_SYMBOL:
 850                       case Character.CURRENCY_SYMBOL:
 851                       case Character.MODIFIER_SYMBOL:
 852                       case Character.OTHER_SYMBOL:
 853                         type = CHAR_SYMBOL;
 854                         break;
 855                       default:
 856                         throw new RuntimeException
  ("com.sun.org.apache.xerces.internal.utils.regex.Token#getRange(): Unknown Unicode category: "+type); 857                     }
 858                     ranges[type].addRange(i, i);
 859                 }                 ranges[Character.UNASSIGNED].addRange(0x10000, Token.UTF16_MAX);
 861
 862                 for (int i = 0;  i < ranges.length;  i ++) {
 863                     if (Token.categoryNames[i] != null) {
 864                         if (i == Character.UNASSIGNED) {                             ranges[i].addRange(0x10000, Token.UTF16_MAX);
 866                         }
 867                         Token.categories.put(Token.categoryNames[i], ranges[i]);
 868                         Token.categories2.put(Token.categoryNames[i],
 869                                               Token.complementRanges(ranges[i]));
 870                     }
 871                 }
 872                                                                 StringBuffer
  buffer = new StringBuffer  (50); 876                 for (int i = 0;  i < Token.blockNames.length;  i ++) {
 877                     Token r1 = Token.createRange();
 878                     int location;
 879                     if (i < NONBMP_BLOCK_START) {
 880                         location = i*2;
 881                         int rstart = Token.blockRanges.charAt(location);
 882                         int rend = Token.blockRanges.charAt(location+1);
 883                                                                                                 r1.addRange(rstart, rend);
 887                     } else {
 888                         location = (i - NONBMP_BLOCK_START) * 2;
 889                         r1.addRange(Token.nonBMPBlockRanges[location],
 890                                     Token.nonBMPBlockRanges[location + 1]);
 891                     }
 892                     String
  n = Token.blockNames[i]; 893                     if (n.equals("Specials"))
 894                         r1.addRange(0xfff0, 0xfffd);
 895                     if (n.equals("Private Use")) {
 896                         r1.addRange(0xF0000,0xFFFFD);
 897                         r1.addRange(0x100000,0x10FFFD);
 898                     }
 899                     Token.categories.put(n, r1);
 900                     Token.categories2.put(n, Token.complementRanges(r1));
 901                     buffer.setLength(0);
 902                     buffer.append("Is");
 903                     if (n.indexOf(' ') >= 0) {
 904                         for (int ci = 0;  ci < n.length();  ci ++)
 905                             if (n.charAt(ci) != ' ')  buffer.append((char)n.charAt(ci));
 906                     }
 907                     else {
 908                         buffer.append(n);
 909                     }
 910                     Token.setAlias(buffer.toString(), n, true);
 911                 }
 912
 913                                 Token.setAlias("ASSIGNED", "Cn", false);
 915                 Token.setAlias("UNASSIGNED", "Cn", true);
 916                 Token all = Token.createRange();
 917                 all.addRange(0, Token.UTF16_MAX);
 918                 Token.categories.put("ALL", all);
 919                 Token.categories2.put("ALL", Token.complementRanges(all));
 920                 Token.registerNonXS("ASSIGNED");
 921                 Token.registerNonXS("UNASSIGNED");
 922                 Token.registerNonXS("ALL");
 923
 924                 Token isalpha = Token.createRange();
 925                 isalpha.mergeRanges(ranges[Character.UPPERCASE_LETTER]);                 isalpha.mergeRanges(ranges[Character.LOWERCASE_LETTER]);                 isalpha.mergeRanges(ranges[Character.OTHER_LETTER]);                 Token.categories.put("IsAlpha", isalpha);
 929                 Token.categories2.put("IsAlpha", Token.complementRanges(isalpha));
 930                 Token.registerNonXS("IsAlpha");
 931
 932                 Token isalnum = Token.createRange();
 933                 isalnum.mergeRanges(isalpha);                   isalnum.mergeRanges(ranges[Character.DECIMAL_DIGIT_NUMBER]);                 Token.categories.put("IsAlnum", isalnum);
 936                 Token.categories2.put("IsAlnum", Token.complementRanges(isalnum));
 937                 Token.registerNonXS("IsAlnum");
 938
 939                 Token isspace = Token.createRange();
 940                 isspace.mergeRanges(Token.token_spaces);
 941                 isspace.mergeRanges(ranges[CHAR_SEPARATOR]);                 Token.categories.put("IsSpace", isspace);
 943                 Token.categories2.put("IsSpace", Token.complementRanges(isspace));
 944                 Token.registerNonXS("IsSpace");
 945
 946                 Token isword = Token.createRange();
 947                 isword.mergeRanges(isalnum);                     isword.addRange('_', '_');
 949                 Token.categories.put("IsWord", isword);
 950                 Token.categories2.put("IsWord", Token.complementRanges(isword));
 951                 Token.registerNonXS("IsWord");
 952
 953                 Token isascii = Token.createRange();
 954                 isascii.addRange(0, 127);
 955                 Token.categories.put("IsASCII", isascii);
 956                 Token.categories2.put("IsASCII", Token.complementRanges(isascii));
 957                 Token.registerNonXS("IsASCII");
 958
 959                 Token isnotgraph = Token.createRange();
 960                 isnotgraph.mergeRanges(ranges[CHAR_OTHER]);
 961                 isnotgraph.addRange(' ', ' ');
 962                 Token.categories.put("IsGraph", Token.complementRanges(isnotgraph));
 963                 Token.categories2.put("IsGraph", isnotgraph);
 964                 Token.registerNonXS("IsGraph");
 965
 966                 Token isxdigit = Token.createRange();
 967                 isxdigit.addRange('0', '9');
 968                 isxdigit.addRange('A', 'F');
 969                 isxdigit.addRange('a', 'f');
 970                 Token.categories.put("IsXDigit", Token.complementRanges(isxdigit));
 971                 Token.categories2.put("IsXDigit", isxdigit);
 972                 Token.registerNonXS("IsXDigit");
 973
 974                 Token.setAlias("IsDigit", "Nd", true);
 975                 Token.setAlias("IsUpper", "Lu", true);
 976                 Token.setAlias("IsLower", "Ll", true);
 977                 Token.setAlias("IsCntrl", "C", true);
 978                 Token.setAlias("IsPrint", "C", false);
 979                 Token.setAlias("IsPunct", "P", true);
 980                 Token.registerNonXS("IsDigit");
 981                 Token.registerNonXS("IsUpper");
 982                 Token.registerNonXS("IsLower");
 983                 Token.registerNonXS("IsCntrl");
 984                 Token.registerNonXS("IsPrint");
 985                 Token.registerNonXS("IsPunct");
 986
 987                 Token.setAlias("alpha", "IsAlpha", true);
 988                 Token.setAlias("alnum", "IsAlnum", true);
 989                 Token.setAlias("ascii", "IsASCII", true);
 990                 Token.setAlias("cntrl", "IsCntrl", true);
 991                 Token.setAlias("digit", "IsDigit", true);
 992                 Token.setAlias("graph", "IsGraph", true);
 993                 Token.setAlias("lower", "IsLower", true);
 994                 Token.setAlias("print", "IsPrint", true);
 995                 Token.setAlias("punct", "IsPunct", true);
 996                 Token.setAlias("space", "IsSpace", true);
 997                 Token.setAlias("upper", "IsUpper", true);
 998                 Token.setAlias("word", "IsWord", true);                 Token.setAlias("xdigit", "IsXDigit", true);
 1000                Token.registerNonXS("alpha");
 1001                Token.registerNonXS("alnum");
 1002                Token.registerNonXS("ascii");
 1003                Token.registerNonXS("cntrl");
 1004                Token.registerNonXS("digit");
 1005                Token.registerNonXS("graph");
 1006                Token.registerNonXS("lower");
 1007                Token.registerNonXS("print");
 1008                Token.registerNonXS("punct");
 1009                Token.registerNonXS("space");
 1010                Token.registerNonXS("upper");
 1011                Token.registerNonXS("word");
 1012                Token.registerNonXS("xdigit");
 1013            }         }         RangeToken tok = positive ? (RangeToken)Token.categories.get(name)
 1016            : (RangeToken)Token.categories2.get(name);
 1017                return tok;
 1019    }
 1020    static protected RangeToken getRange(String
  name, boolean positive, boolean xs) { 1021        RangeToken range = Token.getRange(name, positive);
 1022        if (xs && range != null && Token.isRegisterNonXS(name))
 1023            range = null;
 1024        return range;
 1025    }
 1026
 1027    static Hashtable
  nonxs = null; 1028
 1032    static protected void registerNonXS(String
  name) { 1033        if (Token.nonxs == null)
 1034            Token.nonxs = new Hashtable
  (); 1035        Token.nonxs.put(name, name);
 1036    }
 1037    static protected boolean isRegisterNonXS(String
  name) { 1038        if (Token.nonxs == null)
 1039            return false;
 1040                        return Token.nonxs.containsKey(name);
 1043    }
 1044
 1045    private static void setAlias(String
  newName, String  name, boolean positive) { 1046        Token t1 = (Token)Token.categories.get(name);
 1047        Token t2 = (Token)Token.categories2.get(name);
 1048        if (positive) {
 1049            Token.categories.put(newName, t1);
 1050            Token.categories2.put(newName, t2);
 1051        } else {
 1052            Token.categories2.put(newName, t1);
 1053            Token.categories.put(newName, t2);
 1054        }
 1055    }
 1056
 1057
 1059    static final String
  viramaString = 1060    "\u094D"    +"\u09CD"    +"\u0A4D"    +"\u0ACD"    +"\u0B4D"    +"\u0BCD"    +"\u0C4D"    +"\u0CCD"    +"\u0D4D"    +"\u0E3A"    +"\u0F84";
 1072    static private Token token_grapheme = null;
 1073    static synchronized Token getGraphemePattern() {
 1074        if (Token.token_grapheme != null)
 1075            return Token.token_grapheme;
 1076
 1077        Token base_char = Token.createRange();          base_char.mergeRanges(Token.getRange("ASSIGNED", true));
 1079        base_char.subtractRanges(Token.getRange("M", true));
 1080        base_char.subtractRanges(Token.getRange("C", true));
 1081
 1082        Token virama = Token.createRange();
 1083        for (int i = 0;  i < Token.viramaString.length();  i ++) {
 1084            int ch = viramaString.charAt(i);
 1085            virama.addRange(i, i);
 1086        }
 1087
 1088        Token combiner_wo_virama = Token.createRange();
 1089        combiner_wo_virama.mergeRanges(Token.getRange("M", true));
 1090        combiner_wo_virama.addRange(0x1160, 0x11ff);         combiner_wo_virama.addRange(0xff9e, 0xff9f);
 1093        Token left = Token.createUnion();               left.addChild(base_char);
 1095        left.addChild(Token.token_empty);
 1096
 1097        Token foo = Token.createUnion();
 1098        foo.addChild(Token.createConcat(virama, Token.getRange("L", true)));
 1099        foo.addChild(combiner_wo_virama);
 1100
 1101        foo = Token.createClosure(foo);
 1102
 1103        foo = Token.createConcat(left, foo);
 1104
 1105        Token.token_grapheme = foo;
 1106        return Token.token_grapheme;
 1107    }
 1108
 1109
 1112    static private Token token_ccs = null;
 1113    static synchronized Token getCombiningCharacterSequence() {
 1114        if (Token.token_ccs != null)
 1115            return Token.token_ccs;
 1116
 1117        Token foo = Token.createClosure(Token.getRange("M", true));         foo = Token.createConcat(Token.getRange("M", false), foo);         Token.token_ccs = foo;
 1120        return Token.token_ccs;
 1121    }
 1122
 1123
 1125
 1129    static class StringToken extends Token implements java.io.Serializable
  { 1130        String
  string; 1131        int refNumber;
 1132
 1133        StringToken(int type, String
  str, int n) { 1134            super(type);
 1135            this.string = str;
 1136            this.refNumber = n;
 1137        }
 1138
 1139        int getReferenceNumber() {                          return this.refNumber;
 1141        }
 1142        String
  getString() {                                return this.string; 1144        }
 1145
 1146        public String
  toString(int options) { 1147            if (this.type == BACKREFERENCE)
 1148                return "\\"+this.refNumber;
 1149            else
 1150                return REUtil.quoteMeta(this.string);
 1151        }
 1152    }
 1153
 1154
 1157    static class ConcatToken extends Token implements java.io.Serializable
  { 1158        Token child;
 1159        Token child2;
 1160
 1161        ConcatToken(Token t1, Token t2) {
 1162            super(Token.CONCAT);
 1163            this.child = t1;
 1164            this.child2 = t2;
 1165        }
 1166
 1167        int size() {
 1168            return 2;
 1169        }
 1170        Token getChild(int index) {
 1171            return index == 0 ? this.child : this.child2;
 1172        }
 1173
 1174        public String
  toString(int options) { 1175            String
  ret; 1176            if (this.child2.type == CLOSURE && this.child2.getChild(0) == this.child) {
 1177                ret = this.child.toString(options)+"+";
 1178            } else if (this.child2.type == NONGREEDYCLOSURE && this.child2.getChild(0) == this.child) {
 1179                ret = this.child.toString(options)+"+?";
 1180            } else
 1181                ret = this.child.toString(options)+this.child2.toString(options);
 1182            return ret;
 1183        }
 1184    }
 1185
 1186
 1189    static class CharToken extends Token implements java.io.Serializable
  { 1190        int chardata;
 1191
 1192        CharToken(int type, int ch) {
 1193            super(type);
 1194            this.chardata = ch;
 1195        }
 1196
 1197        int getChar() {
 1198            return this.chardata;
 1199        }
 1200
 1201        public String
  toString(int options) { 1202            String
  ret; 1203            switch (this.type) {
 1204              case CHAR:
 1205                switch (this.chardata) {
 1206                  case '|':  case '*':  case '+':  case '?':
 1207                  case '(':  case ')':  case '.':  case '[':
 1208                  case '{':  case '\\':
 1209                    ret = "\\"+(char)this.chardata;
 1210                    break;
 1211                  case '\f':  ret = "\\f";  break;
 1212                  case '\n':  ret = "\\n";  break;
 1213                  case '\r':  ret = "\\r";  break;
 1214                  case '\t':  ret = "\\t";  break;
 1215                  case 0x1b:  ret = "\\e";  break;
 1216                                      default:
 1218                    if (this.chardata >= 0x10000) {
 1219                        String
  pre = "0"+Integer.toHexString(this.chardata); 1220                        ret = "\\v"+pre.substring(pre.length()-6, pre.length());
 1221                    } else
 1222                        ret = ""+(char)this.chardata;
 1223                }
 1224                break;
 1225
 1226              case ANCHOR:
 1227                if (this == Token.token_linebeginning || this == Token.token_lineend)
 1228                    ret = ""+(char)this.chardata;
 1229                else
 1230                    ret = "\\"+(char)this.chardata;
 1231                break;
 1232
 1233              default:
 1234                ret = null;
 1235            }
 1236            return ret;
 1237        }
 1238
 1239        boolean match(int ch) {
 1240            if (this.type == CHAR) {
 1241                return ch == this.chardata;
 1242            } else
 1243                throw new RuntimeException
  ("NFAArrow#match(): Internal error: "+this.type); 1244        }
 1245    }
 1246
 1247
 1250    static class ClosureToken extends Token implements java.io.Serializable
  { 1251        int min;
 1252        int max;
 1253        Token child;
 1254
 1255        ClosureToken(int type, Token tok) {
 1256            super(type);
 1257            this.child = tok;
 1258            this.setMin(-1);
 1259            this.setMax(-1);
 1260        }
 1261
 1262        int size() {
 1263            return 1;
 1264        }
 1265        Token getChild(int index) {
 1266            return this.child;
 1267        }
 1268
 1269        final void setMin(int min) {
 1270            this.min = min;
 1271        }
 1272        final void setMax(int max) {
 1273            this.max = max;
 1274        }
 1275        final int getMin() {
 1276            return this.min;
 1277        }
 1278        final int getMax() {
 1279            return this.max;
 1280        }
 1281
 1282        public String
  toString(int options) { 1283            String
  ret; 1284            if (this.type == CLOSURE) {
 1285                if (this.getMin() < 0 && this.getMax() < 0) {
 1286                    ret = this.child.toString(options)+"*";
 1287                } else if (this.getMin() == this.getMax()) {
 1288                    ret = this.child.toString(options)+"{"+this.getMin()+"}";
 1289                } else if (this.getMin() >= 0 && this.getMax() >= 0) {
 1290                    ret = this.child.toString(options)+"{"+this.getMin()+","+this.getMax()+"}";
 1291                } else if (this.getMin() >= 0 && this.getMax() < 0) {
 1292                    ret = this.child.toString(options)+"{"+this.getMin()+",}";
 1293                } else
 1294                    throw new RuntimeException
  ("Token#toString(): CLOSURE " 1295                                               +this.getMin()+", "+this.getMax());
 1296            } else {
 1297                if (this.getMin() < 0 && this.getMax() < 0) {
 1298                    ret = this.child.toString(options)+"*?";
 1299                } else if (this.getMin() == this.getMax()) {
 1300                    ret = this.child.toString(options)+"{"+this.getMin()+"}?";
 1301                } else if (this.getMin() >= 0 && this.getMax() >= 0) {
 1302                    ret = this.child.toString(options)+"{"+this.getMin()+","+this.getMax()+"}?";
 1303                } else if (this.getMin() >= 0 && this.getMax() < 0) {
 1304                    ret = this.child.toString(options)+"{"+this.getMin()+",}?";
 1305                } else
 1306                    throw new RuntimeException
  ("Token#toString(): NONGREEDYCLOSURE " 1307                                               +this.getMin()+", "+this.getMax());
 1308            }
 1309            return ret;
 1310        }
 1311    }
 1312
 1313
 1316    static class ParenToken extends Token implements java.io.Serializable
  { 1317        Token child;
 1318        int parennumber;
 1319
 1320        ParenToken(int type, Token tok, int paren) {
 1321            super(type);
 1322            this.child = tok;
 1323            this.parennumber = paren;
 1324        }
 1325
 1326        int size() {
 1327            return 1;
 1328        }
 1329        Token getChild(int index) {
 1330            return this.child;
 1331        }
 1332
 1333        int getParenNumber() {
 1334            return this.parennumber;
 1335        }
 1336
 1337        public String
  toString(int options) { 1338            String
  ret = null; 1339            switch (this.type) {
 1340              case PAREN:
 1341                if (this.parennumber == 0) {
 1342                    ret = "(?:"+this.child.toString(options)+")";
 1343                } else {
 1344                    ret = "("+this.child.toString(options)+")";
 1345                }
 1346                break;
 1347
 1348              case LOOKAHEAD:
 1349                ret = "(?="+this.child.toString(options)+")";
 1350                break;
 1351              case NEGATIVELOOKAHEAD:
 1352                ret = "(?!"+this.child.toString(options)+")";
 1353                break;
 1354              case LOOKBEHIND:
 1355                ret = "(?<="+this.child.toString(options)+")";
 1356                break;
 1357              case NEGATIVELOOKBEHIND:
 1358                ret = "(?<!"+this.child.toString(options)+")";
 1359                break;
 1360              case INDEPENDENT:
 1361                ret = "(?>"+this.child.toString(options)+")";
 1362                break;
 1363            }
 1364            return ret;
 1365        }
 1366    }
 1367
 1368
 1371    static class ConditionToken extends Token implements java.io.Serializable
  { 1372        int refNumber;
 1373        Token condition;
 1374        Token yes;
 1375        Token no;
 1376        ConditionToken(int refno, Token cond, Token yespat, Token nopat) {
 1377            super(Token.CONDITION);
 1378            this.refNumber = refno;
 1379            this.condition = cond;
 1380            this.yes = yespat;
 1381            this.no = nopat;
 1382        }
 1383        int size() {
 1384            return this.no == null ? 1 : 2;
 1385        }
 1386        Token getChild(int index) {
 1387            if (index == 0)  return this.yes;
 1388            if (index == 1)  return this.no;
 1389            throw new RuntimeException
  ("Internal Error: "+index); 1390        }
 1391
 1392        public String
  toString(int options) { 1393            String
  ret; 1394            if (refNumber > 0) {
 1395                ret = "(?("+refNumber+")";
 1396            } else if (this.condition.type == Token.ANCHOR) {
 1397                ret = "(?("+this.condition+")";
 1398            } else {
 1399                ret = "(?"+this.condition;
 1400            }
 1401
 1402            if (this.no == null) {
 1403                ret += this.yes+")";
 1404            } else {
 1405                ret += this.yes+"|"+this.no+")";
 1406            }
 1407            return ret;
 1408        }
 1409    }
 1410
 1411
 1414    static class ModifierToken extends Token implements java.io.Serializable
  { 1415        Token child;
 1416        int add;
 1417        int mask;
 1418
 1419        ModifierToken(Token tok, int add, int mask) {
 1420            super(Token.MODIFIERGROUP);
 1421            this.child = tok;
 1422            this.add = add;
 1423            this.mask = mask;
 1424        }
 1425
 1426        int size() {
 1427            return 1;
 1428        }
 1429        Token getChild(int index) {
 1430            return this.child;
 1431        }
 1432
 1433        int getOptions() {
 1434            return this.add;
 1435        }
 1436        int getOptionsMask() {
 1437            return this.mask;
 1438        }
 1439
 1440        public String
  toString(int options) { 1441            return "(?"
 1442                +(this.add == 0 ? "" : REUtil.createOptionString(this.add))
 1443                +(this.mask == 0 ? "" : REUtil.createOptionString(this.mask))
 1444                +":"
 1445                +this.child.toString(options)
 1446                +")";
 1447        }
 1448    }
 1449
 1450
 1454    static class UnionToken extends Token implements java.io.Serializable
  { 1455        Vector
  children; 1456
 1457        UnionToken(int type) {
 1458            super(type);
 1459        }
 1460
 1461        void addChild(Token tok) {
 1462            if (tok == null)  return;
 1463            if (this.children == null)  this.children = new Vector
  (); 1464            if (this.type == UNION) {
 1465                this.children.addElement(tok);
 1466                return;
 1467            }
 1468                                                            if (tok.type == CONCAT) {
 1470                for (int i = 0;  i < tok.size();  i ++)
 1471                    this.addChild(tok.getChild(i));                 return;
 1473            }
 1474            int size = this.children.size();
 1475            if (size == 0) {
 1476                this.children.addElement(tok);
 1477                return;
 1478            }
 1479            Token previous = (Token)this.children.elementAt(size-1);
 1480            if (!((previous.type == CHAR || previous.type == STRING)
 1481                  && (tok.type == CHAR || tok.type == STRING))) {
 1482                this.children.addElement(tok);
 1483                return;
 1484            }
 1485
 1486
 1488            StringBuffer
  buffer; 1489            int nextMaxLength = (tok.type == CHAR ? 2 : tok.getString().length());
 1490            if (previous.type == CHAR) {                        buffer = new StringBuffer
  (2 + nextMaxLength); 1492                int ch = previous.getChar();
 1493                if (ch >= 0x10000)
 1494                    buffer.append(REUtil.decomposeToSurrogates(ch));
 1495                else
 1496                    buffer.append((char)ch);
 1497                previous = Token.createString(null);
 1498                this.children.setElementAt(previous, size-1);
 1499            } else {                                            buffer = new StringBuffer
  (previous.getString().length() + nextMaxLength); 1501                buffer.append(previous.getString());
 1502            }
 1503
 1504            if (tok.type == CHAR) {
 1505                int ch = tok.getChar();
 1506                if (ch >= 0x10000)
 1507                    buffer.append(REUtil.decomposeToSurrogates(ch));
 1508                else
 1509                    buffer.append((char)ch);
 1510            } else {
 1511                buffer.append(tok.getString());
 1512            }
 1513
 1514            ((StringToken)previous).string = new String
  (buffer); 1515        }
 1516
 1517        int size() {
 1518            return this.children == null ? 0 : this.children.size();
 1519        }
 1520        Token getChild(int index) {
 1521            return (Token)this.children.elementAt(index);
 1522        }
 1523
 1524        public String
  toString(int options) { 1525            String
  ret; 1526            if (this.type == CONCAT) {
 1527                if (this.children.size() == 2) {
 1528                    Token ch = this.getChild(0);
 1529                    Token ch2 = this.getChild(1);
 1530                    if (ch2.type == CLOSURE && ch2.getChild(0) == ch) {
 1531                        ret = ch.toString(options)+"+";
 1532                    } else if (ch2.type == NONGREEDYCLOSURE && ch2.getChild(0) == ch) {
 1533                        ret = ch.toString(options)+"+?";
 1534                    } else
 1535                        ret = ch.toString(options)+ch2.toString(options);
 1536                } else {
 1537                    StringBuffer
  sb = new StringBuffer  (); 1538                    for (int i = 0;  i < this.children.size();  i ++) {
 1539                        sb.append(((Token)this.children.elementAt(i)).toString(options));
 1540                    }
 1541                    ret = new String
  (sb); 1542                }
 1543                return ret;
 1544            }
 1545            if (this.children.size() == 2 && this.getChild(1).type == EMPTY) {
 1546                ret = this.getChild(0).toString(options)+"?";
 1547            } else if (this.children.size() == 2
 1548                       && this.getChild(0).type == EMPTY) {
 1549                ret = this.getChild(1).toString(options)+"??";
 1550            } else {
 1551                StringBuffer
  sb = new StringBuffer  (); 1552                sb.append(((Token)this.children.elementAt(0)).toString(options));
 1553                for (int i = 1;  i < this.children.size();  i ++) {
 1554                    sb.append((char)'|');
 1555                    sb.append(((Token)this.children.elementAt(i)).toString(options));
 1556                }
 1557                ret = new String
  (sb); 1558            }
 1559            return ret;
 1560        }
 1561    }
 1562}
 1563
                                                                                                                                                                                                             |                                                                       
 
 
 
 
 
                                                                                   Popular Tags                                                                                                                                                                                              |