1 57 58 package org.enhydra.apache.xerces.utils.regex; 59 60 61 64 final class RangeToken extends Token implements java.io.Serializable { 65 66 int[] ranges; 67 boolean sorted; 68 boolean compacted; 69 RangeToken icaseCache = null; 70 int[] map = null; 71 int nonMapIndex; 72 73 RangeToken(int type) { 74 super(type); 75 this.setSorted(false); 76 } 77 78 protected void addRange(int start, int end) { 80 this.icaseCache = null; 81 int r1, r2; 83 if (start <= end) { 84 r1 = start; 85 r2 = end; 86 } else { 87 r1 = end; 88 r2 = start; 89 } 90 91 int pos = 0; 92 if (this.ranges == null) { 93 this.ranges = new int[2]; 94 this.ranges[0] = r1; 95 this.ranges[1] = r2; 96 this.setSorted(true); 97 } else { 98 pos = this.ranges.length; 99 if (this.ranges[pos-1]+1 == r1) { 100 this.ranges[pos-1] = r2; 101 return; 102 } 103 int[] temp = new int[pos+2]; 104 System.arraycopy(this.ranges, 0, temp, 0, pos); 105 this.ranges = temp; 106 if (this.ranges[pos-1] >= r1) 107 this.setSorted(false); 108 this.ranges[pos++] = r1; 109 this.ranges[pos] = r2; 110 if (!this.sorted) 111 this.sortRanges(); 112 } 113 } 114 115 private final boolean isSorted() { 116 return this.sorted; 117 } 118 private final void setSorted(boolean sort) { 119 this.sorted = sort; 120 if (!sort) this.compacted = false; 121 } 122 private final boolean isCompacted() { 123 return this.compacted; 124 } 125 private final void setCompacted() { 126 this.compacted = true; 127 } 128 129 protected void sortRanges() { 130 if (this.isSorted()) 131 return; 132 if (this.ranges == null) 133 return; 134 136 for (int i = this.ranges.length-4; i >= 0; i -= 2) { 140 for (int j = 0; j <= i; j += 2) { 141 if (this.ranges[j] > this.ranges[j+2] 142 || this.ranges[j] == this.ranges[j+2] && this.ranges[j+1] > this.ranges[j+3]) { 143 int tmp; 144 tmp = this.ranges[j+2]; 145 this.ranges[j+2] = this.ranges[j]; 146 this.ranges[j] = tmp; 147 tmp = this.ranges[j+3]; 148 this.ranges[j+3] = this.ranges[j+1]; 149 this.ranges[j+1] = tmp; 150 } 151 } 152 } 153 this.setSorted(true); 154 } 155 156 159 protected void compactRanges() { 160 boolean DEBUG = false; 161 if (this.ranges == null || this.ranges.length <= 2) 162 return; 163 if (this.isCompacted()) 164 return; 165 int base = 0; int target = 0; 168 while (target < this.ranges.length) { 169 if (base != target) { 170 this.ranges[base] = this.ranges[target++]; 171 this.ranges[base+1] = this.ranges[target++]; 172 } else 173 target += 2; 174 int baseend = this.ranges[base+1]; 175 while (target < this.ranges.length) { 176 if (baseend+1 < this.ranges[target]) 177 break; 178 if (baseend+1 == this.ranges[target]) { 179 if (DEBUG) 180 System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base] 181 +", "+this.ranges[base+1] 182 +"], ["+this.ranges[target] 183 +", "+this.ranges[target+1] 184 +"] -> ["+this.ranges[base] 185 +", "+this.ranges[target+1] 186 +"]"); 187 this.ranges[base+1] = this.ranges[target+1]; 188 baseend = this.ranges[base+1]; 189 target += 2; 190 } else if (baseend >= this.ranges[target+1]) { 191 if (DEBUG) 192 System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base] 193 +", "+this.ranges[base+1] 194 +"], ["+this.ranges[target] 195 +", "+this.ranges[target+1] 196 +"] -> ["+this.ranges[base] 197 +", "+this.ranges[base+1] 198 +"]"); 199 target += 2; 200 } else if (baseend < this.ranges[target+1]) { 201 if (DEBUG) 202 System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base] 203 +", "+this.ranges[base+1] 204 +"], ["+this.ranges[target] 205 +", "+this.ranges[target+1] 206 +"] -> ["+this.ranges[base] 207 +", "+this.ranges[target+1] 208 +"]"); 209 this.ranges[base+1] = this.ranges[target+1]; 210 baseend = this.ranges[base+1]; 211 target += 2; 212 } else { 213 throw new RuntimeException ("Token#compactRanges(): Internel Error: [" 214 +this.ranges[base] 215 +","+this.ranges[base+1] 216 +"] ["+this.ranges[target] 217 +","+this.ranges[target+1]+"]"); 218 } 219 } base += 2; 221 } 222 223 if (base != this.ranges.length) { 224 int[] result = new int[base]; 225 System.arraycopy(this.ranges, 0, result, 0, base); 226 this.ranges = result; 227 } 228 this.setCompacted(); 229 } 230 231 protected void mergeRanges(Token token) { 232 if (token.type != this.type) 233 throw new IllegalArgumentException ("Token#mergeRanges(): Mismatched Type: "+token.type); 234 RangeToken tok = (RangeToken)token; 235 this.sortRanges(); 236 tok.sortRanges(); 237 if (tok.ranges == null) 238 return; 239 this.icaseCache = null; 240 this.setSorted(true); 241 if (this.ranges == null) { 242 this.ranges = new int[tok.ranges.length]; 243 System.arraycopy(tok.ranges, 0, this.ranges, 0, tok.ranges.length); 244 return; 245 } 246 int[] result = new int[this.ranges.length+tok.ranges.length]; 247 for (int i = 0, j = 0, k = 0; i < this.ranges.length || j < tok.ranges.length;) { 248 if (i >= this.ranges.length) { 249 result[k++] = tok.ranges[j++]; 250 result[k++] = tok.ranges[j++]; 251 } else if (j >= tok.ranges.length) { 252 result[k++] = this.ranges[i++]; 253 result[k++] = this.ranges[i++]; 254 } else if (tok.ranges[j] < this.ranges[i] 255 || tok.ranges[j] == this.ranges[i] && tok.ranges[j+1] < this.ranges[i+1]) { 256 result[k++] = tok.ranges[j++]; 257 result[k++] = tok.ranges[j++]; 258 } else { 259 result[k++] = this.ranges[i++]; 260 result[k++] = this.ranges[i++]; 261 } 262 } 263 this.ranges = result; 264 } 265 266 protected void subtractRanges(Token token) { 267 if (token.type == NRANGE) { 268 this.intersectRanges(token); 269 return; 270 } 271 RangeToken tok = (RangeToken)token; 272 if (tok.ranges == null || this.ranges == null) 273 return; 274 this.icaseCache = null; 275 this.sortRanges(); 276 this.compactRanges(); 277 tok.sortRanges(); 278 tok.compactRanges(); 279 280 282 int[] result = new int[this.ranges.length+tok.ranges.length]; 283 int wp = 0, src = 0, sub = 0; 284 while (src < this.ranges.length && sub < tok.ranges.length) { 285 int srcbegin = this.ranges[src]; 286 int srcend = this.ranges[src+1]; 287 int subbegin = tok.ranges[sub]; 288 int subend = tok.ranges[sub+1]; 289 if (srcend < subbegin) { result[wp++] = this.ranges[src++]; 295 result[wp++] = this.ranges[src++]; 296 } else if (srcend >= subbegin 297 && srcbegin <= subend) { if (subbegin <= srcbegin && srcend <= subend) { 304 src += 2; 309 } else if (subbegin <= srcbegin) { 310 this.ranges[src] = subend+1; 315 sub += 2; 316 } else if (srcend <= subend) { 317 result[wp++] = srcbegin; 322 result[wp++] = subbegin-1; 323 src += 2; 324 } else { 325 result[wp++] = srcbegin; 330 result[wp++] = subbegin-1; 331 this.ranges[src] = subend+1; 332 sub += 2; 333 } 334 } else if (subend < srcbegin) { 335 sub += 2; 339 } else { 340 throw new RuntimeException ("Token#subtractRanges(): Internal Error: ["+this.ranges[src] 341 +","+this.ranges[src+1] 342 +"] - ["+tok.ranges[sub] 343 +","+tok.ranges[sub+1] 344 +"]"); 345 } 346 } 347 while (src < this.ranges.length) { 348 result[wp++] = this.ranges[src++]; 349 result[wp++] = this.ranges[src++]; 350 } 351 this.ranges = new int[wp]; 352 System.arraycopy(result, 0, this.ranges, 0, wp); 353 } 355 356 359 protected void intersectRanges(Token token) { 360 RangeToken tok = (RangeToken)token; 361 if (tok.ranges == null || this.ranges == null) 362 return; 363 this.icaseCache = null; 364 this.sortRanges(); 365 this.compactRanges(); 366 tok.sortRanges(); 367 tok.compactRanges(); 368 369 int[] result = new int[this.ranges.length+tok.ranges.length]; 370 int wp = 0, src1 = 0, src2 = 0; 371 while (src1 < this.ranges.length && src2 < tok.ranges.length) { 372 int src1begin = this.ranges[src1]; 373 int src1end = this.ranges[src1+1]; 374 int src2begin = tok.ranges[src2]; 375 int src2end = tok.ranges[src2+1]; 376 if (src1end < src2begin) { src1 += 2; 382 } else if (src1end >= src2begin 383 && src1begin <= src2end) { if (src2begin <= src2begin && src1end <= src2end) { 390 result[wp++] = src1begin; 395 result[wp++] = src1end; 396 src1 += 2; 397 } else if (src2begin <= src1begin) { 398 result[wp++] = src1begin; 403 result[wp++] = src2end; 404 this.ranges[src1] = src2end+1; 405 src2 += 2; 406 } else if (src1end <= src2end) { 407 result[wp++] = src2begin; 412 result[wp++] = src1end; 413 src1 += 2; 414 } else { 415 result[wp++] = src2begin; 420 result[wp++] = src2end; 421 this.ranges[src1] = src2end+1; 422 } 423 } else if (src2end < src1begin) { 424 src2 += 2; 428 } else { 429 throw new RuntimeException ("Token#intersectRanges(): Internal Error: [" 430 +this.ranges[src1] 431 +","+this.ranges[src1+1] 432 +"] & ["+tok.ranges[src2] 433 +","+tok.ranges[src2+1] 434 +"]"); 435 } 436 } 437 while (src1 < this.ranges.length) { 438 result[wp++] = this.ranges[src1++]; 439 result[wp++] = this.ranges[src1++]; 440 } 441 this.ranges = new int[wp]; 442 System.arraycopy(result, 0, this.ranges, 0, wp); 443 } 445 446 450 static Token complementRanges(Token token) { 451 if (token.type != RANGE && token.type != NRANGE) 452 throw new IllegalArgumentException ("Token#complementRanges(): must be RANGE: "+token.type); 453 RangeToken tok = (RangeToken)token; 454 tok.sortRanges(); 455 tok.compactRanges(); 456 int len = tok.ranges.length+2; 457 if (tok.ranges[0] == 0) 458 len -= 2; 459 int last = tok.ranges[tok.ranges.length-1]; 460 if (last == UTF16_MAX) 461 len -= 2; 462 RangeToken ret = Token.createRange(); 463 ret.ranges = new int[len]; 464 int wp = 0; 465 if (tok.ranges[0] > 0) { 466 ret.ranges[wp++] = 0; 467 ret.ranges[wp++] = tok.ranges[0]-1; 468 } 469 for (int i = 1; i < tok.ranges.length-2; i += 2) { 470 ret.ranges[wp++] = tok.ranges[i]+1; 471 ret.ranges[wp++] = tok.ranges[i+1]-1; 472 } 473 if (last != UTF16_MAX) { 474 ret.ranges[wp++] = last+1; 475 ret.ranges[wp] = UTF16_MAX; 476 } 477 ret.setCompacted(); 478 return ret; 479 } 480 481 synchronized RangeToken getCaseInsensitiveToken() { 482 if (this.icaseCache != null) 483 return this.icaseCache; 484 485 RangeToken uppers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange(); 486 for (int i = 0; i < this.ranges.length; i += 2) { 487 for (int ch = this.ranges[i]; ch <= this.ranges[i+1]; ch ++) { 488 if (ch > 0xffff) 489 uppers.addRange(ch, ch); 490 else { 491 char uch = Character.toUpperCase((char)ch); 492 uppers.addRange(uch, uch); 493 } 494 } 495 } 496 RangeToken lowers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange(); 497 for (int i = 0; i < uppers.ranges.length; i += 2) { 498 for (int ch = uppers.ranges[i]; ch <= uppers.ranges[i+1]; ch ++) { 499 if (ch > 0xffff) 500 lowers.addRange(ch, ch); 501 else { 502 char uch = Character.toUpperCase((char)ch); 503 lowers.addRange(uch, uch); 504 } 505 } 506 } 507 lowers.mergeRanges(uppers); 508 lowers.mergeRanges(this); 509 lowers.compactRanges(); 510 511 this.icaseCache = lowers; 512 return lowers; 513 } 514 515 void dumpRanges() { 516 System.err.print("RANGE: "); 517 if (this.ranges == null) 518 System.err.println(" NULL"); 519 for (int i = 0; i < this.ranges.length; i += 2) { 520 System.err.print("["+this.ranges[i]+","+this.ranges[i+1]+"] "); 521 } 522 System.err.println(""); 523 } 524 525 boolean match(int ch) { 526 if (this.map == null) this.createMap(); 527 boolean ret; 528 if (this.type == RANGE) { 529 if (ch < MAPSIZE) 530 return (this.map[ch/32] & (1<<(ch&0x1f))) != 0; 531 ret = false; 532 for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) { 533 if (this.ranges[i] <= ch && ch <= this.ranges[i+1]) 534 return true; 535 } 536 } else { 537 if (ch < MAPSIZE) 538 return (this.map[ch/32] & (1<<(ch&0x1f))) == 0; 539 ret = true; 540 for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) { 541 if (this.ranges[i] <= ch && ch <= this.ranges[i+1]) 542 return false; 543 } 544 } 545 return ret; 546 } 547 548 private static final int MAPSIZE = 256; 549 private void createMap() { 550 int asize = MAPSIZE/32; this.map = new int[asize]; 552 this.nonMapIndex = this.ranges.length; 553 for (int i = 0; i < asize; i ++) this.map[i] = 0; 554 for (int i = 0; i < this.ranges.length; i += 2) { 555 int s = this.ranges[i]; 556 int e = this.ranges[i+1]; 557 if (s < MAPSIZE) { 558 for (int j = s; j <= e && j < MAPSIZE; j ++) 559 this.map[j/32] |= 1<<(j&0x1f); } else { 561 this.nonMapIndex = i; 562 break; 563 } 564 if (e >= MAPSIZE) { 565 this.nonMapIndex = i; 566 break; 567 } 568 } 569 } 571 572 public String toString(int options) { 573 String ret; 574 if (this.type == RANGE) { 575 if (this == Token.token_dot) 576 ret = "."; 577 else if (this == Token.token_0to9) 578 ret = "\\d"; 579 else if (this == Token.token_wordchars) 580 ret = "\\w"; 581 else if (this == Token.token_spaces) 582 ret = "\\s"; 583 else { 584 StringBuffer sb = new StringBuffer (); 585 sb.append("["); 586 for (int i = 0; i < this.ranges.length; i += 2) { 587 if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) sb.append(","); 588 if (this.ranges[i] == this.ranges[i+1]) { 589 sb.append(escapeCharInCharClass(this.ranges[i])); 590 } else { 591 sb.append(escapeCharInCharClass(this.ranges[i])); 592 sb.append((char)'-'); 593 sb.append(escapeCharInCharClass(this.ranges[i+1])); 594 } 595 } 596 sb.append("]"); 597 ret = sb.toString(); 598 } 599 } else { 600 if (this == Token.token_not_0to9) 601 ret = "\\D"; 602 else if (this == Token.token_not_wordchars) 603 ret = "\\W"; 604 else if (this == Token.token_not_spaces) 605 ret = "\\S"; 606 else { 607 StringBuffer sb = new StringBuffer (); 608 sb.append("[^"); 609 for (int i = 0; i < this.ranges.length; i += 2) { 610 if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) sb.append(","); 611 if (this.ranges[i] == this.ranges[i+1]) { 612 sb.append(escapeCharInCharClass(this.ranges[i])); 613 } else { 614 sb.append(escapeCharInCharClass(this.ranges[i])); 615 sb.append('-'); 616 sb.append(escapeCharInCharClass(this.ranges[i+1])); 617 } 618 } 619 sb.append("]"); 620 ret = sb.toString(); 621 } 622 } 623 return ret; 624 } 625 626 private static String escapeCharInCharClass(int ch) { 627 String ret; 628 switch (ch) { 629 case '[': case ']': case '-': case '^': 630 case ',': case '\\': 631 ret = "\\"+(char)ch; 632 break; 633 case '\f': ret = "\\f"; break; 634 case '\n': ret = "\\n"; break; 635 case '\r': ret = "\\r"; break; 636 case '\t': ret = "\\t"; break; 637 case 0x1b: ret = "\\e"; break; 638 default: 640 if (ch < 0x20) { 641 String pre = "0"+Integer.toHexString(ch); 642 ret = "\\x"+pre.substring(pre.length()-2, pre.length()); 643 } else if (ch >= 0x10000) { 644 String pre = "0"+Integer.toHexString(ch); 645 ret = "\\v"+pre.substring(pre.length()-6, pre.length()); 646 } else 647 ret = ""+(char)ch; 648 } 649 return ret; 650 } 651 652 } 653 | Popular Tags |