1 57 58 package com.sun.org.apache.xerces.internal.impl.xpath.regex; 59 60 65 final class RangeToken extends Token implements java.io.Serializable { 66 67 int[] ranges; 68 boolean sorted; 69 boolean compacted; 70 RangeToken icaseCache = null; 71 int[] map = null; 72 int nonMapIndex; 73 74 RangeToken(int type) { 75 super(type); 76 this.setSorted(false); 77 } 78 79 protected void addRange(int start, int end) { 81 this.icaseCache = null; 82 int r1, r2; 84 if (start <= end) { 85 r1 = start; 86 r2 = end; 87 } else { 88 r1 = end; 89 r2 = start; 90 } 91 92 int pos = 0; 93 if (this.ranges == null) { 94 this.ranges = new int[2]; 95 this.ranges[0] = r1; 96 this.ranges[1] = r2; 97 this.setSorted(true); 98 } else { 99 pos = this.ranges.length; 100 if (this.ranges[pos-1]+1 == r1) { 101 this.ranges[pos-1] = r2; 102 return; 103 } 104 int[] temp = new int[pos+2]; 105 System.arraycopy(this.ranges, 0, temp, 0, pos); 106 this.ranges = temp; 107 if (this.ranges[pos-1] >= r1) 108 this.setSorted(false); 109 this.ranges[pos++] = r1; 110 this.ranges[pos] = r2; 111 if (!this.sorted) 112 this.sortRanges(); 113 } 114 } 115 116 private final boolean isSorted() { 117 return this.sorted; 118 } 119 private final void setSorted(boolean sort) { 120 this.sorted = sort; 121 if (!sort) this.compacted = false; 122 } 123 private final boolean isCompacted() { 124 return this.compacted; 125 } 126 private final void setCompacted() { 127 this.compacted = true; 128 } 129 130 protected void sortRanges() { 131 if (this.isSorted()) 132 return; 133 if (this.ranges == null) 134 return; 135 137 for (int i = this.ranges.length-4; i >= 0; i -= 2) { 141 for (int j = 0; j <= i; j += 2) { 142 if (this.ranges[j] > this.ranges[j+2] 143 || this.ranges[j] == this.ranges[j+2] && this.ranges[j+1] > this.ranges[j+3]) { 144 int tmp; 145 tmp = this.ranges[j+2]; 146 this.ranges[j+2] = this.ranges[j]; 147 this.ranges[j] = tmp; 148 tmp = this.ranges[j+3]; 149 this.ranges[j+3] = this.ranges[j+1]; 150 this.ranges[j+1] = tmp; 151 } 152 } 153 } 154 this.setSorted(true); 155 } 156 157 160 protected void compactRanges() { 161 boolean DEBUG = false; 162 if (this.ranges == null || this.ranges.length <= 2) 163 return; 164 if (this.isCompacted()) 165 return; 166 int base = 0; int target = 0; 169 while (target < this.ranges.length) { 170 if (base != target) { 171 this.ranges[base] = this.ranges[target++]; 172 this.ranges[base+1] = this.ranges[target++]; 173 } else 174 target += 2; 175 int baseend = this.ranges[base+1]; 176 while (target < this.ranges.length) { 177 if (baseend+1 < this.ranges[target]) 178 break; 179 if (baseend+1 == this.ranges[target]) { 180 if (DEBUG) 181 System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base] 182 +", "+this.ranges[base+1] 183 +"], ["+this.ranges[target] 184 +", "+this.ranges[target+1] 185 +"] -> ["+this.ranges[base] 186 +", "+this.ranges[target+1] 187 +"]"); 188 this.ranges[base+1] = this.ranges[target+1]; 189 baseend = this.ranges[base+1]; 190 target += 2; 191 } else if (baseend >= this.ranges[target+1]) { 192 if (DEBUG) 193 System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base] 194 +", "+this.ranges[base+1] 195 +"], ["+this.ranges[target] 196 +", "+this.ranges[target+1] 197 +"] -> ["+this.ranges[base] 198 +", "+this.ranges[base+1] 199 +"]"); 200 target += 2; 201 } else if (baseend < this.ranges[target+1]) { 202 if (DEBUG) 203 System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base] 204 +", "+this.ranges[base+1] 205 +"], ["+this.ranges[target] 206 +", "+this.ranges[target+1] 207 +"] -> ["+this.ranges[base] 208 +", "+this.ranges[target+1] 209 +"]"); 210 this.ranges[base+1] = this.ranges[target+1]; 211 baseend = this.ranges[base+1]; 212 target += 2; 213 } else { 214 throw new RuntimeException ("Token#compactRanges(): Internel Error: [" 215 +this.ranges[base] 216 +","+this.ranges[base+1] 217 +"] ["+this.ranges[target] 218 +","+this.ranges[target+1]+"]"); 219 } 220 } base += 2; 222 } 223 224 if (base != this.ranges.length) { 225 int[] result = new int[base]; 226 System.arraycopy(this.ranges, 0, result, 0, base); 227 this.ranges = result; 228 } 229 this.setCompacted(); 230 } 231 232 protected void mergeRanges(Token token) { 233 RangeToken tok = (RangeToken)token; 234 this.sortRanges(); 235 tok.sortRanges(); 236 if (tok.ranges == null) 237 return; 238 this.icaseCache = null; 239 this.setSorted(true); 240 if (this.ranges == null) { 241 this.ranges = new int[tok.ranges.length]; 242 System.arraycopy(tok.ranges, 0, this.ranges, 0, tok.ranges.length); 243 return; 244 } 245 int[] result = new int[this.ranges.length+tok.ranges.length]; 246 for (int i = 0, j = 0, k = 0; i < this.ranges.length || j < tok.ranges.length;) { 247 if (i >= this.ranges.length) { 248 result[k++] = tok.ranges[j++]; 249 result[k++] = tok.ranges[j++]; 250 } else if (j >= tok.ranges.length) { 251 result[k++] = this.ranges[i++]; 252 result[k++] = this.ranges[i++]; 253 } else if (tok.ranges[j] < this.ranges[i] 254 || tok.ranges[j] == this.ranges[i] && tok.ranges[j+1] < this.ranges[i+1]) { 255 result[k++] = tok.ranges[j++]; 256 result[k++] = tok.ranges[j++]; 257 } else { 258 result[k++] = this.ranges[i++]; 259 result[k++] = this.ranges[i++]; 260 } 261 } 262 this.ranges = result; 263 } 264 265 protected void subtractRanges(Token token) { 266 if (token.type == NRANGE) { 267 this.intersectRanges(token); 268 return; 269 } 270 RangeToken tok = (RangeToken)token; 271 if (tok.ranges == null || this.ranges == null) 272 return; 273 this.icaseCache = null; 274 this.sortRanges(); 275 this.compactRanges(); 276 tok.sortRanges(); 277 tok.compactRanges(); 278 279 281 int[] result = new int[this.ranges.length+tok.ranges.length]; 282 int wp = 0, src = 0, sub = 0; 283 while (src < this.ranges.length && sub < tok.ranges.length) { 284 int srcbegin = this.ranges[src]; 285 int srcend = this.ranges[src+1]; 286 int subbegin = tok.ranges[sub]; 287 int subend = tok.ranges[sub+1]; 288 if (srcend < subbegin) { result[wp++] = this.ranges[src++]; 294 result[wp++] = this.ranges[src++]; 295 } else if (srcend >= subbegin 296 && srcbegin <= subend) { if (subbegin <= srcbegin && srcend <= subend) { 303 src += 2; 308 } else if (subbegin <= srcbegin) { 309 this.ranges[src] = subend+1; 314 sub += 2; 315 } else if (srcend <= subend) { 316 result[wp++] = srcbegin; 321 result[wp++] = subbegin-1; 322 src += 2; 323 } else { 324 result[wp++] = srcbegin; 329 result[wp++] = subbegin-1; 330 this.ranges[src] = subend+1; 331 sub += 2; 332 } 333 } else if (subend < srcbegin) { 334 sub += 2; 338 } else { 339 throw new RuntimeException ("Token#subtractRanges(): Internal Error: ["+this.ranges[src] 340 +","+this.ranges[src+1] 341 +"] - ["+tok.ranges[sub] 342 +","+tok.ranges[sub+1] 343 +"]"); 344 } 345 } 346 while (src < this.ranges.length) { 347 result[wp++] = this.ranges[src++]; 348 result[wp++] = this.ranges[src++]; 349 } 350 this.ranges = new int[wp]; 351 System.arraycopy(result, 0, this.ranges, 0, wp); 352 } 354 355 358 protected void intersectRanges(Token token) { 359 RangeToken tok = (RangeToken)token; 360 if (tok.ranges == null || this.ranges == null) 361 return; 362 this.icaseCache = null; 363 this.sortRanges(); 364 this.compactRanges(); 365 tok.sortRanges(); 366 tok.compactRanges(); 367 368 int[] result = new int[this.ranges.length+tok.ranges.length]; 369 int wp = 0, src1 = 0, src2 = 0; 370 while (src1 < this.ranges.length && src2 < tok.ranges.length) { 371 int src1begin = this.ranges[src1]; 372 int src1end = this.ranges[src1+1]; 373 int src2begin = tok.ranges[src2]; 374 int src2end = tok.ranges[src2+1]; 375 if (src1end < src2begin) { src1 += 2; 381 } else if (src1end >= src2begin 382 && src1begin <= src2end) { if (src2begin <= src2begin && src1end <= src2end) { 389 result[wp++] = src1begin; 394 result[wp++] = src1end; 395 src1 += 2; 396 } else if (src2begin <= src1begin) { 397 result[wp++] = src1begin; 402 result[wp++] = src2end; 403 this.ranges[src1] = src2end+1; 404 src2 += 2; 405 } else if (src1end <= src2end) { 406 result[wp++] = src2begin; 411 result[wp++] = src1end; 412 src1 += 2; 413 } else { 414 result[wp++] = src2begin; 419 result[wp++] = src2end; 420 this.ranges[src1] = src2end+1; 421 } 422 } else if (src2end < src1begin) { 423 src2 += 2; 427 } else { 428 throw new RuntimeException ("Token#intersectRanges(): Internal Error: [" 429 +this.ranges[src1] 430 +","+this.ranges[src1+1] 431 +"] & ["+tok.ranges[src2] 432 +","+tok.ranges[src2+1] 433 +"]"); 434 } 435 } 436 while (src1 < this.ranges.length) { 437 result[wp++] = this.ranges[src1++]; 438 result[wp++] = this.ranges[src1++]; 439 } 440 this.ranges = new int[wp]; 441 System.arraycopy(result, 0, this.ranges, 0, wp); 442 } 444 445 449 static Token complementRanges(Token token) { 450 if (token.type != RANGE && token.type != NRANGE) 451 throw new IllegalArgumentException ("Token#complementRanges(): must be RANGE: "+token.type); 452 RangeToken tok = (RangeToken)token; 453 tok.sortRanges(); 454 tok.compactRanges(); 455 int len = tok.ranges.length+2; 456 if (tok.ranges[0] == 0) 457 len -= 2; 458 int last = tok.ranges[tok.ranges.length-1]; 459 if (last == UTF16_MAX) 460 len -= 2; 461 RangeToken ret = Token.createRange(); 462 ret.ranges = new int[len]; 463 int wp = 0; 464 if (tok.ranges[0] > 0) { 465 ret.ranges[wp++] = 0; 466 ret.ranges[wp++] = tok.ranges[0]-1; 467 } 468 for (int i = 1; i < tok.ranges.length-2; i += 2) { 469 ret.ranges[wp++] = tok.ranges[i]+1; 470 ret.ranges[wp++] = tok.ranges[i+1]-1; 471 } 472 if (last != UTF16_MAX) { 473 ret.ranges[wp++] = last+1; 474 ret.ranges[wp] = UTF16_MAX; 475 } 476 ret.setCompacted(); 477 return ret; 478 } 479 480 synchronized RangeToken getCaseInsensitiveToken() { 481 if (this.icaseCache != null) 482 return this.icaseCache; 483 484 RangeToken uppers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange(); 485 for (int i = 0; i < this.ranges.length; i += 2) { 486 for (int ch = this.ranges[i]; ch <= this.ranges[i+1]; ch ++) { 487 if (ch > 0xffff) 488 uppers.addRange(ch, ch); 489 else { 490 char uch = Character.toUpperCase((char)ch); 491 uppers.addRange(uch, uch); 492 } 493 } 494 } 495 RangeToken lowers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange(); 496 for (int i = 0; i < uppers.ranges.length; i += 2) { 497 for (int ch = uppers.ranges[i]; ch <= uppers.ranges[i+1]; ch ++) { 498 if (ch > 0xffff) 499 lowers.addRange(ch, ch); 500 else { 501 char uch = Character.toUpperCase((char)ch); 502 lowers.addRange(uch, uch); 503 } 504 } 505 } 506 lowers.mergeRanges(uppers); 507 lowers.mergeRanges(this); 508 lowers.compactRanges(); 509 510 this.icaseCache = lowers; 511 return lowers; 512 } 513 514 void dumpRanges() { 515 System.err.print("RANGE: "); 516 if (this.ranges == null) 517 System.err.println(" NULL"); 518 for (int i = 0; i < this.ranges.length; i += 2) { 519 System.err.print("["+this.ranges[i]+","+this.ranges[i+1]+"] "); 520 } 521 System.err.println(""); 522 } 523 524 boolean match(int ch) { 525 if (this.map == null) this.createMap(); 526 boolean ret; 527 if (this.type == RANGE) { 528 if (ch < MAPSIZE) 529 return (this.map[ch/32] & (1<<(ch&0x1f))) != 0; 530 ret = false; 531 for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) { 532 if (this.ranges[i] <= ch && ch <= this.ranges[i+1]) 533 return true; 534 } 535 } else { 536 if (ch < MAPSIZE) 537 return (this.map[ch/32] & (1<<(ch&0x1f))) == 0; 538 ret = true; 539 for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) { 540 if (this.ranges[i] <= ch && ch <= this.ranges[i+1]) 541 return false; 542 } 543 } 544 return ret; 545 } 546 547 private static final int MAPSIZE = 256; 548 private void createMap() { 549 int asize = MAPSIZE/32; this.map = new int[asize]; 551 this.nonMapIndex = this.ranges.length; 552 for (int i = 0; i < asize; i ++) this.map[i] = 0; 553 for (int i = 0; i < this.ranges.length; i += 2) { 554 int s = this.ranges[i]; 555 int e = this.ranges[i+1]; 556 if (s < MAPSIZE) { 557 for (int j = s; j <= e && j < MAPSIZE; j ++) 558 this.map[j/32] |= 1<<(j&0x1f); } else { 560 this.nonMapIndex = i; 561 break; 562 } 563 if (e >= MAPSIZE) { 564 this.nonMapIndex = i; 565 break; 566 } 567 } 568 } 570 571 public String toString(int options) { 572 String ret; 573 if (this.type == RANGE) { 574 if (this == Token.token_dot) 575 ret = "."; 576 else if (this == Token.token_0to9) 577 ret = "\\d"; 578 else if (this == Token.token_wordchars) 579 ret = "\\w"; 580 else if (this == Token.token_spaces) 581 ret = "\\s"; 582 else { 583 StringBuffer sb = new StringBuffer (); 584 sb.append("["); 585 for (int i = 0; i < this.ranges.length; i += 2) { 586 if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) sb.append(","); 587 if (this.ranges[i] == this.ranges[i+1]) { 588 sb.append(escapeCharInCharClass(this.ranges[i])); 589 } else { 590 sb.append(escapeCharInCharClass(this.ranges[i])); 591 sb.append((char)'-'); 592 sb.append(escapeCharInCharClass(this.ranges[i+1])); 593 } 594 } 595 sb.append("]"); 596 ret = sb.toString(); 597 } 598 } else { 599 if (this == Token.token_not_0to9) 600 ret = "\\D"; 601 else if (this == Token.token_not_wordchars) 602 ret = "\\W"; 603 else if (this == Token.token_not_spaces) 604 ret = "\\S"; 605 else { 606 StringBuffer sb = new StringBuffer (); 607 sb.append("[^"); 608 for (int i = 0; i < this.ranges.length; i += 2) { 609 if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) sb.append(","); 610 if (this.ranges[i] == this.ranges[i+1]) { 611 sb.append(escapeCharInCharClass(this.ranges[i])); 612 } else { 613 sb.append(escapeCharInCharClass(this.ranges[i])); 614 sb.append('-'); 615 sb.append(escapeCharInCharClass(this.ranges[i+1])); 616 } 617 } 618 sb.append("]"); 619 ret = sb.toString(); 620 } 621 } 622 return ret; 623 } 624 625 private static String escapeCharInCharClass(int ch) { 626 String ret; 627 switch (ch) { 628 case '[': case ']': case '-': case '^': 629 case ',': case '\\': 630 ret = "\\"+(char)ch; 631 break; 632 case '\f': ret = "\\f"; break; 633 case '\n': ret = "\\n"; break; 634 case '\r': ret = "\\r"; break; 635 case '\t': ret = "\\t"; break; 636 case 0x1b: ret = "\\e"; break; 637 default: 639 if (ch < 0x20) { 640 String pre = "0"+Integer.toHexString(ch); 641 ret = "\\x"+pre.substring(pre.length()-2, pre.length()); 642 } else if (ch >= 0x10000) { 643 String pre = "0"+Integer.toHexString(ch); 644 ret = "\\v"+pre.substring(pre.length()-6, pre.length()); 645 } else 646 ret = ""+(char)ch; 647 } 648 return ret; 649 } 650 651 } 652 | Popular Tags |