1 16 17 package org.apache.xerces.impl.xpath.regex; 18 19 26 final class RangeToken extends Token implements java.io.Serializable { 27 28 private static final long serialVersionUID = 3257568399592010545L; 29 30 int[] ranges; 31 boolean sorted; 32 boolean compacted; 33 RangeToken icaseCache = null; 34 int[] map = null; 35 int nonMapIndex; 36 37 RangeToken(int type) { 38 super(type); 39 this.setSorted(false); 40 } 41 42 protected void addRange(int start, int end) { 44 this.icaseCache = null; 45 int r1, r2; 47 if (start <= end) { 48 r1 = start; 49 r2 = end; 50 } else { 51 r1 = end; 52 r2 = start; 53 } 54 55 int pos = 0; 56 if (this.ranges == null) { 57 this.ranges = new int[2]; 58 this.ranges[0] = r1; 59 this.ranges[1] = r2; 60 this.setSorted(true); 61 } else { 62 pos = this.ranges.length; 63 if (this.ranges[pos-1]+1 == r1) { 64 this.ranges[pos-1] = r2; 65 return; 66 } 67 int[] temp = new int[pos+2]; 68 System.arraycopy(this.ranges, 0, temp, 0, pos); 69 this.ranges = temp; 70 if (this.ranges[pos-1] >= r1) 71 this.setSorted(false); 72 this.ranges[pos++] = r1; 73 this.ranges[pos] = r2; 74 if (!this.sorted) 75 this.sortRanges(); 76 } 77 } 78 79 private final boolean isSorted() { 80 return this.sorted; 81 } 82 private final void setSorted(boolean sort) { 83 this.sorted = sort; 84 if (!sort) this.compacted = false; 85 } 86 private final boolean isCompacted() { 87 return this.compacted; 88 } 89 private final void setCompacted() { 90 this.compacted = true; 91 } 92 93 protected void sortRanges() { 94 if (this.isSorted()) 95 return; 96 if (this.ranges == null) 97 return; 98 100 for (int i = this.ranges.length-4; i >= 0; i -= 2) { 104 for (int j = 0; j <= i; j += 2) { 105 if (this.ranges[j] > this.ranges[j+2] 106 || this.ranges[j] == this.ranges[j+2] && this.ranges[j+1] > this.ranges[j+3]) { 107 int tmp; 108 tmp = this.ranges[j+2]; 109 this.ranges[j+2] = this.ranges[j]; 110 this.ranges[j] = tmp; 111 tmp = this.ranges[j+3]; 112 this.ranges[j+3] = this.ranges[j+1]; 113 this.ranges[j+1] = tmp; 114 } 115 } 116 } 117 this.setSorted(true); 118 } 119 120 123 protected void compactRanges() { 124 boolean DEBUG = false; 125 if (this.ranges == null || this.ranges.length <= 2) 126 return; 127 if (this.isCompacted()) 128 return; 129 int base = 0; int target = 0; 132 while (target < this.ranges.length) { 133 if (base != target) { 134 this.ranges[base] = this.ranges[target++]; 135 this.ranges[base+1] = this.ranges[target++]; 136 } else 137 target += 2; 138 int baseend = this.ranges[base+1]; 139 while (target < this.ranges.length) { 140 if (baseend+1 < this.ranges[target]) 141 break; 142 if (baseend+1 == this.ranges[target]) { 143 if (DEBUG) 144 System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base] 145 +", "+this.ranges[base+1] 146 +"], ["+this.ranges[target] 147 +", "+this.ranges[target+1] 148 +"] -> ["+this.ranges[base] 149 +", "+this.ranges[target+1] 150 +"]"); 151 this.ranges[base+1] = this.ranges[target+1]; 152 baseend = this.ranges[base+1]; 153 target += 2; 154 } else if (baseend >= this.ranges[target+1]) { 155 if (DEBUG) 156 System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base] 157 +", "+this.ranges[base+1] 158 +"], ["+this.ranges[target] 159 +", "+this.ranges[target+1] 160 +"] -> ["+this.ranges[base] 161 +", "+this.ranges[base+1] 162 +"]"); 163 target += 2; 164 } else if (baseend < this.ranges[target+1]) { 165 if (DEBUG) 166 System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base] 167 +", "+this.ranges[base+1] 168 +"], ["+this.ranges[target] 169 +", "+this.ranges[target+1] 170 +"] -> ["+this.ranges[base] 171 +", "+this.ranges[target+1] 172 +"]"); 173 this.ranges[base+1] = this.ranges[target+1]; 174 baseend = this.ranges[base+1]; 175 target += 2; 176 } else { 177 throw new RuntimeException ("Token#compactRanges(): Internel Error: [" 178 +this.ranges[base] 179 +","+this.ranges[base+1] 180 +"] ["+this.ranges[target] 181 +","+this.ranges[target+1]+"]"); 182 } 183 } base += 2; 185 } 186 187 if (base != this.ranges.length) { 188 int[] result = new int[base]; 189 System.arraycopy(this.ranges, 0, result, 0, base); 190 this.ranges = result; 191 } 192 this.setCompacted(); 193 } 194 195 protected void mergeRanges(Token token) { 196 RangeToken tok = (RangeToken)token; 197 this.sortRanges(); 198 tok.sortRanges(); 199 if (tok.ranges == null) 200 return; 201 this.icaseCache = null; 202 this.setSorted(true); 203 if (this.ranges == null) { 204 this.ranges = new int[tok.ranges.length]; 205 System.arraycopy(tok.ranges, 0, this.ranges, 0, tok.ranges.length); 206 return; 207 } 208 int[] result = new int[this.ranges.length+tok.ranges.length]; 209 for (int i = 0, j = 0, k = 0; i < this.ranges.length || j < tok.ranges.length;) { 210 if (i >= this.ranges.length) { 211 result[k++] = tok.ranges[j++]; 212 result[k++] = tok.ranges[j++]; 213 } else if (j >= tok.ranges.length) { 214 result[k++] = this.ranges[i++]; 215 result[k++] = this.ranges[i++]; 216 } else if (tok.ranges[j] < this.ranges[i] 217 || tok.ranges[j] == this.ranges[i] && tok.ranges[j+1] < this.ranges[i+1]) { 218 result[k++] = tok.ranges[j++]; 219 result[k++] = tok.ranges[j++]; 220 } else { 221 result[k++] = this.ranges[i++]; 222 result[k++] = this.ranges[i++]; 223 } 224 } 225 this.ranges = result; 226 } 227 228 protected void subtractRanges(Token token) { 229 if (token.type == NRANGE) { 230 this.intersectRanges(token); 231 return; 232 } 233 RangeToken tok = (RangeToken)token; 234 if (tok.ranges == null || this.ranges == null) 235 return; 236 this.icaseCache = null; 237 this.sortRanges(); 238 this.compactRanges(); 239 tok.sortRanges(); 240 tok.compactRanges(); 241 242 244 int[] result = new int[this.ranges.length+tok.ranges.length]; 245 int wp = 0, src = 0, sub = 0; 246 while (src < this.ranges.length && sub < tok.ranges.length) { 247 int srcbegin = this.ranges[src]; 248 int srcend = this.ranges[src+1]; 249 int subbegin = tok.ranges[sub]; 250 int subend = tok.ranges[sub+1]; 251 if (srcend < subbegin) { result[wp++] = this.ranges[src++]; 257 result[wp++] = this.ranges[src++]; 258 } else if (srcend >= subbegin 259 && srcbegin <= subend) { if (subbegin <= srcbegin && srcend <= subend) { 266 src += 2; 271 } else if (subbegin <= srcbegin) { 272 this.ranges[src] = subend+1; 277 sub += 2; 278 } else if (srcend <= subend) { 279 result[wp++] = srcbegin; 284 result[wp++] = subbegin-1; 285 src += 2; 286 } else { 287 result[wp++] = srcbegin; 292 result[wp++] = subbegin-1; 293 this.ranges[src] = subend+1; 294 sub += 2; 295 } 296 } else if (subend < srcbegin) { 297 sub += 2; 301 } else { 302 throw new RuntimeException ("Token#subtractRanges(): Internal Error: ["+this.ranges[src] 303 +","+this.ranges[src+1] 304 +"] - ["+tok.ranges[sub] 305 +","+tok.ranges[sub+1] 306 +"]"); 307 } 308 } 309 while (src < this.ranges.length) { 310 result[wp++] = this.ranges[src++]; 311 result[wp++] = this.ranges[src++]; 312 } 313 this.ranges = new int[wp]; 314 System.arraycopy(result, 0, this.ranges, 0, wp); 315 } 317 318 321 protected void intersectRanges(Token token) { 322 RangeToken tok = (RangeToken)token; 323 if (tok.ranges == null || this.ranges == null) 324 return; 325 this.icaseCache = null; 326 this.sortRanges(); 327 this.compactRanges(); 328 tok.sortRanges(); 329 tok.compactRanges(); 330 331 int[] result = new int[this.ranges.length+tok.ranges.length]; 332 int wp = 0, src1 = 0, src2 = 0; 333 while (src1 < this.ranges.length && src2 < tok.ranges.length) { 334 int src1begin = this.ranges[src1]; 335 int src1end = this.ranges[src1+1]; 336 int src2begin = tok.ranges[src2]; 337 int src2end = tok.ranges[src2+1]; 338 if (src1end < src2begin) { src1 += 2; 344 } else if (src1end >= src2begin 345 && src1begin <= src2end) { if (src2begin <= src2begin && src1end <= src2end) { 352 result[wp++] = src1begin; 357 result[wp++] = src1end; 358 src1 += 2; 359 } else if (src2begin <= src1begin) { 360 result[wp++] = src1begin; 365 result[wp++] = src2end; 366 this.ranges[src1] = src2end+1; 367 src2 += 2; 368 } else if (src1end <= src2end) { 369 result[wp++] = src2begin; 374 result[wp++] = src1end; 375 src1 += 2; 376 } else { 377 result[wp++] = src2begin; 382 result[wp++] = src2end; 383 this.ranges[src1] = src2end+1; 384 } 385 } else if (src2end < src1begin) { 386 src2 += 2; 390 } else { 391 throw new RuntimeException ("Token#intersectRanges(): Internal Error: [" 392 +this.ranges[src1] 393 +","+this.ranges[src1+1] 394 +"] & ["+tok.ranges[src2] 395 +","+tok.ranges[src2+1] 396 +"]"); 397 } 398 } 399 while (src1 < this.ranges.length) { 400 result[wp++] = this.ranges[src1++]; 401 result[wp++] = this.ranges[src1++]; 402 } 403 this.ranges = new int[wp]; 404 System.arraycopy(result, 0, this.ranges, 0, wp); 405 } 407 408 412 static Token complementRanges(Token token) { 413 if (token.type != RANGE && token.type != NRANGE) 414 throw new IllegalArgumentException ("Token#complementRanges(): must be RANGE: "+token.type); 415 RangeToken tok = (RangeToken)token; 416 tok.sortRanges(); 417 tok.compactRanges(); 418 int len = tok.ranges.length+2; 419 if (tok.ranges[0] == 0) 420 len -= 2; 421 int last = tok.ranges[tok.ranges.length-1]; 422 if (last == UTF16_MAX) 423 len -= 2; 424 RangeToken ret = Token.createRange(); 425 ret.ranges = new int[len]; 426 int wp = 0; 427 if (tok.ranges[0] > 0) { 428 ret.ranges[wp++] = 0; 429 ret.ranges[wp++] = tok.ranges[0]-1; 430 } 431 for (int i = 1; i < tok.ranges.length-2; i += 2) { 432 ret.ranges[wp++] = tok.ranges[i]+1; 433 ret.ranges[wp++] = tok.ranges[i+1]-1; 434 } 435 if (last != UTF16_MAX) { 436 ret.ranges[wp++] = last+1; 437 ret.ranges[wp] = UTF16_MAX; 438 } 439 ret.setCompacted(); 440 return ret; 441 } 442 443 synchronized RangeToken getCaseInsensitiveToken() { 444 if (this.icaseCache != null) 445 return this.icaseCache; 446 447 RangeToken uppers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange(); 448 for (int i = 0; i < this.ranges.length; i += 2) { 449 for (int ch = this.ranges[i]; ch <= this.ranges[i+1]; ch ++) { 450 if (ch > 0xffff) 451 uppers.addRange(ch, ch); 452 else { 453 char uch = Character.toUpperCase((char)ch); 454 uppers.addRange(uch, uch); 455 } 456 } 457 } 458 RangeToken lowers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange(); 459 for (int i = 0; i < uppers.ranges.length; i += 2) { 460 for (int ch = uppers.ranges[i]; ch <= uppers.ranges[i+1]; ch ++) { 461 if (ch > 0xffff) 462 lowers.addRange(ch, ch); 463 else { 464 char uch = Character.toUpperCase((char)ch); 465 lowers.addRange(uch, uch); 466 } 467 } 468 } 469 lowers.mergeRanges(uppers); 470 lowers.mergeRanges(this); 471 lowers.compactRanges(); 472 473 this.icaseCache = lowers; 474 return lowers; 475 } 476 477 void dumpRanges() { 478 System.err.print("RANGE: "); 479 if (this.ranges == null) 480 System.err.println(" NULL"); 481 for (int i = 0; i < this.ranges.length; i += 2) { 482 System.err.print("["+this.ranges[i]+","+this.ranges[i+1]+"] "); 483 } 484 System.err.println(""); 485 } 486 487 boolean match(int ch) { 488 if (this.map == null) this.createMap(); 489 boolean ret; 490 if (this.type == RANGE) { 491 if (ch < MAPSIZE) 492 return (this.map[ch/32] & (1<<(ch&0x1f))) != 0; 493 ret = false; 494 for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) { 495 if (this.ranges[i] <= ch && ch <= this.ranges[i+1]) 496 return true; 497 } 498 } else { 499 if (ch < MAPSIZE) 500 return (this.map[ch/32] & (1<<(ch&0x1f))) == 0; 501 ret = true; 502 for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) { 503 if (this.ranges[i] <= ch && ch <= this.ranges[i+1]) 504 return false; 505 } 506 } 507 return ret; 508 } 509 510 private static final int MAPSIZE = 256; 511 private void createMap() { 512 int asize = MAPSIZE/32; this.map = new int[asize]; 514 this.nonMapIndex = this.ranges.length; 515 for (int i = 0; i < asize; i ++) this.map[i] = 0; 516 for (int i = 0; i < this.ranges.length; i += 2) { 517 int s = this.ranges[i]; 518 int e = this.ranges[i+1]; 519 if (s < MAPSIZE) { 520 for (int j = s; j <= e && j < MAPSIZE; j ++) 521 this.map[j/32] |= 1<<(j&0x1f); } else { 523 this.nonMapIndex = i; 524 break; 525 } 526 if (e >= MAPSIZE) { 527 this.nonMapIndex = i; 528 break; 529 } 530 } 531 } 533 534 public String toString(int options) { 535 String ret; 536 if (this.type == RANGE) { 537 if (this == Token.token_dot) 538 ret = "."; 539 else if (this == Token.token_0to9) 540 ret = "\\d"; 541 else if (this == Token.token_wordchars) 542 ret = "\\w"; 543 else if (this == Token.token_spaces) 544 ret = "\\s"; 545 else { 546 StringBuffer sb = new StringBuffer (); 547 sb.append("["); 548 for (int i = 0; i < this.ranges.length; i += 2) { 549 if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) sb.append(","); 550 if (this.ranges[i] == this.ranges[i+1]) { 551 sb.append(escapeCharInCharClass(this.ranges[i])); 552 } else { 553 sb.append(escapeCharInCharClass(this.ranges[i])); 554 sb.append((char)'-'); 555 sb.append(escapeCharInCharClass(this.ranges[i+1])); 556 } 557 } 558 sb.append("]"); 559 ret = sb.toString(); 560 } 561 } else { 562 if (this == Token.token_not_0to9) 563 ret = "\\D"; 564 else if (this == Token.token_not_wordchars) 565 ret = "\\W"; 566 else if (this == Token.token_not_spaces) 567 ret = "\\S"; 568 else { 569 StringBuffer sb = new StringBuffer (); 570 sb.append("[^"); 571 for (int i = 0; i < this.ranges.length; i += 2) { 572 if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) sb.append(","); 573 if (this.ranges[i] == this.ranges[i+1]) { 574 sb.append(escapeCharInCharClass(this.ranges[i])); 575 } else { 576 sb.append(escapeCharInCharClass(this.ranges[i])); 577 sb.append('-'); 578 sb.append(escapeCharInCharClass(this.ranges[i+1])); 579 } 580 } 581 sb.append("]"); 582 ret = sb.toString(); 583 } 584 } 585 return ret; 586 } 587 588 private static String escapeCharInCharClass(int ch) { 589 String ret; 590 switch (ch) { 591 case '[': case ']': case '-': case '^': 592 case ',': case '\\': 593 ret = "\\"+(char)ch; 594 break; 595 case '\f': ret = "\\f"; break; 596 case '\n': ret = "\\n"; break; 597 case '\r': ret = "\\r"; break; 598 case '\t': ret = "\\t"; break; 599 case 0x1b: ret = "\\e"; break; 600 default: 602 if (ch < 0x20) { 603 String pre = "0"+Integer.toHexString(ch); 604 ret = "\\x"+pre.substring(pre.length()-2, pre.length()); 605 } else if (ch >= 0x10000) { 606 String pre = "0"+Integer.toHexString(ch); 607 ret = "\\v"+pre.substring(pre.length()-6, pre.length()); 608 } else 609 ret = ""+(char)ch; 610 } 611 return ret; 612 } 613 614 } 615 | Popular Tags |