1 9 package net.killingar; 10 11 import java.util.HashMap ; 12 13 48 49 public class Diff { 50 51 58 public Diff(Object [] a,Object [] b) { 59 HashMap h = new HashMap (a.length + b.length); 60 filevec[0] = new file_data(a,h); 61 filevec[1] = new file_data(b,h); 62 } 63 64 66 private int equiv_max = 1; 67 68 71 public boolean heuristic = false; 72 73 75 public boolean no_discards = false; 76 77 private int[] xvec, yvec; 78 private int[] fdiag; 82 private int[] bdiag; 86 private int fdiagoff, bdiagoff; 87 private final file_data[] filevec = new file_data[2]; 88 private int cost; 89 90 113 114 private int diag (int xoff, int xlim, int yoff, int ylim) { 115 final int[] fd = fdiag; final int[] bd = bdiag; final int[] xv = xvec; final int[] yv = yvec; final int dmin = xoff - ylim; final int dmax = xlim - yoff; final int fmid = xoff - yoff; final int bmid = xlim - ylim; int fmin = fmid, fmax = fmid; int bmin = bmid, bmax = bmid; 127 final boolean odd = (fmid - bmid & 1) != 0; 128 129 fd[fdiagoff + fmid] = xoff; 130 bd[bdiagoff + bmid] = xlim; 131 132 for (int c = 1;; ++c) 133 { 134 int d; 135 boolean big_snake = false; 136 137 138 if (fmin > dmin) 139 fd[fdiagoff + --fmin - 1] = -1; 140 else 141 ++fmin; 142 if (fmax < dmax) 143 fd[fdiagoff + ++fmax + 1] = -1; 144 else 145 --fmax; 146 for (d = fmax; d >= fmin; d -= 2) 147 { 148 int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1]; 149 150 if (tlo >= thi) 151 x = tlo + 1; 152 else 153 x = thi; 154 oldx = x; 155 y = x - d; 156 while (x < xlim && y < ylim && xv[x] == yv[y]) { 157 ++x; ++y; 158 } 159 if (x - oldx > 20) 160 big_snake = true; 161 fd[fdiagoff + d] = x; 162 if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) 163 { 164 cost = 2 * c - 1; 165 return d; 166 } 167 } 168 169 170 if (bmin > dmin) 171 bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE; 172 else 173 ++bmin; 174 if (bmax < dmax) 175 bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE; 176 else 177 --bmax; 178 for (d = bmax; d >= bmin; d -= 2) 179 { 180 int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1]; 181 182 if (tlo < thi) 183 x = tlo; 184 else 185 x = thi - 1; 186 oldx = x; 187 y = x - d; 188 while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) { 189 --x; --y; 190 } 191 if (oldx - x > 20) 192 big_snake = true; 193 bd[bdiagoff + d] = x; 194 if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) 195 { 196 cost = 2 * c; 197 return d; 198 } 199 } 200 201 208 209 if (c > 200 && big_snake && heuristic) 210 { 211 int best = 0; 212 int bestpos = -1; 213 214 for (d = fmax; d >= fmin; d -= 2) 215 { 216 int dd = d - fmid; 217 if ((fd[fdiagoff + d] - xoff)*2 - dd > 12 * (c + (dd > 0 ? dd : -dd))) 218 { 219 if (fd[fdiagoff + d] * 2 - dd > best 220 && fd[fdiagoff + d] - xoff > 20 221 && fd[fdiagoff + d] - d - yoff > 20) 222 { 223 int k; 224 int x = fd[fdiagoff + d]; 225 226 228 for (k = 1; k <= 20; k++) 229 if (xvec[x - k] != yvec[x - d - k]) 230 break; 231 232 if (k == 21) 233 { 234 best = fd[fdiagoff + d] * 2 - dd; 235 bestpos = d; 236 } 237 } 238 } 239 } 240 if (best > 0) 241 { 242 cost = 2 * c - 1; 243 return bestpos; 244 } 245 246 best = 0; 247 for (d = bmax; d >= bmin; d -= 2) 248 { 249 int dd = d - bmid; 250 if ((xlim - bd[bdiagoff + d])*2 + dd > 12 * (c + (dd > 0 ? dd : -dd))) 251 { 252 if ((xlim - bd[bdiagoff + d]) * 2 + dd > best 253 && xlim - bd[bdiagoff + d] > 20 254 && ylim - (bd[bdiagoff + d] - d) > 20) 255 { 256 258 int k; 259 int x = bd[bdiagoff + d]; 260 261 for (k = 0; k < 20; k++) 262 if (xvec[x + k] != yvec[x - d + k]) 263 break; 264 if (k == 20) 265 { 266 best = (xlim - bd[bdiagoff + d]) * 2 + dd; 267 bestpos = d; 268 } 269 } 270 } 271 } 272 if (best > 0) 273 { 274 cost = 2 * c - 1; 275 return bestpos; 276 } 277 } 278 } 279 } 280 281 291 292 private void compareseq (int xoff, int xlim, int yoff, int ylim) { 293 294 while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) { 295 ++xoff; ++yoff; 296 } 297 298 while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) { 299 --xlim; --ylim; 300 } 301 302 303 if (xoff == xlim) 304 while (yoff < ylim) 305 filevec[1].changed_flag[1+filevec[1].realindexes[yoff++]] = true; 306 else if (yoff == ylim) 307 while (xoff < xlim) 308 filevec[0].changed_flag[1+filevec[0].realindexes[xoff++]] = true; 309 else 310 { 311 312 313 int d = diag (xoff, xlim, yoff, ylim); 314 int c = cost; 315 int f = fdiag[fdiagoff + d]; 316 int b = bdiag[bdiagoff + d]; 317 318 if (c == 1) 319 { 320 324 throw new IllegalArgumentException ("Empty subsequence"); 325 } 326 else 327 { 328 329 compareseq (xoff, b, yoff, b - d); 330 334 compareseq (b, xlim, b - d, ylim); 335 } 336 } 337 } 338 339 341 342 private void discard_confusing_lines() { 343 filevec[0].discard_confusing_lines(filevec[1]); 344 filevec[1].discard_confusing_lines(filevec[0]); 345 } 346 347 private boolean inhibit = false; 348 349 352 353 private void shift_boundaries() { 354 if (inhibit) 355 return; 356 filevec[0].shift_boundaries(filevec[1]); 357 filevec[1].shift_boundaries(filevec[0]); 358 } 359 360 362 363 private change build_reverse_script() { 364 change script = null; 365 final boolean[] changed0 = filevec[0].changed_flag; 366 final boolean[] changed1 = filevec[1].changed_flag; 367 final int len0 = filevec[0].buffered_lines; 368 final int len1 = filevec[1].buffered_lines; 369 370 371 372 int i0 = 0, i1 = 0; 373 374 while (i0 < len0 || i1 < len1) 375 { 376 if (changed0[1+i0] || changed1[1+i1]) 377 { 378 int line0 = i0, line1 = i1; 379 380 381 while (changed0[1+i0]) ++i0; 382 while (changed1[1+i1]) ++i1; 383 384 385 script = new change(line0, line1, i0 - line0, i1 - line1, script); 386 } 387 388 389 i0++; i1++; 390 } 391 392 return script; 393 } 394 395 397 398 private change build_script() { 399 change script = null; 400 final boolean[] changed0 = filevec[0].changed_flag; 401 final boolean[] changed1 = filevec[1].changed_flag; 402 final int len0 = filevec[0].buffered_lines; 403 final int len1 = filevec[1].buffered_lines; 404 int i0 = len0, i1 = len1; 405 406 407 408 while (i0 >= 0 || i1 >= 0) 409 { 410 if (changed0[i0] || changed1[i1]) 411 { 412 int line0 = i0, line1 = i1; 413 414 415 while (changed0[i0]) --i0; 416 while (changed1[i1]) --i1; 417 418 419 script = new change(i0, i1, line0 - i0, line1 - i1, script); 420 } 421 422 423 i0--; i1--; 424 } 425 426 return script; 427 } 428 429 431 public change diff_2(final boolean reverse) { 432 433 436 437 discard_confusing_lines (); 438 439 441 442 xvec = filevec[0].undiscarded; 443 yvec = filevec[1].undiscarded; 444 445 int diags = 446 filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3; 447 fdiag = new int[diags]; 448 fdiagoff = filevec[1].nondiscarded_lines + 1; 449 bdiag = new int[diags]; 450 bdiagoff = filevec[1].nondiscarded_lines + 1; 451 452 compareseq (0, filevec[0].nondiscarded_lines, 453 0, filevec[1].nondiscarded_lines); 454 fdiag = null; 455 bdiag = null; 456 457 459 460 shift_boundaries (); 461 462 464 465 if (reverse) 466 return build_reverse_script(); 467 else 468 return build_script(); 469 } 470 471 481 482 public static class change { 483 484 public change link; 485 486 public final int inserted; 487 488 public final int deleted; 489 490 public final int line0; 491 492 public final int line1; 493 494 501 change(int line0, int line1, int deleted, int inserted, change old) { 502 this.line0 = line0; 503 this.line1 = line1; 504 this.inserted = inserted; 505 this.deleted = deleted; 506 this.link = old; 507 } 509 } 510 511 513 514 class file_data { 515 516 517 void clear() { 518 522 changed_flag = new boolean[buffered_lines + 2]; 523 } 524 525 529 int[] equivCount() { 530 int[] equiv_count = new int[equiv_max]; 531 for (int i = 0; i < buffered_lines; ++i) 532 ++equiv_count[equivs[i]]; 533 return equiv_count; 534 } 535 536 549 void discard_confusing_lines(file_data f) { 550 clear(); 551 552 final byte[] discarded = discardable(f.equivCount()); 553 554 557 filterDiscards(discarded); 558 559 560 discard(discarded); 561 } 562 563 570 571 private byte[] discardable(final int[] counts) { 572 final int end = buffered_lines; 573 final byte[] discards = new byte[end]; 574 final int[] equivs = this.equivs; 575 int many = 5; 576 int tem = end / 64; 577 578 580 while ((tem = tem >> 2) > 0) 581 many *= 2; 582 583 for (int i = 0; i < end; i++) 584 { 585 int nmatch; 586 if (equivs[i] == 0) 587 continue; 588 nmatch = counts[equivs[i]]; 589 if (nmatch == 0) 590 discards[i] = 1; 591 else if (nmatch > many) 592 discards[i] = 2; 593 } 594 return discards; 595 } 596 597 600 601 private void filterDiscards(final byte[] discards) { 602 final int end = buffered_lines; 603 604 for (int i = 0; i < end; i++) 605 { 606 607 if (discards[i] == 2) 608 discards[i] = 0; 609 else if (discards[i] != 0) 610 { 611 612 int j; 613 int length; 614 int provisional = 0; 615 616 618 for (j = i; j < end; j++) 619 { 620 if (discards[j] == 0) 621 break; 622 if (discards[j] == 2) 623 ++provisional; 624 } 625 626 627 while (j > i && discards[j - 1] == 2) { 628 discards[--j] = 0; --provisional; 629 } 630 631 633 length = j - i; 634 635 637 if (provisional * 4 > length) 638 { 639 while (j > i) 640 if (discards[--j] == 2) 641 discards[j] = 0; 642 } 643 else 644 { 645 int consec; 646 int minimum = 1; 647 int tem = length / 4; 648 649 653 while ((tem = tem >> 2) > 0) 654 minimum *= 2; 655 minimum++; 656 657 659 for (j = 0, consec = 0; j < length; j++) 660 if (discards[i + j] != 2) 661 consec = 0; 662 else if (minimum == ++consec) 663 664 j -= consec; 665 else if (minimum < consec) 666 discards[i + j] = 0; 667 668 672 for (j = 0, consec = 0; j < length; j++) 673 { 674 if (j >= 8 && discards[i + j] == 1) 675 break; 676 if (discards[i + j] == 2) { 677 consec = 0; discards[i + j] = 0; 678 } 679 else if (discards[i + j] == 0) 680 consec = 0; 681 else 682 consec++; 683 if (consec == 3) 684 break; 685 } 686 687 688 i += length - 1; 689 690 691 for (j = 0, consec = 0; j < length; j++) 692 { 693 if (j >= 8 && discards[i - j] == 1) 694 break; 695 if (discards[i - j] == 2) { 696 consec = 0; discards[i - j] = 0; 697 } 698 else if (discards[i - j] == 0) 699 consec = 0; 700 else 701 consec++; 702 if (consec == 3) 703 break; 704 } 705 } 706 } 707 } 708 } 709 710 713 private void discard(final byte[] discards) { 714 final int end = buffered_lines; 715 int j = 0; 716 for (int i = 0; i < end; ++i) 717 if (no_discards || discards[i] == 0) 718 { 719 undiscarded[j] = equivs[i]; 720 realindexes[j++] = i; 721 } 722 else 723 changed_flag[1+i] = true; 724 nondiscarded_lines = j; 725 } 726 727 file_data(Object [] data,HashMap h) { 728 buffered_lines = data.length; 729 730 equivs = new int[buffered_lines]; 731 undiscarded = new int[buffered_lines]; 732 realindexes = new int[buffered_lines]; 733 734 for (int i = 0; i < data.length; ++i) { 735 Integer ir = (Integer )h.get(data[i]); 736 if (ir == null) 737 h.put(data[i],new Integer (equivs[i] = equiv_max++)); 738 else 739 equivs[i] = ir.intValue(); 740 } 741 } 742 743 755 756 void shift_boundaries(file_data f) { 757 final boolean[] changed = changed_flag; 758 final boolean[] other_changed = f.changed_flag; 759 int i = 0; 760 int j = 0; 761 int i_end = buffered_lines; 762 int preceding = -1; 763 int other_preceding = -1; 764 765 for (;;) 766 { 767 int start, end, other_start; 768 769 771 772 while (i < i_end && !changed[1+i]) 773 { 774 while (other_changed[1+j++]) 775 777 other_preceding = j; 778 i++; 779 } 780 781 if (i == i_end) 782 break; 783 784 start = i; 785 other_start = j; 786 787 for (;;) 788 { 789 790 791 while (i < i_end && changed[1+i]) i++; 792 end = i; 793 794 799 800 802 803 if (end != i_end 804 && equivs[start] == equivs[end] 805 && !other_changed[1+j] 806 && end != i_end 807 && !((preceding >= 0 && start == preceding) 808 || (other_preceding >= 0 809 && other_start == other_preceding))) 810 { 811 changed[1+end++] = true; 812 changed[1+start++] = false; 813 ++i; 814 817 ++j; 818 } 819 else 820 break; 821 } 822 823 preceding = i; 824 other_preceding = j; 825 } 826 } 827 828 829 final int buffered_lines; 830 831 834 private final int[] equivs; 835 836 838 final int[] undiscarded; 839 840 842 final int[] realindexes; 843 844 845 int nondiscarded_lines; 846 847 850 boolean[] changed_flag; 851 852 } 853 } 854 | Popular Tags |