1 package jdiff; 2 3 import java.io.*; 4 import java.util.*; 5 6 41 42 public class DiffMyers 43 { 44 45 52 public DiffMyers(Object [] a,Object [] b) 53 { 54 Hashtable h = new Hashtable(a.length + b.length); 55 filevec[0] = new file_data(a,h); 56 filevec[1] = new file_data(b,h); 57 } 58 59 61 private int equiv_max = 1; 62 63 66 public boolean heuristic = false; 67 68 70 public boolean no_discards = false; 71 72 private int[] xvec, yvec; 73 private int[] fdiag; 77 private int[] bdiag; 81 private int fdiagoff, bdiagoff; 82 private final file_data[] filevec = new file_data[2]; 83 private int cost; 84 85 108 109 private int diag (int xoff, int xlim, int yoff, int ylim) 110 { 111 final int[] fd = fdiag; final int[] bd = bdiag; final int[] xv = xvec; final int[] yv = yvec; final int dmin = xoff - ylim; final int dmax = xlim - yoff; final int fmid = xoff - yoff; final int bmid = xlim - ylim; int fmin = fmid, fmax = fmid; int bmin = bmid, bmax = bmid; 123 final boolean odd = (fmid - bmid & 1) != 0; 124 125 fd[fdiagoff + fmid] = xoff; 126 bd[bdiagoff + bmid] = xlim; 127 128 for (int c = 1;; ++c) 129 { 130 int d; 131 boolean big_snake = false; 132 133 134 if (fmin > dmin) 135 fd[fdiagoff + --fmin - 1] = -1; 136 else 137 ++fmin; 138 if (fmax < dmax) 139 fd[fdiagoff + ++fmax + 1] = -1; 140 else 141 --fmax; 142 for (d = fmax; d >= fmin; d -= 2) 143 { 144 int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1]; 145 146 if (tlo >= thi) 147 x = tlo + 1; 148 else 149 x = thi; 150 oldx = x; 151 y = x - d; 152 while (x < xlim && y < ylim && xv[x] == yv[y]) { 153 ++x; ++y; 154 } 155 if (x - oldx > 20) 156 big_snake = true; 157 fd[fdiagoff + d] = x; 158 if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) 159 { 160 cost = 2 * c - 1; 161 return d; 162 } 163 } 164 165 166 if (bmin > dmin) 167 bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE; 168 else 169 ++bmin; 170 if (bmax < dmax) 171 bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE; 172 else 173 --bmax; 174 for (d = bmax; d >= bmin; d -= 2) 175 { 176 int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1]; 177 178 if (tlo < thi) 179 x = tlo; 180 else 181 x = thi - 1; 182 oldx = x; 183 y = x - d; 184 while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) { 185 --x; --y; 186 } 187 if (oldx - x > 20) 188 big_snake = true; 189 bd[bdiagoff + d] = x; 190 if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) 191 { 192 cost = 2 * c; 193 return d; 194 } 195 } 196 197 204 205 if (c > 200 && big_snake && heuristic) 206 { 207 int best = 0; 208 int bestpos = -1; 209 210 for (d = fmax; d >= fmin; d -= 2) 211 { 212 int dd = d - fmid; 213 if ((fd[fdiagoff + d] - xoff)*2 - dd > 12 * (c + (dd > 0 ? dd : -dd))) 214 { 215 if (fd[fdiagoff + d] * 2 - dd > best 216 && fd[fdiagoff + d] - xoff > 20 217 && fd[fdiagoff + d] - d - yoff > 20) 218 { 219 int k; 220 int x = fd[fdiagoff + d]; 221 222 224 for (k = 1; k <= 20; k++) 225 if (xvec[x - k] != yvec[x - d - k]) 226 break; 227 228 if (k == 21) 229 { 230 best = fd[fdiagoff + d] * 2 - dd; 231 bestpos = d; 232 } 233 } 234 } 235 } 236 if (best > 0) 237 { 238 cost = 2 * c - 1; 239 return bestpos; 240 } 241 242 best = 0; 243 for (d = bmax; d >= bmin; d -= 2) 244 { 245 int dd = d - bmid; 246 if ((xlim - bd[bdiagoff + d])*2 + dd > 12 * (c + (dd > 0 ? dd : -dd))) 247 { 248 if ((xlim - bd[bdiagoff + d]) * 2 + dd > best 249 && xlim - bd[bdiagoff + d] > 20 250 && ylim - (bd[bdiagoff + d] - d) > 20) 251 { 252 254 int k; 255 int x = bd[bdiagoff + d]; 256 257 for (k = 0; k < 20; k++) 258 if (xvec[x + k] != yvec[x - d + k]) 259 break; 260 if (k == 20) 261 { 262 best = (xlim - bd[bdiagoff + d]) * 2 + dd; 263 bestpos = d; 264 } 265 } 266 } 267 } 268 if (best > 0) 269 { 270 cost = 2 * c - 1; 271 return bestpos; 272 } 273 } 274 } 275 } 276 277 287 288 private void compareseq (int xoff, int xlim, int yoff, int ylim) { 289 290 while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) { 291 ++xoff; ++yoff; 292 } 293 294 while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) { 295 --xlim; --ylim; 296 } 297 298 299 if (xoff == xlim) 300 while (yoff < ylim) 301 filevec[1].changed_flag[1+filevec[1].realindexes[yoff++]] = true; 302 else if (yoff == ylim) 303 while (xoff < xlim) 304 filevec[0].changed_flag[1+filevec[0].realindexes[xoff++]] = true; 305 else 306 { 307 308 309 int d = diag (xoff, xlim, yoff, ylim); 310 int c = cost; 311 int f = fdiag[fdiagoff + d]; 312 int b = bdiag[bdiagoff + d]; 313 314 if (c == 1) 315 { 316 320 throw new IllegalArgumentException ("Empty subsequence"); 321 } 322 else 323 { 324 325 compareseq (xoff, b, yoff, b - d); 326 330 compareseq (b, xlim, b - d, ylim); 331 } 332 } 333 } 334 335 337 338 private void discard_confusing_lines() { 339 filevec[0].discard_confusing_lines(filevec[1]); 340 filevec[1].discard_confusing_lines(filevec[0]); 341 } 342 343 private boolean inhibit = false; 344 345 348 349 private void shift_boundaries() { 350 if (inhibit) 351 return; 352 filevec[0].shift_boundaries(filevec[1]); 353 filevec[1].shift_boundaries(filevec[0]); 354 } 355 356 358 359 private change build_reverse_script() { 360 change script = null; 361 final boolean[] changed0 = filevec[0].changed_flag; 362 final boolean[] changed1 = filevec[1].changed_flag; 363 final int len0 = filevec[0].buffered_lines; 364 final int len1 = filevec[1].buffered_lines; 365 366 367 368 int i0 = 0, i1 = 0; 369 370 while (i0 < len0 || i1 < len1) 371 { 372 if (changed0[1+i0] || changed1[1+i1]) 373 { 374 int line0 = i0, line1 = i1; 375 376 377 while (changed0[1+i0]) ++i0; 378 while (changed1[1+i1]) ++i1; 379 380 381 script = new change(line0, line1, i0 - line0, i1 - line1, script); 382 } 383 384 385 i0++; i1++; 386 } 387 388 return script; 389 } 390 391 393 394 private change build_script() { 395 change script = null; 396 final boolean[] changed0 = filevec[0].changed_flag; 397 final boolean[] changed1 = filevec[1].changed_flag; 398 final int len0 = filevec[0].buffered_lines; 399 final int len1 = filevec[1].buffered_lines; 400 int i0 = len0, i1 = len1; 401 402 403 404 while (i0 >= 0 || i1 >= 0) 405 { 406 if (changed0[i0] || changed1[i1]) 407 { 408 int line0 = i0, line1 = i1; 409 410 411 while (changed0[i0]) --i0; 412 while (changed1[i1]) --i1; 413 414 415 script = new change(i0, i1, line0 - i0, line1 - i1, script); 416 } 417 418 419 i0--; i1--; 420 } 421 422 return script; 423 } 424 425 427 public change diff_2(final boolean reverse) { 428 429 432 433 discard_confusing_lines (); 434 435 437 438 xvec = filevec[0].undiscarded; 439 yvec = filevec[1].undiscarded; 440 441 int diags = 442 filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3; 443 fdiag = new int[diags]; 444 fdiagoff = filevec[1].nondiscarded_lines + 1; 445 bdiag = new int[diags]; 446 bdiagoff = filevec[1].nondiscarded_lines + 1; 447 448 compareseq (0, filevec[0].nondiscarded_lines, 449 0, filevec[1].nondiscarded_lines); 450 fdiag = null; 451 bdiag = null; 452 453 455 456 shift_boundaries (); 457 458 460 461 if (reverse) 462 return build_reverse_script(); 463 else 464 return build_script(); 465 } 466 467 477 478 public static class change { 479 480 public change link; 481 482 public int inserted; 483 484 public int deleted; 485 486 public final int line0; 487 488 public final int line1; 489 490 497 change(int line0, int line1, int deleted, int inserted, change old) { 498 this.line0 = line0; 499 this.line1 = line1; 500 this.inserted = inserted; 501 this.deleted = deleted; 502 this.link = old; 503 } 505 } 506 507 509 510 class file_data { 511 512 513 void clear() { 514 518 changed_flag = new boolean[buffered_lines + 2]; 519 } 520 521 525 int[] equivCount() { 526 int[] equiv_count = new int[equiv_max]; 527 for (int i = 0; i < buffered_lines; ++i) 528 ++equiv_count[equivs[i]]; 529 return equiv_count; 530 } 531 532 545 void discard_confusing_lines(file_data f) { 546 clear(); 547 548 final byte[] discarded = discardable(f.equivCount()); 549 550 553 filterDiscards(discarded); 554 555 556 discard(discarded); 557 } 558 559 566 567 private byte[] discardable(final int[] counts) { 568 final int end = buffered_lines; 569 final byte[] discards = new byte[end]; 570 final int[] equivs = this.equivs; 571 int many = 5; 572 int tem = end / 64; 573 574 576 while ((tem = tem >> 2) > 0) 577 many *= 2; 578 579 for (int i = 0; i < end; i++) 580 { 581 int nmatch; 582 if (equivs[i] == 0) 583 continue; 584 nmatch = counts[equivs[i]]; 585 if (nmatch == 0) 586 discards[i] = 1; 587 else if (nmatch > many) 588 discards[i] = 2; 589 } 590 return discards; 591 } 592 593 596 597 private void filterDiscards(final byte[] discards) { 598 final int end = buffered_lines; 599 600 for (int i = 0; i < end; i++) 601 { 602 603 if (discards[i] == 2) 604 discards[i] = 0; 605 else if (discards[i] != 0) 606 { 607 608 int j; 609 int length; 610 int provisional = 0; 611 612 614 for (j = i; j < end; j++) 615 { 616 if (discards[j] == 0) 617 break; 618 if (discards[j] == 2) 619 ++provisional; 620 } 621 622 623 while (j > i && discards[j - 1] == 2) { 624 discards[--j] = 0; --provisional; 625 } 626 627 629 length = j - i; 630 631 633 if (provisional * 4 > length) 634 { 635 while (j > i) 636 if (discards[--j] == 2) 637 discards[j] = 0; 638 } 639 else 640 { 641 int consec; 642 int minimum = 1; 643 int tem = length / 4; 644 645 649 while ((tem = tem >> 2) > 0) 650 minimum *= 2; 651 minimum++; 652 653 655 for (j = 0, consec = 0; j < length; j++) 656 if (discards[i + j] != 2) 657 consec = 0; 658 else if (minimum == ++consec) 659 660 j -= consec; 661 else if (minimum < consec) 662 discards[i + j] = 0; 663 664 668 for (j = 0, consec = 0; j < length; j++) 669 { 670 if (j >= 8 && discards[i + j] == 1) 671 break; 672 if (discards[i + j] == 2) { 673 consec = 0; discards[i + j] = 0; 674 } 675 else if (discards[i + j] == 0) 676 consec = 0; 677 else 678 consec++; 679 if (consec == 3) 680 break; 681 } 682 683 684 i += length - 1; 685 686 687 for (j = 0, consec = 0; j < length; j++) 688 { 689 if (j >= 8 && discards[i - j] == 1) 690 break; 691 if (discards[i - j] == 2) { 692 consec = 0; discards[i - j] = 0; 693 } 694 else if (discards[i - j] == 0) 695 consec = 0; 696 else 697 consec++; 698 if (consec == 3) 699 break; 700 } 701 } 702 } 703 } 704 } 705 706 709 private void discard(final byte[] discards) { 710 final int end = buffered_lines; 711 int j = 0; 712 for (int i = 0; i < end; ++i) 713 if (no_discards || discards[i] == 0) 714 { 715 undiscarded[j] = equivs[i]; 716 realindexes[j++] = i; 717 } 718 else 719 changed_flag[1+i] = true; 720 nondiscarded_lines = j; 721 } 722 723 file_data(Object [] data,Hashtable h) { 724 buffered_lines = data.length; 725 726 equivs = new int[buffered_lines]; 727 undiscarded = new int[buffered_lines]; 728 realindexes = new int[buffered_lines]; 729 730 for (int i = 0; i < data.length; ++i) { 731 Integer ir = (Integer )h.get(data[i]); 732 if (ir == null) 733 h.put(data[i],new Integer (equivs[i] = equiv_max++)); 734 else 735 equivs[i] = ir.intValue(); 736 } 737 } 738 739 751 752 void shift_boundaries(file_data f) { 753 final boolean[] changed = changed_flag; 754 final boolean[] other_changed = f.changed_flag; 755 int i = 0; 756 int j = 0; 757 int i_end = buffered_lines; 758 int preceding = -1; 759 int other_preceding = -1; 760 761 for (;;) 762 { 763 int start, end, other_start; 764 765 767 768 while (i < i_end && !changed[1+i]) 769 { 770 while (other_changed[1+j++]) 771 773 other_preceding = j; 774 i++; 775 } 776 777 if (i == i_end) 778 break; 779 780 start = i; 781 other_start = j; 782 783 for (;;) 784 { 785 786 787 while (i < i_end && changed[1+i]) i++; 788 end = i; 789 790 795 796 798 799 if (end != i_end 800 && equivs[start] == equivs[end] 801 && !other_changed[1+j] 802 && end != i_end 803 && !((preceding >= 0 && start == preceding) 804 || (other_preceding >= 0 805 && other_start == other_preceding))) 806 { 807 changed[1+end++] = true; 808 changed[1+start++] = false; 809 ++i; 810 813 ++j; 814 } 815 else 816 break; 817 } 818 819 preceding = i; 820 other_preceding = j; 821 } 822 } 823 824 825 final int buffered_lines; 826 827 830 private final int[] equivs; 831 832 834 final int[] undiscarded; 835 836 838 final int[] realindexes; 839 840 841 int nondiscarded_lines; 842 843 846 boolean[] changed_flag; 847 848 } 849 850 } 851 | Popular Tags |