1 11 package org.eclipse.ui.internal.texteditor.quickdiff.compare.rangedifferencer; 12 13 import java.util.LinkedList ; 14 import java.util.List ; 15 16 import org.eclipse.core.runtime.IProgressMonitor; 17 import org.eclipse.core.runtime.NullProgressMonitor; 18 19 import org.eclipse.jface.text.Assert; 20 21 22 23 24 31 public final class Levenstein { 32 33 private static final boolean DEBUG= false; 34 private static final boolean MATRIX= false; 35 36 40 private static final boolean ASSERT= false; 41 42 43 private static final int COST_DELETE= 1; 44 private static final int COST_INSERT= 1; 45 private static final int COST_CHANGE= 1; 46 private static final int SKIP= Integer.MAX_VALUE; 47 48 private static final RangeDifference[] EMPTY_DIFFERENCES= new RangeDifference[0]; 49 private static final int NO_DISTANCE= 0; 50 51 private interface CellComputer { 52 int computeCell(int row, int col); 53 } 54 55 private final class DefaultCellComputer implements CellComputer { 56 public int computeCell(int row, int column) { 57 if (row == fRowStart) 58 return computeNullRow(column); 59 else if (column == fColStart) 60 return computeNullColumn(row); 61 else 62 return computeInnerCell(row, column); 63 } 64 65 private int computeNullRow(int column) { 66 return Math.abs(column - fColStart); 68 } 69 70 private int computeNullColumn(int row) { 71 return Math.abs(row - fRowStart); 73 } 74 75 private int computeInnerCell(int row, int col) { 76 int fromAbove= sum(getAt(row - fStep, col), COST_INSERT); 77 int fromLeft= sum(getAt(row, col - fStep), COST_DELETE); 78 int minDiag= getAt(row - fStep, col - fStep); 79 80 int minCellValue= Math.min(Math.min(fromAbove, fromLeft), minDiag); 81 int minCost= minCost(row, col, minCellValue); 82 83 if (minCellValue == fromAbove || minCellValue == fromLeft) 84 return minCellValue; 85 86 if (ASSERT) Assert.isTrue(minCellValue == minDiag && fromAbove >= minDiag && fromLeft >= minDiag); 87 88 int nextCharCost= rangesEqual(row, col) ? 0 : COST_CHANGE; 89 minCost= sum(minCost, nextCharCost); 90 int cost= minDiag + nextCharCost; 91 return cost; 92 } 93 } 94 95 99 private final class OptimizedCellComputer implements CellComputer { 100 public int computeCell(int row, int column) { 101 if (row == fRowStart) 102 return computeNullRow(column); 103 else if (column == fColStart) 104 return computeNullColumn(row); 105 else 106 return computeInnerCell(row, column); 107 } 108 109 private int computeNullRow(int column) { 110 if (minCost(fRowStart, column, Math.abs(column - fColStart)) > fMaxCost) 112 return Levenstein.SKIP; 113 return Math.abs(column - fColStart); 114 } 115 116 private int computeNullColumn(int row) { 117 if (minCost(row, fColStart, Math.abs(row - fRowStart)) > fMaxCost) 119 return Levenstein.SKIP; 120 return Math.abs(row - fRowStart); 121 } 122 123 private int computeInnerCell(int row, int col) { 124 int fromAbove= sum(getAt(row - fStep, col), Levenstein.COST_INSERT); 125 int fromLeft= sum(getAt(row, col - fStep), Levenstein.COST_DELETE); 126 int minDiag= getAt(row - fStep, col - fStep); 127 128 int minCellValue= Math.min(Math.min(fromAbove, fromLeft), minDiag); 129 int minCost= minCost(row, col, minCellValue); 130 131 if (minCost > fMaxCost) { 132 return Levenstein.SKIP; 133 } else if (minCellValue == fromAbove || minCellValue == fromLeft) { 134 return minCellValue; 135 } else { 136 if (ASSERT) Assert.isTrue(minCellValue == minDiag && fromAbove >= minDiag && fromLeft >= minDiag); 137 138 int nextCharCost= rangesEqual(row, col) ? 0 : Levenstein.COST_CHANGE; 139 minCost= Levenstein.sum(minCost, nextCharCost); 140 if (minCost > fMaxCost) 141 return Levenstein.SKIP; 142 int cost= minDiag + nextCharCost; 143 fMaxCost= Math.min(fMaxCost, maxCost(row, col, cost)); 144 return cost; 145 } 146 } 147 } 148 149 150 private IRangeComparator fLeft; 151 private IRangeComparator fRight; 152 private IProgressMonitor fProgressMonitor; 153 154 157 158 159 160 int[][] fMatrix; 161 162 int[] fPreviousRow; 163 164 int[] fCurrentRow; 165 166 private int[] fResultRow; 167 168 private int fStep; 169 170 private int fRow; 171 172 private int fRowStart; 173 174 private int fRowEnd; 175 176 private int fColStart; 177 178 private int fColEnd; 179 184 private int fMaxCost; 185 186 187 188 private long fComparisons; 189 190 194 private int[] fOptimalSplitColumn; 195 199 private boolean[] fOptimalSplitValues; 200 201 private List fDiffs; 202 203 204 final CellComputer fStandardCC= new DefaultCellComputer(); 205 206 final CellComputer fOptimizedCC= new OptimizedCellComputer(); 207 208 CellComputer fCellComputer= fStandardCC; 209 210 218 public static RangeDifference[] findDifferences(IRangeComparator left, IRangeComparator right) { 219 Levenstein levenstein= new Levenstein(left, right); 220 return levenstein.editScriptHirschberg(); 221 } 222 223 232 public static RangeDifference[] findDifferences(IProgressMonitor pm, IRangeComparator left, IRangeComparator right) { 233 Levenstein levenstein= new Levenstein(pm, left, right); 234 return levenstein.editScriptHirschberg(); 235 } 236 237 244 public Levenstein(IRangeComparator left, IRangeComparator right) { 245 this(null, left, right); 246 } 247 248 256 public Levenstein(IProgressMonitor pm, IRangeComparator left, IRangeComparator right) { 257 if (left == null || right == null) 258 throw new NullPointerException (); 259 fLeft= left; 260 fRight= right; 261 if (pm != null) 262 fProgressMonitor= pm; 263 else 264 fProgressMonitor= new NullProgressMonitor(); 265 } 266 267 272 public int editDistance() { 273 try { 274 fCellComputer= fOptimizedCC; 275 initRows(); 276 277 if (MATRIX) printHeader(fLeft, fRight); 278 279 internalEditDistance(1, fRight.getRangeCount(), 1, fLeft.getRangeCount()); 280 281 if (fProgressMonitor.isCanceled()) 282 return NO_DISTANCE; 283 284 if (DEBUG) 285 System.out.println("" + fComparisons + " comparisons"); 287 return getAt(fRowEnd, fColEnd); 288 } finally { 289 clear(); 290 } 291 } 292 293 298 public RangeDifference[] editScript() { 299 try { 300 fCellComputer= fOptimizedCC; 301 initMatrix(); 302 303 if (MATRIX) 304 printHeader(fLeft, fRight); 305 306 internalEditDistance(1, fRight.getRangeCount(), 1, fLeft.getRangeCount()); 308 309 if (fProgressMonitor.isCanceled()) 310 return EMPTY_DIFFERENCES; 311 312 if (DEBUG) 313 System.out.println("" + fComparisons + " comparisons"); 315 RangeDifference[] script= walkback(); 316 317 return script; 318 319 } finally { 320 clear(); 321 } 322 } 323 324 330 public RangeDifference[] editScriptHirschberg() { 331 try { 332 fCellComputer= fStandardCC; 333 334 initRows(); 335 fResultRow= new int[fCurrentRow.length]; 336 fOptimalSplitColumn= new int[fRight.getRangeCount() + 1]; 337 fOptimalSplitValues= new boolean[fRight.getRangeCount() + 1]; 338 339 hirschberg(1, fRight.getRangeCount(), 1, fLeft.getRangeCount()); 340 341 if (fProgressMonitor.isCanceled()) 342 return EMPTY_DIFFERENCES; 343 344 if (DEBUG) 345 System.out.println("" + fComparisons + " comparisons"); 347 RangeDifference[] script= buildDifferencesHirschberg(); 348 349 return script; 350 351 } finally { 352 clear(); 353 } 354 } 355 356 public int editDistanceHirschberg() { 357 try { 358 fCellComputer= fStandardCC; 359 360 initRows(); 361 fResultRow= new int[fLeft.getRangeCount() + 1]; 362 fOptimalSplitColumn= new int[fRight.getRangeCount() + 1]; 363 fOptimalSplitValues= new boolean[fRight.getRangeCount() + 1]; 364 365 int dist= hirschberg(1, fRight.getRangeCount(), 1, fLeft.getRangeCount()); 366 367 if (fProgressMonitor.isCanceled()) 368 return NO_DISTANCE; 369 370 if (DEBUG) 371 System.out.println("" + fComparisons + " comparisons"); 373 return dist; 374 375 } finally { 376 clear(); 377 } 378 } 379 380 void initMatrix() { 381 initMatrix(fRight.getRangeCount() + 1, fLeft.getRangeCount() + 1); 382 } 383 384 void initMatrix(int rows, int columns) { 385 if (fMatrix == null || fMatrix.length < rows || fMatrix[0].length < columns) 386 fMatrix= new int[rows][columns]; 387 } 388 389 void initRows() { 390 initRows(fLeft.getRangeCount() + 1); 391 } 392 393 void initRows(int columns) { 394 if (fCurrentRow == null || fCurrentRow.length < columns) 395 fCurrentRow= new int[columns]; 396 if (fPreviousRow == null || fPreviousRow.length < columns) 397 fPreviousRow= new int[columns]; 398 } 399 400 403 void internalEditDistance(int rStart, int rEnd, int lStart, int lEnd) { 404 405 if (ASSERT) Assert.isTrue(rStart <= rEnd + 1); 406 if (ASSERT) Assert.isTrue(lStart <= lEnd + 1); 407 408 fStep= 1; 410 fRowStart= rStart - fStep; 411 fRowEnd= rEnd; 412 413 fColStart= lStart - fStep; 414 fColEnd= lEnd; 415 416 fMaxCost= maxCost(fRowStart, fColStart, 0); 417 418 for (fRow= fRowStart; fRow <= fRowEnd; fRow += fStep) { 420 if (fProgressMonitor.isCanceled()) 421 return; 422 423 fProgressMonitor.worked(1); 424 425 for (int col= fColStart; col <= fColEnd; col += fStep) { 427 setAt(fRow, col, fCellComputer.computeCell(fRow, col)); 428 } 429 430 if (MATRIX) printRow(); 431 432 swapRows(); 433 } 434 } 435 436 439 void internalReverseEditDistance(int rStart, int rEnd, int lStart, int lEnd) { 440 441 if (ASSERT) Assert.isTrue(rStart <= rEnd + 1); 442 if (ASSERT) Assert.isTrue(lStart <= lEnd + 1); 443 444 fStep= -1; 446 fRowStart= rEnd - fStep; 447 fRowEnd= rStart; 448 449 fColStart= lEnd - fStep; 450 fColEnd= lStart; 451 452 fMaxCost= maxCost(fRowStart, fColStart, 0); 453 454 for (fRow= fRowStart; fRow >= fRowEnd; fRow += fStep) { 456 if (fProgressMonitor.isCanceled()) 457 return; 458 459 fProgressMonitor.worked(1); 460 461 for (int col= fColStart; col >= fColEnd; col += fStep) { 463 setAt(fRow, col, fCellComputer.computeCell(fRow, col)); 464 } 465 466 if (MATRIX) printRow(); 467 468 swapRows(); 469 } 470 } 471 472 private void swapRows() { 473 int[] tmp; 474 tmp= fPreviousRow; 475 fPreviousRow= fCurrentRow; 476 fCurrentRow= tmp; 477 } 478 479 private void clear() { 480 fPreviousRow= null; 481 fCurrentRow= null; 482 fMatrix= null; 483 fDiffs= null; 484 fResultRow= null; 485 fOptimalSplitColumn= null; 486 fOptimalSplitValues= null; 487 } 488 489 490 491 499 int getAt(int row, int column) { 500 501 if (fStep < 0) 503 column--; 504 505 if (fMatrix != null) 506 return fMatrix[row][column]; 507 508 if (row == fRow) 509 return fCurrentRow[column]; 510 511 if (ASSERT && !(row == fRow - fStep && ((fStep > 0 && row >= fRowStart && row <= fRowEnd) || fStep < 0 && row <= fRowStart && row >= fRowEnd))) { 512 Assert.isTrue(false, "random access to matrix not allowed"); return SKIP; } 515 516 return fPreviousRow[column]; 517 } 518 519 527 private void setAt(int row, int column, int value) { 528 529 if (fStep < 0) 531 column--; 532 533 if (fMatrix != null) { 534 fMatrix[row][column]= value; 535 } else { 536 if (row == fRow) 537 fCurrentRow[column]= value; 538 else if (ASSERT && !(row == fRow - fStep 539 && ((fStep > 0 && row >= fRowStart && row <= fRowEnd) 540 || fStep < 0 && row <= fRowStart && row >= fRowEnd))) 541 Assert.isTrue(false, "random access to matrix not allowed"); else 543 fPreviousRow[column]= value; 544 } 545 } 546 547 551 boolean rangesEqual(int r, int l) { 552 if (DEBUG) fComparisons++; 553 return fLeft.rangesEqual(l - 1, fRight, r - 1); 554 } 555 556 559 private static int sum(int c1, int c2) { 560 int sum= c1 + c2; 561 if (sum < 0) 562 return SKIP; 563 return sum; 564 } 565 566 570 int minCost(int r, int l, int cCur) { 571 if (cCur == SKIP) 575 return SKIP; 576 return cCur + Math.abs((fRowEnd - r) - (fColEnd - l)) * COST_INSERT; } 578 579 583 int maxCost(int r, int l, int cCur) { 584 if (cCur == SKIP) 587 return SKIP; 588 return cCur + Math.max(Math.abs(fRowEnd - r), Math.abs(fColEnd - l)) * COST_CHANGE; 589 } 590 591 592 593 private RangeDifference[] walkback() { 594 fDiffs= new LinkedList (); 595 596 int row= fRowEnd, col= fColEnd; 597 RangeDifference difference= null; 598 599 int cell= fMatrix[row][col]; 601 while (row > 0 || col > 0) { 602 int diag, above, left; 603 604 if (row == 0) { 605 diag= SKIP; 607 above= SKIP; 608 left= col - 1; 609 } else if (col == 0) { 610 diag= SKIP; 612 above= row - 1; 613 left= SKIP; 614 } else { 615 diag= fMatrix[row - 1][col - 1]; 617 above= fMatrix[row - 1][col]; 618 left= fMatrix[row][col - 1]; 619 } 620 621 if (left == cell - 1 && left <= diag && left <= above) { 622 col--; 624 difference= getChange(difference); 625 difference.fLeftStart= col; 626 difference.fLeftLength++; 627 difference.fRightStart= row; 628 629 cell= left; 630 } else if (above == cell - 1 && above <= diag) { 631 row--; 633 difference= getChange(difference); 634 difference.fLeftStart= col; 635 difference.fRightStart= row; 636 difference.fRightLength++; 637 638 cell= above; 639 } else { 640 col--; 641 row--; 642 if (cell == diag) { 643 difference= null; 646 } else if (cell == diag + 1) { 647 difference= getChange(difference); 649 difference.fLeftStart= col; 650 difference.fLeftLength++; 651 difference.fRightStart= row; 652 difference.fRightLength++; 653 } else { 654 if (ASSERT) Assert.isTrue(false, "illegal matrix"); } 656 657 cell= diag; 658 } 659 660 } 661 662 return (RangeDifference[]) fDiffs.toArray(new RangeDifference[fDiffs.size()]); 663 } 664 665 private RangeDifference getChange(RangeDifference difference) { 666 if (difference != null) 667 return difference; 668 669 difference= new RangeDifference(RangeDifference.CHANGE); 670 fDiffs.add(0, difference); 671 return difference; 672 } 673 674 675 676 private int hirschberg(int rStart, int rEnd, int lStart, int lEnd) { 677 678 679 680 if (rEnd < rStart) { 681 return lEnd - lStart + 1; 683 } else if (rStart == rEnd) { 684 internalEditDistance(rStart, rEnd, lStart, lEnd); 686 int distance= SKIP; 687 for (int col= lStart - 1; col <= lEnd; col++) { 688 distance= fPreviousRow[col]; 689 if (distance == 0) { 690 fOptimalSplitColumn[rStart]= col; 691 fOptimalSplitValues[rStart]= true; 692 return 0; 693 } 694 } 695 fOptimalSplitColumn[rStart]= lEnd; 696 fOptimalSplitValues[rStart]= false; 697 if (distance == SKIP) 698 return 1; 699 return distance; 700 } 701 708 709 710 int rowSplit= (rStart + rEnd + 1) / 2 - 1; 712 713 internalEditDistance(rStart, rowSplit, lStart, lEnd); 715 int[] tmp= fPreviousRow; 716 fPreviousRow= fResultRow; 717 fResultRow= tmp; 718 internalReverseEditDistance(rowSplit + 1, rEnd, lStart, lEnd); 720 721 int columnSplit= SKIP, distance= SKIP; 724 for (int col= lStart - 1; col <= lEnd; col++) { 725 int sum= sum(fResultRow[col], fPreviousRow[col ]); 726 if (sum < distance) { 727 distance= sum; 728 columnSplit= col; 729 } 730 } 731 732 if (fProgressMonitor.isCanceled()) 733 return NO_DISTANCE; 734 735 if (ASSERT) Assert.isTrue(distance != SKIP); 736 if (ASSERT) Assert.isTrue(columnSplit != SKIP); 737 738 if (distance == 0) { 739 if (ASSERT) Assert.isTrue(rEnd - rStart == lEnd - lStart); 742 int col= lStart; int row= rStart; 743 while (row <= rEnd) { 744 fOptimalSplitColumn[row]= col; 745 fOptimalSplitValues[row]= true; 746 col++; 747 row++; 748 } 749 return distance; 750 } 751 752 fOptimalSplitColumn[rowSplit]= columnSplit; 754 fOptimalSplitValues[rowSplit]= false; 755 756 hirschberg(rStart, rowSplit, lStart, columnSplit); 759 hirschberg(rowSplit + 1, rEnd, columnSplit + 1, lEnd); 760 761 return distance; 762 } 763 764 private RangeDifference[] buildDifferencesHirschberg() { 765 fDiffs= new LinkedList (); 766 767 RangeDifference difference= null; 768 int previousColumn= 0; 769 770 for (int row= 1; row < fOptimalSplitColumn.length; row++) { 771 int previousRow= row - 1; 772 int column= fOptimalSplitColumn[row]; 774 if (column == previousColumn + 1) { 775 if (fOptimalSplitValues[row]) { 777 difference= null; 780 } else { 781 difference= getChange(difference, previousRow, previousColumn); 783 difference.fLeftLength++; 784 difference.fRightLength++; 785 } 786 } else if (column == previousColumn) { 787 difference= getChange(difference, previousRow, previousColumn); 789 difference.fRightLength++; 790 791 } else if (column > previousColumn) { 792 difference= getChange(difference, previousRow, previousColumn); 794 difference.fLeftLength += column - previousColumn - 1; 795 } else { 796 if (ASSERT) Assert.isTrue(false, "Illegal edit description"); } 798 799 previousColumn= column; 800 } 801 802 if (previousColumn < fLeft.getRangeCount()) { 803 804 difference= getChange(difference, fOptimalSplitColumn.length - 1, previousColumn); 806 difference.fLeftLength += fLeft.getRangeCount() - previousColumn; 807 } 808 809 return (RangeDifference[]) fDiffs.toArray(new RangeDifference[fDiffs.size()]); 810 } 811 812 private RangeDifference getChange(RangeDifference difference, int row, int column) { 813 if (difference != null) 814 return difference; 815 816 difference= new RangeDifference(RangeDifference.CHANGE, row, 0, column, 0); 817 fDiffs.add(difference); 818 return difference; 819 } 820 821 822 823 private void printRow() { 824 if (fMatrix != null) 825 print(fMatrix[fRow]); 826 else 827 print(fCurrentRow); 828 } 829 830 private static void printHeader(IRangeComparator left, IRangeComparator right) { 831 System.out.println("============================="); System.out.println("= s1: " + left.toString()); System.out.println("= s2: " + right.toString()); System.out.println(); 835 } 836 837 private static void print(int[] row) { 838 for (int i= 0; i < row.length; i++) { 839 System.out.print("\t" + (row[i] == Integer.MAX_VALUE ? "-" : "" + row[i])); } 841 System.out.println(); 842 } 843 844 } 845 | Popular Tags |