KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > eclipse > ui > internal > texteditor > quickdiff > compare > rangedifferencer > Levenshtein


1 /*******************************************************************************
2  * Copyright (c) 2000, 2006 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Eclipse Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/epl-v10.html
7  *
8  * Contributors:
9  * IBM Corporation - initial API and implementation
10  *******************************************************************************/

11 package org.eclipse.ui.internal.texteditor.quickdiff.compare.rangedifferencer;
12
13 import java.util.LinkedList JavaDoc;
14 import java.util.List JavaDoc;
15
16 import org.eclipse.core.runtime.Assert;
17 import org.eclipse.core.runtime.IProgressMonitor;
18 import org.eclipse.core.runtime.NullProgressMonitor;
19
20
21
22
23
24 /**
25  * Levenshtein distance and edit script computation using a dynamic programming algorithm.
26  * The algorithm is O(n*m) in time where n and m are the number of elements in
27  * the two ranges to compare. It does not implement the greedy Ukkonen algorithm.
28  *
29  * @since 3.1
30  */

31 public final class Levenshtein {
32     /* debug output */
33     private static final boolean DEBUG= false;
34     private static final boolean MATRIX= false;
35     
36     /*
37      * asserts - enable conditional compilation by not making this a trace option.
38      * @since 3.2
39      */

40     private static final boolean ASSERT= false;
41
42     /* edit cost constants */
43     private static final int COST_DELETE= 1;
44     private static final int COST_INSERT= 1;
45     private static final int COST_CHANGE= 1;
46     private static final int SKIP= Integer.MAX_VALUE;
47
48     private static final RangeDifference[] EMPTY_DIFFERENCES= new RangeDifference[0];
49     private static final int NO_DISTANCE= 0;
50
51     private interface CellComputer {
52         int computeCell(int row, int col);
53     }
54
55     private final class DefaultCellComputer implements CellComputer {
56         public int computeCell(int row, int column) {
57             if (row == fRowStart)
58                 return computeNullRow(column);
59             else if (column == fColStart)
60                 return computeNullColumn(row);
61             else
62                 return computeInnerCell(row, column);
63         }
64
65         private int computeNullRow(int column) {
66             // initialize first row, [0,i] = i if it is valid
67
return Math.abs(column - fColStart);
68         }
69
70         private int computeNullColumn(int row) {
71             // initialize first column
72
return Math.abs(row - fRowStart);
73         }
74
75         private int computeInnerCell(int row, int col) {
76             int fromAbove= sum(getAt(row - fStep, col), COST_INSERT);
77             int fromLeft= sum(getAt(row, col - fStep), COST_DELETE);
78             int minDiag= getAt(row - fStep, col - fStep);
79
80             int minCellValue= Math.min(Math.min(fromAbove, fromLeft), minDiag);
81             int minCost= minCost(row, col, minCellValue);
82
83             if (minCellValue == fromAbove || minCellValue == fromLeft)
84                 return minCellValue;
85
86             if (ASSERT) Assert.isTrue(minCellValue == minDiag && fromAbove >= minDiag && fromLeft >= minDiag);
87
88             int nextCharCost= rangesEqual(row, col) ? 0 : COST_CHANGE;
89             minCost= sum(minCost, nextCharCost);
90             int cost= minDiag + nextCharCost;
91             return cost;
92         }
93     }
94
95     /**
96      * Reduces the needed comparisons - can not be used for hirschberg as we
97      * don't have global values there.
98      */

99     private final class OptimizedCellComputer implements CellComputer {
100         public int computeCell(int row, int column) {
101             if (row == fRowStart)
102                 return computeNullRow(column);
103             else if (column == fColStart)
104                 return computeNullColumn(row);
105             else
106                 return computeInnerCell(row, column);
107         }
108
109         private int computeNullRow(int column) {
110             // initialize first row, [0,i] = i if it is valid
111
if (minCost(fRowStart, column, Math.abs(column - fColStart)) > fMaxCost)
112                 return Levenshtein.SKIP;
113             return Math.abs(column - fColStart);
114         }
115
116         private int computeNullColumn(int row) {
117             // initialize first column
118
if (minCost(row, fColStart, Math.abs(row - fRowStart)) > fMaxCost)
119                 return Levenshtein.SKIP;
120             return Math.abs(row - fRowStart);
121         }
122
123         private int computeInnerCell(int row, int col) {
124             int fromAbove= sum(getAt(row - fStep, col), Levenshtein.COST_INSERT);
125             int fromLeft= sum(getAt(row, col - fStep), Levenshtein.COST_DELETE);
126             int minDiag= getAt(row - fStep, col - fStep);
127
128             int minCellValue= Math.min(Math.min(fromAbove, fromLeft), minDiag);
129             int minCost= minCost(row, col, minCellValue);
130
131             if (minCost > fMaxCost) {
132                 return Levenshtein.SKIP;
133             } else if (minCellValue == fromAbove || minCellValue == fromLeft) {
134                 return minCellValue;
135             } else {
136                 if (ASSERT) Assert.isTrue(minCellValue == minDiag && fromAbove >= minDiag && fromLeft >= minDiag);
137
138                 int nextCharCost= rangesEqual(row, col) ? 0 : Levenshtein.COST_CHANGE;
139                 minCost= Levenshtein.sum(minCost, nextCharCost);
140                 if (minCost > fMaxCost)
141                     return Levenshtein.SKIP;
142                 int cost= minDiag + nextCharCost;
143                 fMaxCost= Math.min(fMaxCost, maxCost(row, col, cost));
144                 return cost;
145             }
146         }
147     }
148
149     /* the domain ranges we compare */
150     private IRangeComparator fLeft;
151     private IRangeComparator fRight;
152     private IProgressMonitor fProgressMonitor;
153
154     /* algorithmic variables - may or may not be used by a method, always
155      * nulled out by clear().
156      */

157     /* package visible for testing */
158
159     /** The matrix for full blown N * M edit distance and script computation. */
160     int[][] fMatrix;
161     /** The two columns for dynamic algorithms - the last row. */
162     int[] fPreviousRow;
163     /** The two columns for dynamic algorithms - the current row. */
164     int[] fCurrentRow;
165     /** Used by hirschberg's algorithm to store the result of one run. */
166     private int[] fResultRow;
167     /** Direction of the matrix calculation - either <code>1</code> or <code>-1</code>. */
168     private int fStep;
169     /** The current row of the dynamic matrix calculation. */
170     private int fRow;
171     /** The first row of the matrix to calculate. */
172     private int fRowStart;
173     /** The last (inclusive) row of the matrix. */
174     private int fRowEnd;
175     /** The first column of the matrix to calculate. */
176     private int fColStart;
177     /** The last (inclusive) column of the matrix. */
178     private int fColEnd;
179     /**
180      * Maximum cost of the remaining computation given the current state of the
181      * computation. For edit distance calculation, this may be used to prune
182      * impossible cells.
183      */

184     private int fMaxCost;
185
186     /* statistics collection */
187     /** Keeps track of the number of needed comparisons. */
188     private long fComparisons;
189
190     /**
191      * For each row, Hirschberg stores the column of the optimal alignment
192      * in the next row of the matrix.
193      */

194     private int[] fOptimalSplitColumn;
195     /**
196      * For each row, this stores whether the domain elements at the [row,column]
197      * returned equality, where column is the value in <code>fOptimalSplitColumn</code>.
198      */

199     private boolean[] fOptimalSplitValues;
200     /** List of differences computed by the walkback methods. */
201     private List JavaDoc fDiffs;
202
203     /** Normal matrix cell computer. */
204     final CellComputer fStandardCC= new DefaultCellComputer();
205     /** Optimized cell computer for that prunes impossible cells. */
206     final CellComputer fOptimizedCC= new OptimizedCellComputer();
207     /** The current cell computer. */
208     CellComputer fCellComputer= fStandardCC;
209
210     /**
211      * Convenience method to compute the edit script between two range
212      * comparators, see <code>RangeDifferencer</code>.
213      *
214      * @param left the left hand side domain range
215      * @param right the right hand side domain range
216      * @return the edit script from left to right
217      */

218     public static RangeDifference[] findDifferences(IRangeComparator left, IRangeComparator right) {
219         Levenshtein levenshtein= new Levenshtein(left, right);
220         return levenshtein.editScriptHirschberg();
221     }
222
223     /**
224      * Convenience method to compute the edit script between two range
225      * comparators, see <code>RangeDifferencer</code>.
226      *
227      * @param pm a progress monitor, or <code>null</code> if no progress should be reported
228      * @param left the left hand side domain range
229      * @param right the right hand side domain range
230      * @return the edit script from left to right
231      */

232     public static RangeDifference[] findDifferences(IProgressMonitor pm, IRangeComparator left, IRangeComparator right) {
233         Levenshtein levenshtein= new Levenshtein(pm, left, right);
234         return levenshtein.editScriptHirschberg();
235     }
236
237     /**
238      * Create a new differ that operates on the two domain range comparators
239      * given.
240      *
241      * @param left the left domain range
242      * @param right the right domain range
243      */

244     public Levenshtein(IRangeComparator left, IRangeComparator right) {
245         this(null, left, right);
246     }
247
248     /**
249      * Create a new differ that operates on the two domain range comparators
250      * given.
251      *
252      * @param pm a progress monitor, or <code>null</code> if no progress should be reported
253      * @param left the left domain range
254      * @param right the right domain range
255      */

256     public Levenshtein(IProgressMonitor pm, IRangeComparator left, IRangeComparator right) {
257         if (left == null || right == null)
258             throw new NullPointerException JavaDoc();
259         fLeft= left;
260         fRight= right;
261         if (pm != null)
262             fProgressMonitor= pm;
263         else
264             fProgressMonitor= new NullProgressMonitor();
265     }
266
267     /**
268      * Computes the edit distance.
269      *
270      * @return the edit distance of the two range comparators
271      */

272     public int editDistance() {
273         try {
274             fCellComputer= fOptimizedCC;
275             initRows();
276
277             if (MATRIX) printHeader(fLeft, fRight);
278
279             internalEditDistance(1, fRight.getRangeCount(), 1, fLeft.getRangeCount());
280
281             if (fProgressMonitor.isCanceled())
282                 return NO_DISTANCE;
283
284             if (DEBUG)
285                 System.out.println("" + fComparisons + " comparisons"); //$NON-NLS-1$//$NON-NLS-2$
286

287             return getAt(fRowEnd, fColEnd);
288         } finally {
289             clear();
290         }
291     }
292
293     /**
294      * Computes the edit script. This is quadratic in space and time.
295      *
296      * @return the shortest edit script between the two range comparators
297      */

298     public RangeDifference[] editScript() {
299         try {
300             fCellComputer= fOptimizedCC;
301             initMatrix();
302
303             if (MATRIX)
304                 printHeader(fLeft, fRight);
305
306             // build the matrix
307
internalEditDistance(1, fRight.getRangeCount(), 1, fLeft.getRangeCount());
308
309             if (fProgressMonitor.isCanceled())
310                 return EMPTY_DIFFERENCES;
311
312             if (DEBUG)
313                 System.out.println("" + fComparisons + " comparisons"); //$NON-NLS-1$//$NON-NLS-2$
314

315             RangeDifference[] script= walkback();
316
317             return script;
318
319         } finally {
320             clear();
321         }
322     }
323
324     /**
325      * Computes the edit script. This is quadratic in time but linear in space;
326      * it is about twice as slow as the <code>editScript</code> method.
327      *
328      * @return the shortest edit script between the two range comparators
329      */

330     public RangeDifference[] editScriptHirschberg() {
331         try {
332             fCellComputer= fStandardCC;
333
334             initRows();
335             fResultRow= new int[fCurrentRow.length];
336             fOptimalSplitColumn= new int[fRight.getRangeCount() + 1];
337             fOptimalSplitValues= new boolean[fRight.getRangeCount() + 1];
338
339             hirschberg(1, fRight.getRangeCount(), 1, fLeft.getRangeCount());
340
341             if (fProgressMonitor.isCanceled())
342                 return EMPTY_DIFFERENCES;
343
344             if (DEBUG)
345                 System.out.println("" + fComparisons + " comparisons"); //$NON-NLS-1$//$NON-NLS-2$
346

347             RangeDifference[] script= buildDifferencesHirschberg();
348
349             return script;
350
351         } finally {
352             clear();
353         }
354     }
355
356     public int editDistanceHirschberg() {
357         try {
358             fCellComputer= fStandardCC;
359
360             initRows();
361             fResultRow= new int[fLeft.getRangeCount() + 1];
362             fOptimalSplitColumn= new int[fRight.getRangeCount() + 1];
363             fOptimalSplitValues= new boolean[fRight.getRangeCount() + 1];
364
365             int dist= hirschberg(1, fRight.getRangeCount(), 1, fLeft.getRangeCount());
366
367             if (fProgressMonitor.isCanceled())
368                 return NO_DISTANCE;
369
370             if (DEBUG)
371                 System.out.println("" + fComparisons + " comparisons"); //$NON-NLS-1$//$NON-NLS-2$
372

373             return dist;
374
375         } finally {
376             clear();
377         }
378     }
379
380     void initMatrix() {
381         initMatrix(fRight.getRangeCount() + 1, fLeft.getRangeCount() + 1);
382     }
383
384     void initMatrix(int rows, int columns) {
385         if (fMatrix == null || fMatrix.length < rows || fMatrix[0].length < columns)
386             fMatrix= new int[rows][columns];
387     }
388
389     void initRows() {
390         initRows(fLeft.getRangeCount() + 1);
391     }
392
393     void initRows(int columns) {
394         if (fCurrentRow == null || fCurrentRow.length < columns)
395             fCurrentRow= new int[columns];
396         if (fPreviousRow == null || fPreviousRow.length < columns)
397             fPreviousRow= new int[columns];
398     }
399
400     /*
401      * Fill the matrix, but do not allocate it.
402      */

403     void internalEditDistance(int rStart, int rEnd, int lStart, int lEnd) {
404
405         if (ASSERT) Assert.isTrue(rStart <= rEnd + 1);
406         if (ASSERT) Assert.isTrue(lStart <= lEnd + 1);
407
408         // build the matrix
409
fStep= 1;
410         fRowStart= rStart - fStep;
411         fRowEnd= rEnd;
412
413         fColStart= lStart - fStep;
414         fColEnd= lEnd;
415
416         fMaxCost= maxCost(fRowStart, fColStart, 0);
417
418         for (fRow= fRowStart; fRow <= fRowEnd; fRow += fStep) { // for every row
419

420             if (fProgressMonitor.isCanceled())
421                 return;
422             
423             fProgressMonitor.worked(1);
424
425             for (int col= fColStart; col <= fColEnd; col += fStep) { // for every column
426

427                 setAt(fRow, col, fCellComputer.computeCell(fRow, col));
428             }
429
430             if (MATRIX) printRow();
431
432             swapRows();
433         }
434     }
435
436     /*
437      * Fill the matrix, but do not allocate it.
438      */

439     void internalReverseEditDistance(int rStart, int rEnd, int lStart, int lEnd) {
440
441         if (ASSERT) Assert.isTrue(rStart <= rEnd + 1);
442         if (ASSERT) Assert.isTrue(lStart <= lEnd + 1);
443
444         // build the matrix
445
fStep= -1;
446         fRowStart= rEnd - fStep;
447         fRowEnd= rStart;
448
449         fColStart= lEnd - fStep;
450         fColEnd= lStart;
451
452         fMaxCost= maxCost(fRowStart, fColStart, 0);
453
454         for (fRow= fRowStart; fRow >= fRowEnd; fRow += fStep) { // for every row
455

456             if (fProgressMonitor.isCanceled())
457                 return;
458             
459             fProgressMonitor.worked(1);
460
461             for (int col= fColStart; col >= fColEnd; col += fStep) { // for every column
462

463                 setAt(fRow, col, fCellComputer.computeCell(fRow, col));
464             }
465
466             if (MATRIX) printRow();
467
468             swapRows();
469         }
470     }
471
472     private void swapRows() {
473         int[] tmp;
474         tmp= fPreviousRow;
475         fPreviousRow= fCurrentRow;
476         fCurrentRow= tmp;
477     }
478
479     private void clear() {
480         fPreviousRow= null;
481         fCurrentRow= null;
482         fMatrix= null;
483         fDiffs= null;
484         fResultRow= null;
485         fOptimalSplitColumn= null;
486         fOptimalSplitValues= null;
487     }
488
489     /* access methods for the compare algorithm */
490
491     /**
492      * Returns the matrix value for [row, column]. Note that not the entire
493      * matrix may be available at all times.
494      *
495      * @param row the row (right domain index)
496      * @param column (left domain index)
497      * @return the matrix value for the given row and column
498      */

499     int getAt(int row, int column) {
500
501         // shift reverse iteration towards left by one
502
if (fStep < 0)
503             column--;
504
505         if (fMatrix != null)
506             return fMatrix[row][column];
507
508         if (row == fRow)
509             return fCurrentRow[column];
510
511         if (ASSERT && !(row == fRow - fStep && ((fStep > 0 && row >= fRowStart && row <= fRowEnd) || fStep < 0 && row <= fRowStart && row >= fRowEnd))) {
512             Assert.isTrue(false, "random access to matrix not allowed"); //$NON-NLS-1$
513
return SKIP; // dummy
514
}
515
516         return fPreviousRow[column];
517     }
518
519     /**
520      * Sets the matrix value at [row, column]. Note that not the entire
521      * matrix may be available at all times.
522      *
523      * @param row the row (right domain index)
524      * @param column (left domain index)
525      * @param value the value to set
526      */

527     private void setAt(int row, int column, int value) {
528
529         // shift reverse iteration towards left by one
530
if (fStep < 0)
531             column--;
532
533         if (fMatrix != null) {
534             fMatrix[row][column]= value;
535         } else {
536             if (row == fRow)
537                 fCurrentRow[column]= value;
538             else if (ASSERT && !(row == fRow - fStep
539                     && ((fStep > 0 && row >= fRowStart && row <= fRowEnd)
540                       || fStep < 0 && row <= fRowStart && row >= fRowEnd)))
541                 Assert.isTrue(false, "random access to matrix not allowed"); //$NON-NLS-1$
542
else
543                 fPreviousRow[column]= value;
544         }
545     }
546
547     /*
548      * Compares the two domain element ranges corresponding to the cell at
549      * [r,l], that is the (zero-based) elements at r - 1 and l - 1.
550      */

551     boolean rangesEqual(int r, int l) {
552         if (DEBUG) fComparisons++;
553         return fLeft.rangesEqual(l - 1, fRight, r - 1);
554     }
555
556     /*
557      * Adds two cell cost values, never exceeding SKIP.
558      */

559     private static int sum(int c1, int c2) {
560         int sum= c1 + c2;
561         if (sum < 0)
562             return SKIP;
563         return sum;
564     }
565
566     /*
567      * Computes the best possible edit distance from cell [r,l] if getting
568      * there has cost cCur.
569      */

570     int minCost(int r, int l, int cCur) {
571         // minimal cost from cell [r,l] to [rCount, lCount] if cell cost == cost
572
// Assume that the minimum of the remaining columns / rows are equal, and just
573
// the rest of the ranges has to be inserted / deleted
574
if (cCur == SKIP)
575             return SKIP;
576         return cCur + Math.abs((fRowEnd - r) - (fColEnd - l)) * COST_INSERT; // can be either insert or delete
577
}
578
579     /*
580      * Computes the worst possible edit distance from cell [r,l] if getting
581      * there has cost cCur.
582      */

583     int maxCost(int r, int l, int cCur) {
584         // maximal cost from cell [r,l] to [rCount, lCount] if cell cost == cost
585
// maximal additional cost is the maximum remaining columns / rows
586
if (cCur == SKIP)
587             return SKIP;
588         return cCur + Math.max(Math.abs(fRowEnd - r), Math.abs(fColEnd - l)) * COST_CHANGE;
589     }
590
591     /* classic implementation */
592
593     private RangeDifference[] walkback() {
594         fDiffs= new LinkedList JavaDoc();
595
596         int row= fRowEnd, col= fColEnd;
597         RangeDifference difference= null;
598
599         int cell= fMatrix[row][col]; // edit distance
600

601         while (row > 0 || col > 0) {
602             int diag, above, left;
603
604             if (row == 0) {
605                 // slide deletes along row 0
606
diag= SKIP;
607                 above= SKIP;
608                 left= col - 1;
609             } else if (col == 0) {
610                 // slide inserts along column 0
611
diag= SKIP;
612                 above= row - 1;
613                 left= SKIP;
614             } else {
615                 // inner cells
616
diag= fMatrix[row - 1][col - 1];
617                 above= fMatrix[row - 1][col];
618                 left= fMatrix[row][col - 1];
619             }
620
621             if (left == cell - 1 && left <= diag && left <= above) {
622                 // delete
623
col--;
624                 difference= getChange(difference);
625                 difference.fLeftStart= col;
626                 difference.fLeftLength++;
627                 difference.fRightStart= row;
628
629                 cell= left;
630             } else if (above == cell - 1 && above <= diag) {
631                 // insert
632
row--;
633                 difference= getChange(difference);
634                 difference.fLeftStart= col;
635                 difference.fRightStart= row;
636                 difference.fRightLength++;
637
638                 cell= above;
639             } else {
640                 col--;
641                 row--;
642                 if (cell == diag) {
643                     // match
644
// alternatively, create NOCHANGE ranges for findRanges
645
difference= null;
646                 } else if (cell == diag + 1) {
647                     // change
648
difference= getChange(difference);
649                     difference.fLeftStart= col;
650                     difference.fLeftLength++;
651                     difference.fRightStart= row;
652                     difference.fRightLength++;
653                 } else {
654                     if (ASSERT) Assert.isTrue(false, "illegal matrix"); //$NON-NLS-1$
655
}
656
657                 cell= diag;
658             }
659
660         }
661
662         return (RangeDifference[]) fDiffs.toArray(new RangeDifference[fDiffs.size()]);
663     }
664
665     private RangeDifference getChange(RangeDifference difference) {
666         if (difference != null)
667             return difference;
668
669         difference= new RangeDifference(RangeDifference.CHANGE);
670         fDiffs.add(0, difference);
671         return difference;
672     }
673
674     /* hirschberg's algorithm */
675
676     private int hirschberg(int rStart, int rEnd, int lStart, int lEnd) {
677
678         /* trivial cases */
679
680         if (rEnd < rStart) {
681             // right range is empty
682
return lEnd - lStart + 1;
683         } else if (rStart == rEnd) {
684             // right has length 1: look for a match and split
685
internalEditDistance(rStart, rEnd, lStart, lEnd);
686             int distance= SKIP;
687             for (int col= lStart - 1; col <= lEnd; col++) {
688                 distance= fPreviousRow[col];
689                 if (distance == 0) {
690                     fOptimalSplitColumn[rStart]= col;
691                     fOptimalSplitValues[rStart]= true;
692                     return 0;
693                 }
694             }
695             fOptimalSplitColumn[rStart]= lEnd;
696             fOptimalSplitValues[rStart]= false;
697             if (distance == SKIP)
698                 return 1;
699             return distance;
700         }
701 // else if (lEnd < lStart) {
702
// // left is empty // perhaps not necessary
703
// Arrays.fill(fOptimalSplitColumn, rStart, rEnd + 1, lEnd);
704
// Arrays.fill(fOptimalSplitValues, rStart, rEnd + 1, false);
705
// return rEnd - rStart + 1;
706
// }
707

708         /* divide & conquer */
709
710         // split rows at half
711
int rowSplit= (rStart + rEnd + 1) / 2 - 1;
712
713         // compute edit distance of (r1,left) in linear space into fPreviousRow
714
internalEditDistance(rStart, rowSplit, lStart, lEnd);
715         int[] tmp= fPreviousRow;
716         fPreviousRow= fResultRow;
717         fResultRow= tmp;
718         // compute backwards edit distance of (r2,left) in linear space into fPreviousRow
719
internalReverseEditDistance(rowSplit + 1, rEnd, lStart, lEnd);
720
721         // find optimal alignment - the column in which to split the
722
// left hand side
723
int columnSplit= SKIP, distance= SKIP;
724         for (int col= lStart - 1; col <= lEnd; col++) {
725             int sum= sum(fResultRow[col], fPreviousRow[col ]);
726             if (sum < distance) {
727                 distance= sum;
728                 columnSplit= col;
729             }
730         }
731
732         if (fProgressMonitor.isCanceled())
733             return NO_DISTANCE;
734
735         if (ASSERT) Assert.isTrue(distance != SKIP);
736         if (ASSERT) Assert.isTrue(columnSplit != SKIP);
737
738         if (distance == 0) {
739             // optimize for large unchanged parts
740
// no further partitioning needed, this part is equal
741
if (ASSERT) Assert.isTrue(rEnd - rStart == lEnd - lStart);
742             int col= lStart; int row= rStart;
743             while (row <= rEnd) {
744                 fOptimalSplitColumn[row]= col;
745                 fOptimalSplitValues[row]= true;
746                 col++;
747                 row++;
748             }
749             return distance;
750         }
751
752         // store alignment: from [rowSplit, ?] connect to [rowSplit + 1, columnSplit]
753
fOptimalSplitColumn[rowSplit]= columnSplit;
754         fOptimalSplitValues[rowSplit]= false;
755
756         // divide at column & conquer
757
// TODO guard against stack overflow
758
hirschberg(rStart, rowSplit, lStart, columnSplit);
759         hirschberg(rowSplit + 1, rEnd, columnSplit + 1, lEnd);
760
761         return distance;
762     }
763
764     private RangeDifference[] buildDifferencesHirschberg() {
765         fDiffs= new LinkedList JavaDoc();
766
767         RangeDifference difference= null;
768         int previousColumn= 0;
769
770         for (int row= 1; row < fOptimalSplitColumn.length; row++) {
771             int previousRow= row - 1;
772             int column= fOptimalSplitColumn[row]; // from (row-1), jump to column (column) in row (row)
773

774             if (column == previousColumn + 1) {
775                 // diagonal
776
if (fOptimalSplitValues[row]) {
777                     // match
778
// alternatively, create NOCHANGE ranges for findRanges
779
difference= null;
780                 } else {
781                     // change
782
difference= getChange(difference, previousRow, previousColumn);
783                     difference.fLeftLength++;
784                     difference.fRightLength++;
785                 }
786             } else if (column == previousColumn) {
787                 // downwards / insert
788
difference= getChange(difference, previousRow, previousColumn);
789                 difference.fRightLength++;
790
791             } else if (column > previousColumn) {
792                 // rightward / deletes
793
difference= getChange(difference, previousRow, previousColumn);
794                 difference.fLeftLength += column - previousColumn - 1;
795             } else {
796                 if (ASSERT) Assert.isTrue(false, "Illegal edit description"); //$NON-NLS-1$
797
}
798
799             previousColumn= column;
800         }
801
802         if (previousColumn < fLeft.getRangeCount()) {
803
804             // trailing deletions
805
difference= getChange(difference, fOptimalSplitColumn.length - 1, previousColumn);
806             difference.fLeftLength += fLeft.getRangeCount() - previousColumn;
807         }
808
809         return (RangeDifference[]) fDiffs.toArray(new RangeDifference[fDiffs.size()]);
810     }
811
812     private RangeDifference getChange(RangeDifference difference, int row, int column) {
813         if (difference != null)
814             return difference;
815
816         difference= new RangeDifference(RangeDifference.CHANGE, row, 0, column, 0);
817         fDiffs.add(difference);
818         return difference;
819     }
820
821     /* pretty printing for debug output */
822
823     private void printRow() {
824         if (fMatrix != null)
825             print(fMatrix[fRow]);
826         else
827             print(fCurrentRow);
828     }
829
830     private static void printHeader(IRangeComparator left, IRangeComparator right) {
831         System.out.println("============================="); //$NON-NLS-1$
832
System.out.println("= s1: " + left.toString()); //$NON-NLS-1$
833
System.out.println("= s2: " + right.toString()); //$NON-NLS-1$
834
System.out.println();
835     }
836
837     private static void print(int[] row) {
838         for (int i= 0; i < row.length; i++) {
839             System.out.print("\t" + (row[i] == Integer.MAX_VALUE ? "-" : "" + row[i])); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
840
}
841         System.out.println();
842     }
843
844 }
845
Popular Tags