KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > eclipse > compare > internal > LCS


1 /*******************************************************************************
2  * Copyright (c) 2000, 2006 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Eclipse Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/epl-v10.html
7  *
8  * Contributors:
9  * IBM Corporation - initial API and implementation
10  *******************************************************************************/

11 package org.eclipse.compare.internal;
12
13 import org.eclipse.core.runtime.OperationCanceledException;
14 import org.eclipse.core.runtime.SubMonitor;
15
16
17 /* Used to determine the change set responsible for each line */
18 public abstract class LCS {
19     private static final double TOO_LONG = 10000000.0; // the value of N*M when
20
// to start binding the
21
// run time
22

23     private static final double POW_LIMIT = 1.5; // limit the time to
24
// D^POW_LIMIT
25

26     private int max_differences; // the maximum number of differences from
27
// each end to consider
28

29     private int length;
30
31     /**
32      * Myers' algorithm for longest common subsequence. O((M + N)D) worst case
33      * time, O(M + N + D^2) expected time, O(M + N) space
34      * (http://citeseer.ist.psu.edu/myers86ond.html)
35      *
36      * Note: Beyond implementing the algorithm as described in the paper I have
37      * added diagonal range compression which helps when finding the LCS of a
38      * very long and a very short sequence, also bound the running time to (N +
39      * M)^1.5 when both sequences are very long.
40      *
41      * After this method is called, the longest common subsequence is available
42      * by calling getResult() where result[0] is composed of
43      * entries from l1 and result[1] is composed of entries from l2
44      * @param subMonitor
45      */

46     public void longestCommonSubsequence(SubMonitor subMonitor) {
47         int length1 = getLength1();
48         int length2 = getLength2();
49         if (length1 == 0 || length2 == 0) {
50             length = 0;
51             return;
52         }
53
54         max_differences = (length1 + length2 + 1) / 2; // ceil((N+M)/2)
55
if ((double) length1 * (double) length2 > TOO_LONG) {
56             // limit complexity to D^POW_LIMIT for long sequences
57
max_differences = (int) Math.pow(max_differences, POW_LIMIT - 1.0);
58         }
59
60         initializeLcs(length1);
61         
62         subMonitor.beginTask(null, length1);
63         
64         /*
65          * The common prefixes and suffixes are always part of some LCS, include
66          * them now to reduce our search space
67          */

68         int forwardBound;
69         int max = Math.min(length1, length2);
70         for (forwardBound = 0; forwardBound < max
71                 && isRangeEqual(forwardBound, forwardBound); forwardBound++) {
72             setLcs(forwardBound, forwardBound);
73             worked(subMonitor, 1);
74         }
75
76         int backBoundL1 = length1 - 1;
77         int backBoundL2 = length2 - 1;
78
79         while (backBoundL1 >= forwardBound && backBoundL2 >= forwardBound
80                 && isRangeEqual(backBoundL1, backBoundL2)) {
81             setLcs(backBoundL1, backBoundL2);
82             backBoundL1--;
83             backBoundL2--;
84             worked(subMonitor, 1);
85         }
86
87         length = forwardBound
88                 + length1
89                 - backBoundL1
90                 - 1
91                 + lcs_rec(forwardBound, backBoundL1, forwardBound,
92                         backBoundL2, new int[2][length1 + length2 + 1],
93                         new int[3], subMonitor);
94
95     }
96
97     /**
98      * The recursive helper function for Myers' LCS. Computes the LCS of
99      * l1[bottoml1 .. topl1] and l2[bottoml2 .. topl2] fills in the appropriate
100      * location in lcs and returns the length
101      *
102      * @param l1 The 1st sequence
103      * @param bottoml1 Index in the 1st sequence to start from (inclusive)
104      * @param topl1 Index in the 1st sequence to end on (inclusive)
105      * @param l2 The 2nd sequence
106      * @param bottoml2 Index in the 2nd sequence to start from (inclusive)
107      * @param topl2 Index in the 2nd sequence to end on (inclusive)
108      * @param V should be allocated as int[2][l1.length + l2.length + 1], used
109      * to store furthest reaching D-paths
110      * @param snake should be allocated as int[3], used to store the beginning
111      * x, y coordinates and the length of the latest snake traversed
112      * @param subMonitor
113      * @param lcs should be allocated as TextLine[2][l1.length], used to store
114      * the common points found to be part of the LCS where lcs[0]
115      * references lines of l1 and lcs[1] references lines of l2.
116      *
117      * @return the length of the LCS
118      */

119     private int lcs_rec(
120             int bottoml1, int topl1,
121             int bottoml2, int topl2,
122             int[][] V, int[] snake, SubMonitor subMonitor) {
123
124         // check that both sequences are non-empty
125
if (bottoml1 > topl1 || bottoml2 > topl2) {
126             return 0;
127         }
128
129         int d = find_middle_snake(bottoml1, topl1, bottoml2, topl2, V, snake);
130         // System.out.println(snake[0] + " " + snake[1] + " " + snake[2]);
131

132         // need to store these so we don't lose them when they're overwritten by
133
// the recursion
134
int len = snake[2];
135         int startx = snake[0];
136         int starty = snake[1];
137
138         // the middle snake is part of the LCS, store it
139
for (int i = 0; i < len; i++) {
140             setLcs(startx + i, starty + i);
141             worked(subMonitor, 1);
142         }
143
144         if (d > 1) {
145             return len
146                     + lcs_rec(bottoml1, startx - 1, bottoml2, starty - 1, V, snake, subMonitor)
147                     + lcs_rec(startx + len, topl1, starty + len, topl2, V, snake, subMonitor);
148         } else if (d == 1) {
149             /*
150              * In this case the sequences differ by exactly 1 line. We have
151              * already saved all the lines after the difference in the for loop
152              * above, now we need to save all the lines before the difference.
153              */

154             int max = Math.min(startx - bottoml1, starty - bottoml2);
155             for (int i = 0; i < max; i++) {
156                 setLcs(bottoml1 + i, bottoml2 + i);
157                 worked(subMonitor, 1);
158             }
159             return max + len;
160         }
161
162         return len;
163     }
164
165     private void worked(SubMonitor subMonitor, int work) {
166         if (subMonitor.isCanceled())
167             throw new OperationCanceledException();
168         subMonitor.worked(work);
169     }
170
171     /**
172      * Helper function for Myers' LCS algorithm to find the middle snake for
173      * l1[bottoml1..topl1] and l2[bottoml2..topl2] The x, y coodrdinates of the
174      * start of the middle snake are saved in snake[0], snake[1] respectively
175      * and the length of the snake is saved in s[2].
176      *
177      * @param l1 The 1st sequence
178      * @param bottoml1 Index in the 1st sequence to start from (inclusive)
179      * @param topl1 Index in the 1st sequence to end on (inclusive)
180      * @param l2 The 2nd sequence
181      * @param bottoml2 Index in the 2nd sequence to start from (inclusive)
182      * @param topl2 Index in the 2nd sequence to end on (inclusive)
183      * @param V should be allocated as int[2][l1.length + l2.length + 1], used
184      * to store furthest reaching D-paths
185      * @param snake should be allocated as int[3], used to store the beginning
186      * x, y coordinates and the length of the middle snake
187      *
188      * @return The number of differences (SES) between l1[bottoml1..topl1] and
189      * l2[bottoml2..topl2]
190      */

191     private int find_middle_snake(
192             int bottoml1, int topl1,
193             int bottoml2, int topl2,
194             int[][] V, int[] snake) {
195         int N = topl1 - bottoml1 + 1;
196         int M = topl2 - bottoml2 + 1;
197         // System.out.println("N: " + N + " M: " + M + " bottom: " + bottoml1 +
198
// ", " +
199
// bottoml2 + " top: " + topl1 + ", " + topl2);
200
int delta = N - M;
201         boolean isEven;
202         if ((delta & 1) == 1) {
203             isEven = false;
204         } else {
205             isEven = true;
206         }
207
208         int limit = Math.min(max_differences, (N + M + 1) / 2); // ceil((N+M)/2)
209

210         int value_to_add_forward; // a 0 or 1 that we add to the start offset
211
// to make it odd/even
212
if ((M & 1) == 1) {
213             value_to_add_forward = 1;
214         } else {
215             value_to_add_forward = 0;
216         }
217
218         int value_to_add_backward;
219         if ((N & 1) == 1) {
220             value_to_add_backward = 1;
221         } else {
222             value_to_add_backward = 0;
223         }
224
225         int start_forward = -M;
226         int end_forward = N;
227         int start_backward = -N;
228         int end_backward = M;
229
230         V[0][limit + 1] = 0;
231         V[1][limit - 1] = N;
232         for (int d = 0; d <= limit; d++) {
233
234             int start_diag = Math.max(value_to_add_forward + start_forward, -d);
235             int end_diag = Math.min(end_forward, d);
236             value_to_add_forward = 1 - value_to_add_forward;
237
238             // compute forward furthest reaching paths
239
for (int k = start_diag; k <= end_diag; k += 2) {
240                 int x;
241                 if (k == -d
242                         || (k < d && V[0][limit + k - 1] < V[0][limit + k + 1])) {
243                     x = V[0][limit + k + 1];
244                 } else {
245                     x = V[0][limit + k - 1] + 1;
246                 }
247
248                 int y = x - k;
249
250                 snake[0] = x + bottoml1;
251                 snake[1] = y + bottoml2;
252                 snake[2] = 0;
253                 // System.out.println("1 x: " + x + " y: " + y + " k: " + k + "
254
// d: " + d );
255
while (x < N && y < M
256                         && isRangeEqual(x + bottoml1, y + bottoml2)) {
257                     x++;
258                     y++;
259                     snake[2]++;
260                 }
261                 V[0][limit + k] = x;
262                 // System.out.println(x + " " + V[1][limit+k -delta] + " " + k +
263
// " " + delta);
264
if (!isEven && k >= delta - d + 1 && k <= delta + d - 1
265                         && x >= V[1][limit + k - delta]) {
266                     // System.out.println("Returning: " + (2*d-1));
267
return 2 * d - 1;
268                 }
269
270                 // check to see if we can cut down the diagonal range
271
if (x >= N && end_forward > k - 1) {
272                     end_forward = k - 1;
273                 } else if (y >= M) {
274                     start_forward = k + 1;
275                     value_to_add_forward = 0;
276                 }
277             }
278
279             start_diag = Math.max(value_to_add_backward + start_backward, -d);
280             end_diag = Math.min(end_backward, d);
281             value_to_add_backward = 1 - value_to_add_backward;
282
283             // compute backward furthest reaching paths
284
for (int k = start_diag; k <= end_diag; k += 2) {
285                 int x;
286                 if (k == d
287                         || (k != -d && V[1][limit + k - 1] < V[1][limit + k + 1])) {
288                     x = V[1][limit + k - 1];
289                 } else {
290                     x = V[1][limit + k + 1] - 1;
291                 }
292
293                 int y = x - k - delta;
294
295                 snake[2] = 0;
296                 // System.out.println("2 x: " + x + " y: " + y + " k: " + k + "
297
// d: " + d);
298
while (x > 0 && y > 0
299                         && isRangeEqual(x - 1 + bottoml1, y - 1 + bottoml2)) {
300                     x--;
301                     y--;
302                     snake[2]++;
303                 }
304                 V[1][limit + k] = x;
305
306                 if (isEven && k >= -delta - d && k <= d - delta
307                         && x <= V[0][limit + k + delta]) {
308                     // System.out.println("Returning: " + 2*d);
309
snake[0] = bottoml1 + x;
310                     snake[1] = bottoml2 + y;
311
312                     return 2 * d;
313                 }
314
315                 // check to see if we can cut down our diagonal range
316
if (x <= 0) {
317                     start_backward = k + 1;
318                     value_to_add_backward = 0;
319                 } else if (y <= 0 && end_backward > k - 1) {
320                     end_backward = k - 1;
321                 }
322             }
323         }
324
325         /*
326          * computing the true LCS is too expensive, instead find the diagonal
327          * with the most progress and pretend a midle snake of length 0 occurs
328          * there.
329          */

330
331         int[] most_progress = findMostProgress(M, N, limit, V);
332
333         snake[0] = bottoml1 + most_progress[0];
334         snake[1] = bottoml2 + most_progress[1];
335         snake[2] = 0;
336         return 5; /*
337                      * HACK: since we didn't really finish the LCS computation
338                      * we don't really know the length of the SES. We don't do
339                      * anything with the result anyway, unless it's <=1. We know
340                      * for a fact SES > 1 so 5 is as good a number as any to
341                      * return here
342                      */

343     }
344
345     /**
346      * Takes the array with furthest reaching D-paths from an LCS computation
347      * and returns the x,y coordinates and progress made in the middle diagonal
348      * among those with maximum progress, both from the front and from the back.
349      *
350      * @param M the length of the 1st sequence for which LCS is being computed
351      * @param N the length of the 2nd sequence for which LCS is being computed
352      * @param limit the number of steps made in an attempt to find the LCS from
353      * the front and back
354      * @param V the array storing the furthest reaching D-paths for the LCS
355      * computation
356      * @return The result as an array of 3 integers where result[0] is the x
357      * coordinate of the current location in the diagonal with the most
358      * progress, result[1] is the y coordinate of the current location
359      * in the diagonal with the most progress and result[2] is the
360      * amount of progress made in that diagonal
361      */

362     private static int[] findMostProgress(int M, int N, int limit, int[][] V) {
363         int delta = N - M;
364
365         int forward_start_diag;
366         if ((M & 1) == (limit & 1)) {
367             forward_start_diag = Math.max(-M, -limit);
368         } else {
369             forward_start_diag = Math.max(1 - M, -limit);
370         }
371
372         int forward_end_diag = Math.min(N, limit);
373
374         int backward_start_diag;
375         if ((N & 1) == (limit & 1)) {
376             backward_start_diag = Math.max(-N, -limit);
377         } else {
378             backward_start_diag = Math.max(1 - N, -limit);
379         }
380
381         int backward_end_diag = Math.min(M, limit);
382
383         int[][] max_progress = new int[Math.max(forward_end_diag
384                 - forward_start_diag, backward_end_diag - backward_start_diag) / 2 + 1][3];
385         int num_progress = 0; // the 1st entry is current, it is initialized
386
// with 0s
387

388         // first search the forward diagonals
389
for (int k = forward_start_diag; k <= forward_end_diag; k += 2) {
390             int x = V[0][limit + k];
391             int y = x - k;
392             if (x > N || y > M) {
393                 continue;
394             }
395
396             int progress = x + y;
397             if (progress > max_progress[0][2]) {
398                 num_progress = 0;
399                 max_progress[0][0] = x;
400                 max_progress[0][1] = y;
401                 max_progress[0][2] = progress;
402             } else if (progress == max_progress[0][2]) {
403                 num_progress++;
404                 max_progress[num_progress][0] = x;
405                 max_progress[num_progress][1] = y;
406                 max_progress[num_progress][2] = progress;
407             }
408         }
409
410         boolean max_progress_forward = true; // initially the maximum
411
// progress is in the forward
412
// direction
413

414         // now search the backward diagonals
415
for (int k = backward_start_diag; k <= backward_end_diag; k += 2) {
416             int x = V[1][limit + k];
417             int y = x - k - delta;
418             if (x < 0 || y < 0) {
419                 continue;
420             }
421
422             int progress = N - x + M - y;
423             if (progress > max_progress[0][2]) {
424                 num_progress = 0;
425                 max_progress_forward = false;
426                 max_progress[0][0] = x;
427                 max_progress[0][1] = y;
428                 max_progress[0][2] = progress;
429             } else if (progress == max_progress[0][2] && !max_progress_forward) {
430                 num_progress++;
431                 max_progress[num_progress][0] = x;
432                 max_progress[num_progress][1] = y;
433                 max_progress[num_progress][2] = progress;
434             }
435         }
436
437         // return the middle diagonal with maximal progress.
438
return max_progress[num_progress / 2];
439     }
440     
441     protected abstract int getLength2();
442
443     protected abstract int getLength1();
444     
445     protected abstract boolean isRangeEqual(int i1, int i2);
446     
447     protected abstract void setLcs(int sl1, int sl2);
448     
449     protected abstract void initializeLcs(int lcsLength);
450
451     public int getLength() {
452         return length;
453     }
454 }
455
Popular Tags