KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > jdiff > DiffMyers


1 package jdiff;
2
3 import java.io.*;
4 import java.util.*;
5
6 /** A class to compare vectors of objects. The result of comparison
7     is a list of <code>change</code> objects which form an
8     edit script. The objects compared are traditionally lines
9     of text from two files. Comparison options such as "ignore
10     whitespace" are implemented by modifying the <code>equals</code>
11     and <code>hashcode</code> methods for the objects compared.
12 <p>
13    The basic algorithm is described in: </br>
14    "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
15    Algorithmica Vol. 1 No. 2, 1986, p 251.
16 <p>
17    This class outputs different results from GNU diff 1.15 on some
18    inputs. Our results are actually better (smaller change list, smaller
19    total size of changes), but it would be nice to know why. Perhaps
20    there is a memory overwrite bug in GNU diff 1.15.
21
22   @author Stuart D. Gathman, translated from GNU diff 1.15
23     Copyright (C) 2000 Business Management Systems, Inc.
24 <p>
25     This program is free software; you can redistribute it and/or modify
26     it under the terms of the GNU General Public License as published by
27     the Free Software Foundation; either version 1, or (at your option)
28     any later version.
29 <p>
30     This program is distributed in the hope that it will be useful,
31     but WITHOUT ANY WARRANTY; without even the implied warranty of
32     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33     GNU General Public License for more details.
34 <p>
35     You should have received a copy of the <a HREF=COPYING.txt>
36     GNU General Public License</a>
37     along with this program; if not, write to the Free Software
38     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
39
40  */

41
42 public class DiffMyers
43 {
44
45   /** Prepare to find differences between two arrays. Each element of
46       the arrays is translated to an "equivalence number" based on
47       the result of <code>equals</code>. The original Object arrays
48       are no longer needed for computing the differences. They will
49       be needed again later to print the results of the comparison as
50       an edit script, if desired.
51    */

52   public DiffMyers(Object JavaDoc[] a,Object JavaDoc[] b)
53   {
54     Hashtable h = new Hashtable(a.length + b.length);
55     filevec[0] = new file_data(a,h);
56     filevec[1] = new file_data(b,h);
57   }
58
59   /** 1 more than the maximum equivalence value used for this or its
60      sibling file. */

61   private int equiv_max = 1;
62
63   /** When set to true, the comparison uses a heuristic to speed it up.
64     With this heuristic, for files with a constant small density
65     of changes, the algorithm is linear in the file size. */

66   public boolean heuristic = false;
67
68   /** When set to true, the algorithm returns a guarranteed minimal
69       set of changes. This makes things slower, sometimes much slower. */

70   public boolean no_discards = false;
71
72   private int[] xvec, yvec; /* Vectors being compared. */
73   private int[] fdiag; /* Vector, indexed by diagonal, containing
74                                    the X coordinate of the point furthest
75                                    along the given diagonal in the forward
76                                    search of the edit matrix. */

77   private int[] bdiag; /* Vector, indexed by diagonal, containing
78                                    the X coordinate of the point furthest
79                                    along the given diagonal in the backward
80                                    search of the edit matrix. */

81   private int fdiagoff, bdiagoff;
82   private final file_data[] filevec = new file_data[2];
83   private int cost;
84
85   /** Find the midpoint of the shortest edit script for a specified
86      portion of the two files.
87
88      We scan from the beginnings of the files, and simultaneously from the ends,
89      doing a breadth-first search through the space of edit-sequence.
90      When the two searches meet, we have found the midpoint of the shortest
91      edit sequence.
92
93      The value returned is the number of the diagonal on which the midpoint lies.
94      The diagonal number equals the number of inserted lines minus the number
95      of deleted lines (counting only lines before the midpoint).
96      The edit cost is stored into COST; this is the total number of
97      lines inserted or deleted (counting only lines before the midpoint).
98
99      This function assumes that the first lines of the specified portions
100      of the two files do not match, and likewise that the last lines do not
101      match. The caller must trim matching lines from the beginning and end
102      of the portions it is going to specify.
103
104      Note that if we return the "wrong" diagonal value, or if
105      the value of bdiag at that diagonal is "wrong",
106      the worst this can do is cause suboptimal diff output.
107      It cannot cause incorrect diff output. */

108
109   private int diag (int xoff, int xlim, int yoff, int ylim)
110   {
111     final int[] fd = fdiag; // Give the compiler a chance.
112
final int[] bd = bdiag; // Additional help for the compiler.
113
final int[] xv = xvec; // Still more help for the compiler.
114
final int[] yv = yvec; // And more and more . . .
115
final int dmin = xoff - ylim; // Minimum valid diagonal.
116
final int dmax = xlim - yoff; // Maximum valid diagonal.
117
final int fmid = xoff - yoff; // Center diagonal of top-down search.
118
final int bmid = xlim - ylim; // Center diagonal of bottom-up search.
119
int fmin = fmid, fmax = fmid; // Limits of top-down search.
120
int bmin = bmid, bmax = bmid; // Limits of bottom-up search.
121
/* True if southeast corner is on an odd
122                                      diagonal with respect to the northwest. */

123     final boolean odd = (fmid - bmid & 1) != 0;
124
125     fd[fdiagoff + fmid] = xoff;
126     bd[bdiagoff + bmid] = xlim;
127
128     for (int c = 1;; ++c)
129       {
130         int d; /* Active diagonal. */
131         boolean big_snake = false;
132
133         /* Extend the top-down search by an edit step in each diagonal. */
134         if (fmin > dmin)
135           fd[fdiagoff + --fmin - 1] = -1;
136         else
137           ++fmin;
138         if (fmax < dmax)
139           fd[fdiagoff + ++fmax + 1] = -1;
140         else
141           --fmax;
142         for (d = fmax; d >= fmin; d -= 2)
143           {
144             int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1];
145
146             if (tlo >= thi)
147               x = tlo + 1;
148             else
149               x = thi;
150             oldx = x;
151             y = x - d;
152             while (x < xlim && y < ylim && xv[x] == yv[y]) {
153               ++x; ++y;
154             }
155             if (x - oldx > 20)
156               big_snake = true;
157             fd[fdiagoff + d] = x;
158             if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
159               {
160                 cost = 2 * c - 1;
161                 return d;
162               }
163           }
164
165         /* Similar extend the bottom-up search. */
166         if (bmin > dmin)
167           bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE;
168         else
169           ++bmin;
170         if (bmax < dmax)
171           bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE;
172         else
173           --bmax;
174         for (d = bmax; d >= bmin; d -= 2)
175           {
176             int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1];
177
178             if (tlo < thi)
179               x = tlo;
180             else
181               x = thi - 1;
182             oldx = x;
183             y = x - d;
184             while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) {
185               --x; --y;
186             }
187             if (oldx - x > 20)
188               big_snake = true;
189             bd[bdiagoff + d] = x;
190             if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
191               {
192                 cost = 2 * c;
193                 return d;
194               }
195           }
196
197         /* Heuristic: check occasionally for a diagonal that has made
198            lots of progress compared with the edit distance.
199            If we have any such, find the one that has made the most
200            progress and return it as if it had succeeded.
201
202            With this heuristic, for files with a constant small density
203            of changes, the algorithm is linear in the file size. */

204
205         if (c > 200 && big_snake && heuristic)
206           {
207             int best = 0;
208             int bestpos = -1;
209
210             for (d = fmax; d >= fmin; d -= 2)
211               {
212                 int dd = d - fmid;
213                 if ((fd[fdiagoff + d] - xoff)*2 - dd > 12 * (c + (dd > 0 ? dd : -dd)))
214                   {
215                     if (fd[fdiagoff + d] * 2 - dd > best
216                         && fd[fdiagoff + d] - xoff > 20
217                         && fd[fdiagoff + d] - d - yoff > 20)
218                       {
219                         int k;
220                         int x = fd[fdiagoff + d];
221
222                         /* We have a good enough best diagonal;
223                            now insist that it end with a significant snake. */

224                         for (k = 1; k <= 20; k++)
225                           if (xvec[x - k] != yvec[x - d - k])
226                             break;
227
228                         if (k == 21)
229                           {
230                             best = fd[fdiagoff + d] * 2 - dd;
231                             bestpos = d;
232                           }
233                       }
234                   }
235               }
236             if (best > 0)
237               {
238                 cost = 2 * c - 1;
239                 return bestpos;
240               }
241
242             best = 0;
243             for (d = bmax; d >= bmin; d -= 2)
244               {
245                 int dd = d - bmid;
246                 if ((xlim - bd[bdiagoff + d])*2 + dd > 12 * (c + (dd > 0 ? dd : -dd)))
247                   {
248                     if ((xlim - bd[bdiagoff + d]) * 2 + dd > best
249                         && xlim - bd[bdiagoff + d] > 20
250                         && ylim - (bd[bdiagoff + d] - d) > 20)
251                       {
252                         /* We have a good enough best diagonal;
253                            now insist that it end with a significant snake. */

254                         int k;
255                         int x = bd[bdiagoff + d];
256
257                         for (k = 0; k < 20; k++)
258                           if (xvec[x + k] != yvec[x - d + k])
259                             break;
260                         if (k == 20)
261                           {
262                             best = (xlim - bd[bdiagoff + d]) * 2 + dd;
263                             bestpos = d;
264                           }
265                       }
266                   }
267               }
268             if (best > 0)
269               {
270                 cost = 2 * c - 1;
271                 return bestpos;
272               }
273           }
274       }
275   }
276
277   /** Compare in detail contiguous subsequences of the two files
278      which are known, as a whole, to match each other.
279
280      The results are recorded in the vectors filevec[N].changed_flag, by
281      storing a 1 in the element for each line that is an insertion or deletion.
282
283      The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
284
285      Note that XLIM, YLIM are exclusive bounds.
286      All line numbers are origin-0 and discarded lines are not counted. */

287
288   private void compareseq (int xoff, int xlim, int yoff, int ylim) {
289     /* Slide down the bottom initial diagonal. */
290     while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) {
291       ++xoff; ++yoff;
292     }
293     /* Slide up the top initial diagonal. */
294     while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) {
295       --xlim; --ylim;
296     }
297     
298     /* Handle simple cases. */
299     if (xoff == xlim)
300       while (yoff < ylim)
301         filevec[1].changed_flag[1+filevec[1].realindexes[yoff++]] = true;
302     else if (yoff == ylim)
303       while (xoff < xlim)
304         filevec[0].changed_flag[1+filevec[0].realindexes[xoff++]] = true;
305     else
306       {
307         /* Find a point of correspondence in the middle of the files. */
308
309         int d = diag (xoff, xlim, yoff, ylim);
310         int c = cost;
311         int f = fdiag[fdiagoff + d];
312         int b = bdiag[bdiagoff + d];
313
314         if (c == 1)
315           {
316             /* This should be impossible, because it implies that
317                one of the two subsequences is empty,
318                and that case was handled above without calling `diag'.
319                Let's verify that this is true. */

320             throw new IllegalArgumentException JavaDoc("Empty subsequence");
321           }
322         else
323           {
324             /* Use that point to split this problem into two subproblems. */
325             compareseq (xoff, b, yoff, b - d);
326             /* This used to use f instead of b,
327                but that is incorrect!
328                It is not necessarily the case that diagonal d
329                has a snake from b to f. */

330             compareseq (b, xlim, b - d, ylim);
331           }
332       }
333   }
334
335   /** Discard lines from one file that have no matches in the other file.
336    */

337
338   private void discard_confusing_lines() {
339     filevec[0].discard_confusing_lines(filevec[1]);
340     filevec[1].discard_confusing_lines(filevec[0]);
341   }
342
343   private boolean inhibit = false;
344
345   /** Adjust inserts/deletes of blank lines to join changes
346      as much as possible.
347    */

348
349   private void shift_boundaries() {
350     if (inhibit)
351       return;
352     filevec[0].shift_boundaries(filevec[1]);
353     filevec[1].shift_boundaries(filevec[0]);
354   }
355
356   /** Scan the tables of which lines are inserted and deleted,
357      producing an edit script in reverse order. */

358
359   private change build_reverse_script() {
360     change script = null;
361     final boolean[] changed0 = filevec[0].changed_flag;
362     final boolean[] changed1 = filevec[1].changed_flag;
363     final int len0 = filevec[0].buffered_lines;
364     final int len1 = filevec[1].buffered_lines;
365
366     /* Note that changedN[len0] does exist, and contains 0. */
367
368     int i0 = 0, i1 = 0;
369
370     while (i0 < len0 || i1 < len1)
371       {
372         if (changed0[1+i0] || changed1[1+i1])
373           {
374             int line0 = i0, line1 = i1;
375
376             /* Find # lines changed here in each file. */
377             while (changed0[1+i0]) ++i0;
378             while (changed1[1+i1]) ++i1;
379
380             /* Record this change. */
381             script = new change(line0, line1, i0 - line0, i1 - line1, script);
382           }
383
384         /* We have reached lines in the two files that match each other. */
385         i0++; i1++;
386       }
387
388     return script;
389   }
390
391   /** Scan the tables of which lines are inserted and deleted,
392      producing an edit script in forward order. */

393
394   private change build_script() {
395     change script = null;
396     final boolean[] changed0 = filevec[0].changed_flag;
397     final boolean[] changed1 = filevec[1].changed_flag;
398     final int len0 = filevec[0].buffered_lines;
399     final int len1 = filevec[1].buffered_lines;
400     int i0 = len0, i1 = len1;
401
402     /* Note that changedN[-1] does exist, and contains 0. */
403
404     while (i0 >= 0 || i1 >= 0)
405       {
406         if (changed0[i0] || changed1[i1])
407           {
408             int line0 = i0, line1 = i1;
409
410             /* Find # lines changed here in each file. */
411             while (changed0[i0]) --i0;
412             while (changed1[i1]) --i1;
413
414             /* Record this change. */
415             script = new change(i0, i1, line0 - i0, line1 - i1, script);
416           }
417
418         /* We have reached lines in the two files that match each other. */
419         i0--; i1--;
420       }
421
422     return script;
423   }
424
425   /* Report the differences of two files. DEPTH is the current directory
426      depth. */

427   public change diff_2(final boolean reverse) {
428
429     /* Some lines are obviously insertions or deletions
430        because they don't match anything. Detect them now,
431        and avoid even thinking about them in the main comparison algorithm. */

432
433     discard_confusing_lines ();
434
435     /* Now do the main comparison algorithm, considering just the
436        undiscarded lines. */

437
438     xvec = filevec[0].undiscarded;
439     yvec = filevec[1].undiscarded;
440
441     int diags =
442       filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3;
443     fdiag = new int[diags];
444     fdiagoff = filevec[1].nondiscarded_lines + 1;
445     bdiag = new int[diags];
446     bdiagoff = filevec[1].nondiscarded_lines + 1;
447
448     compareseq (0, filevec[0].nondiscarded_lines,
449                 0, filevec[1].nondiscarded_lines);
450     fdiag = null;
451     bdiag = null;
452
453     /* Modify the results slightly to make them prettier
454        in cases where that can validly be done. */

455
456     shift_boundaries ();
457
458     /* Get the results of comparison in the form of a chain
459        of `struct change's -- an edit script. */

460
461     if (reverse)
462       return build_reverse_script();
463     else
464       return build_script();
465   }
466
467   /** The result of comparison is an "edit script": a chain of change objects.
468      Each change represents one place where some lines are deleted
469      and some are inserted.
470      
471      LINE0 and LINE1 are the first affected lines in the two files (origin 0).
472      DELETED is the number of lines deleted here from file 0.
473      INSERTED is the number of lines inserted here in file 1.
474
475      If DELETED is 0 then LINE0 is the number of the line before
476      which the insertion was done; vice versa for INSERTED and LINE1. */

477
478   public static class change {
479     /** Previous or next edit command. */
480     public change link;
481     /** # lines of file 1 changed here. */
482     public int inserted;
483     /** # lines of file 0 changed here. */
484     public int deleted;
485     /** Line number of 1st deleted line. */
486     public final int line0;
487     /** Line number of 1st inserted line. */
488     public final int line1;
489
490     /** Cons an additional entry onto the front of an edit script OLD.
491        LINE0 and LINE1 are the first affected lines in the two files (origin 0).
492        DELETED is the number of lines deleted here from file 0.
493        INSERTED is the number of lines inserted here in file 1.
494
495        If DELETED is 0 then LINE0 is the number of the line before
496        which the insertion was done; vice versa for INSERTED and LINE1. */

497     change(int line0, int line1, int deleted, int inserted, change old) {
498       this.line0 = line0;
499       this.line1 = line1;
500       this.inserted = inserted;
501       this.deleted = deleted;
502       this.link = old;
503       //System.err.println(line0+","+line1+","+inserted+","+deleted);
504
}
505   }
506
507   /** Data on one input file being compared.
508    */

509
510   class file_data {
511
512     /** Allocate changed array for the results of comparison. */
513     void clear() {
514       /* Allocate a flag for each line of each file, saying whether that line
515          is an insertion or deletion.
516          Allocate an extra element, always zero, at each end of each vector.
517        */

518       changed_flag = new boolean[buffered_lines + 2];
519     }
520
521     /** Return equiv_count[I] as the number of lines in this file
522        that fall in equivalence class I.
523          @return the array of equivalence class counts.
524      */

525     int[] equivCount() {
526       int[] equiv_count = new int[equiv_max];
527       for (int i = 0; i < buffered_lines; ++i)
528         ++equiv_count[equivs[i]];
529       return equiv_count;
530     }
531
532     /** Discard lines that have no matches in another file.
533
534        A line which is discarded will not be considered by the actual
535        comparison algorithm; it will be as if that line were not in the file.
536        The file's `realindexes' table maps virtual line numbers
537        (which don't count the discarded lines) into real line numbers;
538        this is how the actual comparison algorithm produces results
539        that are comprehensible when the discarded lines are counted.
540 <p>
541        When we discard a line, we also mark it as a deletion or insertion
542        so that it will be printed in the output.
543       @param f the other file
544      */

545     void discard_confusing_lines(file_data f) {
546       clear();
547     /* Set up table of which lines are going to be discarded. */
548       final byte[] discarded = discardable(f.equivCount());
549
550     /* Don't really discard the provisional lines except when they occur
551        in a run of discardables, with nonprovisionals at the beginning
552        and end. */

553       filterDiscards(discarded);
554
555     /* Actually discard the lines. */
556       discard(discarded);
557     }
558
559     /** Mark to be discarded each line that matches no line of another file.
560        If a line matches many lines, mark it as provisionally discardable.
561        @see equivCount()
562        @param counts The count of each equivalence number for the other file.
563        @return 0=nondiscardable, 1=discardable or 2=provisionally discardable
564                for each line
565      */

566
567     private byte[] discardable(final int[] counts) {
568       final int end = buffered_lines;
569       final byte[] discards = new byte[end];
570       final int[] equivs = this.equivs;
571       int many = 5;
572       int tem = end / 64;
573
574       /* Multiply MANY by approximate square root of number of lines.
575          That is the threshold for provisionally discardable lines. */

576       while ((tem = tem >> 2) > 0)
577         many *= 2;
578
579       for (int i = 0; i < end; i++)
580         {
581           int nmatch;
582           if (equivs[i] == 0)
583             continue;
584           nmatch = counts[equivs[i]];
585           if (nmatch == 0)
586             discards[i] = 1;
587           else if (nmatch > many)
588             discards[i] = 2;
589         }
590       return discards;
591     }
592
593     /** Don't really discard the provisional lines except when they occur
594        in a run of discardables, with nonprovisionals at the beginning
595        and end. */

596
597     private void filterDiscards(final byte[] discards) {
598         final int end = buffered_lines;
599
600         for (int i = 0; i < end; i++)
601           {
602             /* Cancel provisional discards not in middle of run of discards. */
603             if (discards[i] == 2)
604               discards[i] = 0;
605             else if (discards[i] != 0)
606               {
607                 /* We have found a nonprovisional discard. */
608                 int j;
609                 int length;
610                 int provisional = 0;
611
612                 /* Find end of this run of discardable lines.
613                    Count how many are provisionally discardable. */

614                 for (j = i; j < end; j++)
615                   {
616                     if (discards[j] == 0)
617                       break;
618                     if (discards[j] == 2)
619                       ++provisional;
620                   }
621
622                 /* Cancel provisional discards at end, and shrink the run. */
623                 while (j > i && discards[j - 1] == 2) {
624                   discards[--j] = 0; --provisional;
625                 }
626
627                 /* Now we have the length of a run of discardable lines
628                    whose first and last are not provisional. */

629                 length = j - i;
630
631                 /* If 1/4 of the lines in the run are provisional,
632                    cancel discarding of all provisional lines in the run. */

633                 if (provisional * 4 > length)
634                   {
635                     while (j > i)
636                       if (discards[--j] == 2)
637                         discards[j] = 0;
638                   }
639                 else
640                   {
641                     int consec;
642                     int minimum = 1;
643                     int tem = length / 4;
644
645                     /* MINIMUM is approximate square root of LENGTH/4.
646                        A subrun of two or more provisionals can stand
647                        when LENGTH is at least 16.
648                        A subrun of 4 or more can stand when LENGTH >= 64. */

649                     while ((tem = tem >> 2) > 0)
650                       minimum *= 2;
651                     minimum++;
652
653                     /* Cancel any subrun of MINIMUM or more provisionals
654                        within the larger run. */

655                     for (j = 0, consec = 0; j < length; j++)
656                       if (discards[i + j] != 2)
657                         consec = 0;
658                       else if (minimum == ++consec)
659                         /* Back up to start of subrun, to cancel it all. */
660                         j -= consec;
661                       else if (minimum < consec)
662                         discards[i + j] = 0;
663
664                     /* Scan from beginning of run
665                        until we find 3 or more nonprovisionals in a row
666                        or until the first nonprovisional at least 8 lines in.
667                        Until that point, cancel any provisionals. */

668                     for (j = 0, consec = 0; j < length; j++)
669                       {
670                         if (j >= 8 && discards[i + j] == 1)
671                           break;
672                         if (discards[i + j] == 2) {
673                           consec = 0; discards[i + j] = 0;
674                         }
675                         else if (discards[i + j] == 0)
676                           consec = 0;
677                         else
678                           consec++;
679                         if (consec == 3)
680                           break;
681                       }
682
683                     /* I advances to the last line of the run. */
684                     i += length - 1;
685
686                     /* Same thing, from end. */
687                     for (j = 0, consec = 0; j < length; j++)
688                       {
689                         if (j >= 8 && discards[i - j] == 1)
690                           break;
691                         if (discards[i - j] == 2) {
692                           consec = 0; discards[i - j] = 0;
693                         }
694                         else if (discards[i - j] == 0)
695                           consec = 0;
696                         else
697                           consec++;
698                         if (consec == 3)
699                           break;
700                       }
701                   }
702               }
703           }
704       }
705
706     /** Actually discard the lines.
707       @param discards flags lines to be discarded
708      */

709     private void discard(final byte[] discards) {
710       final int end = buffered_lines;
711       int j = 0;
712       for (int i = 0; i < end; ++i)
713         if (no_discards || discards[i] == 0)
714           {
715             undiscarded[j] = equivs[i];
716             realindexes[j++] = i;
717           }
718         else
719           changed_flag[1+i] = true;
720       nondiscarded_lines = j;
721     }
722
723     file_data(Object JavaDoc[] data,Hashtable h) {
724       buffered_lines = data.length;
725
726       equivs = new int[buffered_lines];
727       undiscarded = new int[buffered_lines];
728       realindexes = new int[buffered_lines];
729
730       for (int i = 0; i < data.length; ++i) {
731         Integer JavaDoc ir = (Integer JavaDoc)h.get(data[i]);
732         if (ir == null)
733           h.put(data[i],new Integer JavaDoc(equivs[i] = equiv_max++));
734         else
735           equivs[i] = ir.intValue();
736       }
737     }
738
739     /** Adjust inserts/deletes of blank lines to join changes
740        as much as possible.
741
742        We do something when a run of changed lines include a blank
743        line at one end and have an excluded blank line at the other.
744        We are free to choose which blank line is included.
745        `compareseq' always chooses the one at the beginning,
746        but usually it is cleaner to consider the following blank line
747        to be the "change". The only exception is if the preceding blank line
748        would join this change to other changes.
749       @param f the file being compared against
750     */

751
752     void shift_boundaries(file_data f) {
753       final boolean[] changed = changed_flag;
754       final boolean[] other_changed = f.changed_flag;
755       int i = 0;
756       int j = 0;
757       int i_end = buffered_lines;
758       int preceding = -1;
759       int other_preceding = -1;
760
761       for (;;)
762         {
763           int start, end, other_start;
764
765           /* Scan forwards to find beginning of another run of changes.
766              Also keep track of the corresponding point in the other file. */

767
768           while (i < i_end && !changed[1+i])
769             {
770               while (other_changed[1+j++])
771                 /* Non-corresponding lines in the other file
772                    will count as the preceding batch of changes. */

773                 other_preceding = j;
774               i++;
775             }
776
777           if (i == i_end)
778             break;
779
780           start = i;
781           other_start = j;
782
783           for (;;)
784             {
785               /* Now find the end of this run of changes. */
786
787               while (i < i_end && changed[1+i]) i++;
788               end = i;
789
790               /* If the first changed line matches the following unchanged one,
791                  and this run does not follow right after a previous run,
792                  and there are no lines deleted from the other file here,
793                  then classify the first changed line as unchanged
794                  and the following line as changed in its place. */

795
796               /* You might ask, how could this run follow right after another?
797                  Only because the previous run was shifted here. */

798
799               if (end != i_end
800                   && equivs[start] == equivs[end]
801                   && !other_changed[1+j]
802                   && end != i_end
803                   && !((preceding >= 0 && start == preceding)
804                        || (other_preceding >= 0
805                            && other_start == other_preceding)))
806                 {
807                   changed[1+end++] = true;
808                   changed[1+start++] = false;
809                   ++i;
810                   /* Since one line-that-matches is now before this run
811                      instead of after, we must advance in the other file
812                      to keep in synch. */

813                   ++j;
814                 }
815               else
816                 break;
817             }
818
819           preceding = i;
820           other_preceding = j;
821         }
822     }
823
824     /** Number of elements (lines) in this file. */
825     final int buffered_lines;
826
827     /** Vector, indexed by line number, containing an equivalence code for
828        each line. It is this vector that is actually compared with that
829        of another file to generate differences. */

830     private final int[] equivs;
831
832     /** Vector, like the previous one except that
833        the elements for discarded lines have been squeezed out. */

834     final int[] undiscarded;
835
836     /** Vector mapping virtual line numbers (not counting discarded lines)
837        to real ones (counting those lines). Both are origin-0. */

838     final int[] realindexes;
839
840     /** Total number of nondiscarded lines. */
841     int nondiscarded_lines;
842
843     /** Array, indexed by real origin-1 line number,
844        containing true for a line that is an insertion or a deletion.
845        The results of comparison are stored here. */

846     boolean[] changed_flag;
847
848   }
849     
850 }
851
Popular Tags