Diff


1   /* $Log: Diff.java,v $
2    * Revision 1.3  2000/03/03  21:58:03  stuart
3    * move discard_confusing_lines and shift_boundaries to class file_data
4    *
5    * Revision 1.2  2000/03/02  16:37:38  stuart
6    * Add GPL and copyright
7    *
8    */
9   package net.killingar;
10  
11  import java.util.HashMap  ;
12  
13  /** A class to compare vectors of objects.  The result of comparison
14      is a list of <code>change</code> objects which form an
15      edit script.  The objects compared are traditionally lines
16      of text from two files.  Comparison options such as "ignore
17      whitespace" are implemented by modifying the <code>equals</code>
18      and <code>hashcode</code> methods for the objects compared.
19  <p>
20     The basic algorithm is described in: </br>
21     "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
22     Algorithmica Vol. 1 No. 2, 1986, p 251.
23  <p>
24     This class outputs different results from GNU diff 1.15 on some
25     inputs.  Our results are actually better (smaller change list, smaller
26     total size of changes), but it would be nice to know why.  Perhaps
27     there is a memory overwrite bug in GNU diff 1.15.
28  
29    @author Stuart D. Gathman, translated from GNU diff 1.15
30      Copyright (C) 2000  Business Management Systems, Inc.
31  <p>
32      This program is free software; you can redistribute it and/or modify
33      it under the terms of the GNU General Public License as published by
34      the Free Software Foundation; either version 1, or (at your option)
35      any later version.
36  <p>
37      This program is distributed in the hope that it will be useful,
38      but WITHOUT ANY WARRANTY; without even the implied warranty of
39      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
40      GNU General Public License for more details.
41  <p>
42      You should have received a copy of the <a HREF=COPYING.txt>
43      GNU General Public License</a>
44      along with this program; if not, write to the Free Software
45      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
46  
47   */
48  
49  public class Diff {
50  
51    /** Prepare to find differences between two arrays.  Each element of
52        the arrays is translated to an "equivalence number" based on
53        the result of <code>equals</code>.  The original Object arrays
54        are no longer needed for computing the differences.  They will
55        be needed again later to print the results of the comparison as
56        an edit script, if desired.
57     */
58    public Diff(Object  [] a,Object  [] b) {
59      HashMap   h = new HashMap  (a.length + b.length);
60      filevec[0] = new file_data(a,h);
61      filevec[1] = new file_data(b,h);
62    }
63  
64    /** 1 more than the maximum equivalence value used for this or its
65       sibling file. */
66    private int equiv_max = 1;
67  
68    /** When set to true, the comparison uses a heuristic to speed it up.
69      With this heuristic, for files with a constant small density
70      of changes, the algorithm is linear in the file size.  */
71    public boolean heuristic = false;
72  
73    /** When set to true, the algorithm returns a guarranteed minimal
74        set of changes.  This makes things slower, sometimes much slower. */
75    public boolean no_discards = false;
76  
77    private int[] xvec, yvec; /* Vectors being compared. */
78    private int[] fdiag;      /* Vector, indexed by diagonal, containing
79                     the X coordinate of the point furthest
80                     along the given diagonal in the forward
81                     search of the edit matrix. */
82    private int[] bdiag;      /* Vector, indexed by diagonal, containing
83                     the X coordinate of the point furthest
84                     along the given diagonal in the backward
85                     search of the edit matrix. */
86    private int fdiagoff, bdiagoff;
87    private final file_data[] filevec = new file_data[2];
88    private int cost;
89  
90    /** Find the midpoint of the shortest edit script for a specified
91       portion of the two files.
92  
93       We scan from the beginnings of the files, and simultaneously from the ends,
94       doing a breadth-first search through the space of edit-sequence.
95       When the two searches meet, we have found the midpoint of the shortest
96       edit sequence.
97  
98       The value returned is the number of the diagonal on which the midpoint lies.
99       The diagonal number equals the number of inserted lines minus the number
100      of deleted lines (counting only lines before the midpoint).
101      The edit cost is stored into COST; this is the total number of
102      lines inserted or deleted (counting only lines before the midpoint).
103 
104      This function assumes that the first lines of the specified portions
105      of the two files do not match, and likewise that the last lines do not
106      match.  The caller must trim matching lines from the beginning and end
107      of the portions it is going to specify.
108 
109      Note that if we return the "wrong" diagonal value, or if
110      the value of bdiag at that diagonal is "wrong",
111      the worst this can do is cause suboptimal diff output.
112      It cannot cause incorrect diff output.  */
113 
114   private int diag (int xoff, int xlim, int yoff, int ylim) {
115     final int[] fd = fdiag; // Give the compiler a chance.
116     final int[] bd = bdiag; // Additional help for the compiler.
117     final int[] xv = xvec;      // Still more help for the compiler.
118     final int[] yv = yvec;      // And more and more . . .
119     final int dmin = xoff - ylim;   // Minimum valid diagonal.
120     final int dmax = xlim - yoff;   // Maximum valid diagonal.
121     final int fmid = xoff - yoff;   // Center diagonal of top-down search.
122     final int bmid = xlim - ylim;   // Center diagonal of bottom-up search.
123     int fmin = fmid, fmax = fmid;   // Limits of top-down search.
124     int bmin = bmid, bmax = bmid;   // Limits of bottom-up search.
125     /* True if southeast corner is on an odd
126                      diagonal with respect to the northwest. */
127     final boolean odd = (fmid - bmid & 1) != 0;
128 
129     fd[fdiagoff + fmid] = xoff;
130     bd[bdiagoff + bmid] = xlim;
131 
132     for (int c = 1;; ++c)
133       {
134     int d;          /* Active diagonal. */
135     boolean big_snake = false;
136 
137     /* Extend the top-down search by an edit step in each diagonal. */
138     if (fmin > dmin)
139       fd[fdiagoff + --fmin - 1] = -1;
140     else
141       ++fmin;
142     if (fmax < dmax)
143       fd[fdiagoff + ++fmax + 1] = -1;
144     else
145       --fmax;
146     for (d = fmax; d >= fmin; d -= 2)
147       {
148         int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1];
149 
150         if (tlo >= thi)
151           x = tlo + 1;
152         else
153           x = thi;
154         oldx = x;
155         y = x - d;
156         while (x < xlim && y < ylim && xv[x] == yv[y]) {
157           ++x; ++y;
158         }
159         if (x - oldx > 20)
160           big_snake = true;
161         fd[fdiagoff + d] = x;
162         if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
163           {
164         cost = 2 * c - 1;
165         return d;
166           }
167       }
168 
169     /* Similar extend the bottom-up search. */
170     if (bmin > dmin)
171       bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE;
172     else
173       ++bmin;
174     if (bmax < dmax)
175       bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE;
176     else
177       --bmax;
178     for (d = bmax; d >= bmin; d -= 2)
179       {
180         int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1];
181 
182         if (tlo < thi)
183           x = tlo;
184         else
185           x = thi - 1;
186         oldx = x;
187         y = x - d;
188         while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) {
189           --x; --y;
190         }
191         if (oldx - x > 20)
192           big_snake = true;
193         bd[bdiagoff + d] = x;
194         if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
195           {
196         cost = 2 * c;
197         return d;
198           }
199       }
200 
201     /* Heuristic: check occasionally for a diagonal that has made
202        lots of progress compared with the edit distance.
203        If we have any such, find the one that has made the most
204        progress and return it as if it had succeeded.
205 
206        With this heuristic, for files with a constant small density
207        of changes, the algorithm is linear in the file size.  */
208 
209     if (c > 200 && big_snake && heuristic)
210       {
211         int best = 0;
212         int bestpos = -1;
213 
214         for (d = fmax; d >= fmin; d -= 2)
215           {
216         int dd = d - fmid;
217         if ((fd[fdiagoff + d] - xoff)*2 - dd > 12 * (c + (dd > 0 ? dd : -dd)))
218           {
219             if (fd[fdiagoff + d] * 2 - dd > best
220             && fd[fdiagoff + d] - xoff > 20
221             && fd[fdiagoff + d] - d - yoff > 20)
222               {
223             int k;
224             int x = fd[fdiagoff + d];
225 
226             /* We have a good enough best diagonal;
227                now insist that it end with a significant snake.  */
228             for (k = 1; k <= 20; k++)
229               if (xvec[x - k] != yvec[x - d - k])
230                 break;
231 
232             if (k == 21)
233               {
234                 best = fd[fdiagoff + d] * 2 - dd;
235                 bestpos = d;
236               }
237               }
238           }
239           }
240         if (best > 0)
241           {
242         cost = 2 * c - 1;
243         return bestpos;
244           }
245 
246         best = 0;
247         for (d = bmax; d >= bmin; d -= 2)
248           {
249         int dd = d - bmid;
250         if ((xlim - bd[bdiagoff + d])*2 + dd > 12 * (c + (dd > 0 ? dd : -dd)))
251           {
252             if ((xlim - bd[bdiagoff + d]) * 2 + dd > best
253             && xlim - bd[bdiagoff + d] > 20
254             && ylim - (bd[bdiagoff + d] - d) > 20)
255               {
256             /* We have a good enough best diagonal;
257                now insist that it end with a significant snake.  */
258             int k;
259             int x = bd[bdiagoff + d];
260 
261             for (k = 0; k < 20; k++)
262               if (xvec[x + k] != yvec[x - d + k])
263                 break;
264             if (k == 20)
265               {
266                 best = (xlim - bd[bdiagoff + d]) * 2 + dd;
267                 bestpos = d;
268               }
269               }
270           }
271           }
272         if (best > 0)
273           {
274         cost = 2 * c - 1;
275         return bestpos;
276           }
277       }
278       }
279   }
280 
281   /** Compare in detail contiguous subsequences of the two files
282      which are known, as a whole, to match each other.
283 
284      The results are recorded in the vectors filevec[N].changed_flag, by
285      storing a 1 in the element for each line that is an insertion or deletion.
286 
287      The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
288 
289      Note that XLIM, YLIM are exclusive bounds.
290      All line numbers are origin-0 and discarded lines are not counted.  */
291 
292   private void compareseq (int xoff, int xlim, int yoff, int ylim) {
293     /* Slide down the bottom initial diagonal. */
294     while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) {
295       ++xoff; ++yoff;
296     }
297     /* Slide up the top initial diagonal. */
298     while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) {
299       --xlim; --ylim;
300     }
301 
302     /* Handle simple cases. */
303     if (xoff == xlim)
304       while (yoff < ylim)
305     filevec[1].changed_flag[1+filevec[1].realindexes[yoff++]] = true;
306     else if (yoff == ylim)
307       while (xoff < xlim)
308     filevec[0].changed_flag[1+filevec[0].realindexes[xoff++]] = true;
309     else
310       {
311     /* Find a point of correspondence in the middle of the files.  */
312 
313     int d = diag (xoff, xlim, yoff, ylim);
314     int c = cost;
315     int f = fdiag[fdiagoff + d];
316     int b = bdiag[bdiagoff + d];
317 
318     if (c == 1)
319       {
320         /* This should be impossible, because it implies that
321            one of the two subsequences is empty,
322            and that case was handled above without calling `diag'.
323            Let's verify that this is true.  */
324         throw new IllegalArgumentException  ("Empty subsequence");
325       }
326     else
327       {
328         /* Use that point to split this problem into two subproblems.  */
329         compareseq (xoff, b, yoff, b - d);
330         /* This used to use f instead of b,
331            but that is incorrect!
332            It is not necessarily the case that diagonal d
333            has a snake from b to f.  */
334         compareseq (b, xlim, b - d, ylim);
335       }
336       }
337   }
338 
339   /** Discard lines from one file that have no matches in the other file.
340    */
341 
342   private void discard_confusing_lines() {
343     filevec[0].discard_confusing_lines(filevec[1]);
344     filevec[1].discard_confusing_lines(filevec[0]);
345   }
346 
347   private boolean inhibit = false;
348 
349   /** Adjust inserts/deletes of blank lines to join changes
350      as much as possible.
351    */
352 
353   private void shift_boundaries() {
354     if (inhibit)
355       return;
356     filevec[0].shift_boundaries(filevec[1]);
357     filevec[1].shift_boundaries(filevec[0]);
358   }
359 
360   /** Scan the tables of which lines are inserted and deleted,
361      producing an edit script in reverse order.  */
362 
363   private change build_reverse_script() {
364     change script = null;
365     final boolean[] changed0 = filevec[0].changed_flag;
366     final boolean[] changed1 = filevec[1].changed_flag;
367     final int len0 = filevec[0].buffered_lines;
368     final int len1 = filevec[1].buffered_lines;
369 
370     /* Note that changedN[len0] does exist, and contains 0.  */
371 
372     int i0 = 0, i1 = 0;
373 
374     while (i0 < len0 || i1 < len1)
375       {
376     if (changed0[1+i0] || changed1[1+i1])
377       {
378         int line0 = i0, line1 = i1;
379 
380         /* Find # lines changed here in each file.  */
381         while (changed0[1+i0]) ++i0;
382         while (changed1[1+i1]) ++i1;
383 
384         /* Record this change.  */
385         script = new change(line0, line1, i0 - line0, i1 - line1, script);
386       }
387 
388     /* We have reached lines in the two files that match each other.  */
389     i0++; i1++;
390       }
391 
392     return script;
393   }
394 
395   /** Scan the tables of which lines are inserted and deleted,
396      producing an edit script in forward order.  */
397 
398   private change build_script() {
399     change script = null;
400     final boolean[] changed0 = filevec[0].changed_flag;
401     final boolean[] changed1 = filevec[1].changed_flag;
402     final int len0 = filevec[0].buffered_lines;
403     final int len1 = filevec[1].buffered_lines;
404     int i0 = len0, i1 = len1;
405 
406     /* Note that changedN[-1] does exist, and contains 0.  */
407 
408     while (i0 >= 0 || i1 >= 0)
409       {
410     if (changed0[i0] || changed1[i1])
411       {
412         int line0 = i0, line1 = i1;
413 
414         /* Find # lines changed here in each file.  */
415         while (changed0[i0]) --i0;
416         while (changed1[i1]) --i1;
417 
418         /* Record this change.  */
419         script = new change(i0, i1, line0 - i0, line1 - i1, script);
420       }
421 
422     /* We have reached lines in the two files that match each other.  */
423     i0--; i1--;
424       }
425 
426     return script;
427   }
428 
429   /* Report the differences of two files.  DEPTH is the current directory
430      depth. */
431   public change diff_2(final boolean reverse) {
432 
433     /* Some lines are obviously insertions or deletions
434        because they don't match anything.  Detect them now,
435        and avoid even thinking about them in the main comparison algorithm.  */
436 
437     discard_confusing_lines ();
438 
439     /* Now do the main comparison algorithm, considering just the
440        undiscarded lines.  */
441 
442     xvec = filevec[0].undiscarded;
443     yvec = filevec[1].undiscarded;
444 
445     int diags =
446       filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3;
447     fdiag = new int[diags];
448     fdiagoff = filevec[1].nondiscarded_lines + 1;
449     bdiag = new int[diags];
450     bdiagoff = filevec[1].nondiscarded_lines + 1;
451 
452     compareseq (0, filevec[0].nondiscarded_lines,
453         0, filevec[1].nondiscarded_lines);
454     fdiag = null;
455     bdiag = null;
456 
457     /* Modify the results slightly to make them prettier
458        in cases where that can validly be done.  */
459 
460     shift_boundaries ();
461 
462     /* Get the results of comparison in the form of a chain
463        of `struct change's -- an edit script.  */
464 
465     if (reverse)
466       return build_reverse_script();
467     else
468       return build_script();
469   }
470 
471   /** The result of comparison is an "edit script": a chain of change objects.
472      Each change represents one place where some lines are deleted
473      and some are inserted.
474 
475      LINE0 and LINE1 are the first affected lines in the two files (origin 0).
476      DELETED is the number of lines deleted here from file 0.
477      INSERTED is the number of lines inserted here in file 1.
478 
479      If DELETED is 0 then LINE0 is the number of the line before
480      which the insertion was done; vice versa for INSERTED and LINE1.  */
481 
482   public static class change {
483     /** Previous or next edit command. */
484     public change link;
485     /** # lines of file 1 changed here.  */
486     public final int inserted;
487     /** # lines of file 0 changed here.  */
488     public final int deleted;
489     /** Line number of 1st deleted line.  */
490     public final int line0;
491     /** Line number of 1st inserted line.  */
492     public final int line1;
493 
494     /** Cons an additional entry onto the front of an edit script OLD.
495        LINE0 and LINE1 are the first affected lines in the two files (origin 0).
496        DELETED is the number of lines deleted here from file 0.
497        INSERTED is the number of lines inserted here in file 1.
498 
499        If DELETED is 0 then LINE0 is the number of the line before
500        which the insertion was done; vice versa for INSERTED and LINE1.  */
501     change(int line0, int line1, int deleted, int inserted, change old) {
502       this.line0 = line0;
503       this.line1 = line1;
504       this.inserted = inserted;
505       this.deleted = deleted;
506       this.link = old;
507       //System.err.println(line0+","+line1+","+inserted+","+deleted);
508     }
509   }
510 
511   /** Data on one input file being compared.
512    */
513 
514   class file_data {
515 
516     /** Allocate changed array for the results of comparison.  */
517     void clear() {
518       /* Allocate a flag for each line of each file, saying whether that line
519      is an insertion or deletion.
520      Allocate an extra element, always zero, at each end of each vector.
521        */
522       changed_flag = new boolean[buffered_lines + 2];
523     }
524 
525     /** Return equiv_count[I] as the number of lines in this file
526        that fall in equivalence class I.
527          @return the array of equivalence class counts.
528      */
529     int[] equivCount() {
530       int[] equiv_count = new int[equiv_max];
531       for (int i = 0; i < buffered_lines; ++i)
532     ++equiv_count[equivs[i]];
533       return equiv_count;
534     }
535 
536     /** Discard lines that have no matches in another file.
537 
538        A line which is discarded will not be considered by the actual
539        comparison algorithm; it will be as if that line were not in the file.
540        The file's `realindexes' table maps virtual line numbers
541        (which don't count the discarded lines) into real line numbers;
542        this is how the actual comparison algorithm produces results
543        that are comprehensible when the discarded lines are counted.
544 <p>
545        When we discard a line, we also mark it as a deletion or insertion
546        so that it will be printed in the output.
547       @param f the other file
548      */
549     void discard_confusing_lines(file_data f) {
550       clear();
551     /* Set up table of which lines are going to be discarded. */
552       final byte[] discarded = discardable(f.equivCount());
553 
554     /* Don't really discard the provisional lines except when they occur
555        in a run of discardables, with nonprovisionals at the beginning
556        and end.  */
557       filterDiscards(discarded);
558 
559     /* Actually discard the lines. */
560       discard(discarded);
561     }
562 
563     /** Mark to be discarded each line that matches no line of another file.
564        If a line matches many lines, mark it as provisionally discardable.
565        @see equivCount()
566        @param counts The count of each equivalence number for the other file.
567        @return 0=nondiscardable, 1=discardable or 2=provisionally discardable
568         for each line
569      */
570 
571     private byte[] discardable(final int[] counts) {
572       final int end = buffered_lines;
573       final byte[] discards = new byte[end];
574       final int[] equivs = this.equivs;
575       int many = 5;
576       int tem = end / 64;
577 
578       /* Multiply MANY by approximate square root of number of lines.
579      That is the threshold for provisionally discardable lines.  */
580       while ((tem = tem >> 2) > 0)
581     many *= 2;
582 
583       for (int i = 0; i < end; i++)
584     {
585       int nmatch;
586       if (equivs[i] == 0)
587         continue;
588       nmatch = counts[equivs[i]];
589       if (nmatch == 0)
590         discards[i] = 1;
591       else if (nmatch > many)
592         discards[i] = 2;
593     }
594       return discards;
595     }
596 
597     /** Don't really discard the provisional lines except when they occur
598        in a run of discardables, with nonprovisionals at the beginning
599        and end.  */
600 
601     private void filterDiscards(final byte[] discards) {
602     final int end = buffered_lines;
603 
604     for (int i = 0; i < end; i++)
605       {
606         /* Cancel provisional discards not in middle of run of discards.  */
607         if (discards[i] == 2)
608           discards[i] = 0;
609         else if (discards[i] != 0)
610           {
611         /* We have found a nonprovisional discard.  */
612         int j;
613         int length;
614         int provisional = 0;
615 
616         /* Find end of this run of discardable lines.
617            Count how many are provisionally discardable.  */
618         for (j = i; j < end; j++)
619           {
620             if (discards[j] == 0)
621               break;
622             if (discards[j] == 2)
623               ++provisional;
624           }
625 
626         /* Cancel provisional discards at end, and shrink the run.  */
627         while (j > i && discards[j - 1] == 2) {
628           discards[--j] = 0; --provisional;
629         }
630 
631         /* Now we have the length of a run of discardable lines
632            whose first and last are not provisional.  */
633         length = j - i;
634 
635         /* If 1/4 of the lines in the run are provisional,
636            cancel discarding of all provisional lines in the run.  */
637         if (provisional * 4 > length)
638           {
639             while (j > i)
640               if (discards[--j] == 2)
641             discards[j] = 0;
642           }
643         else
644           {
645             int consec;
646             int minimum = 1;
647             int tem = length / 4;
648 
649             /* MINIMUM is approximate square root of LENGTH/4.
650                A subrun of two or more provisionals can stand
651                when LENGTH is at least 16.
652                A subrun of 4 or more can stand when LENGTH >= 64.  */
653             while ((tem = tem >> 2) > 0)
654               minimum *= 2;
655             minimum++;
656 
657             /* Cancel any subrun of MINIMUM or more provisionals
658                within the larger run.  */
659             for (j = 0, consec = 0; j < length; j++)
660               if (discards[i + j] != 2)
661             consec = 0;
662               else if (minimum == ++consec)
663             /* Back up to start of subrun, to cancel it all.  */
664             j -= consec;
665               else if (minimum < consec)
666             discards[i + j] = 0;
667 
668             /* Scan from beginning of run
669                until we find 3 or more nonprovisionals in a row
670                or until the first nonprovisional at least 8 lines in.
671                Until that point, cancel any provisionals.  */
672             for (j = 0, consec = 0; j < length; j++)
673               {
674             if (j >= 8 && discards[i + j] == 1)
675               break;
676             if (discards[i + j] == 2) {
677               consec = 0; discards[i + j] = 0;
678             }
679             else if (discards[i + j] == 0)
680               consec = 0;
681             else
682               consec++;
683             if (consec == 3)
684               break;
685               }
686 
687             /* I advances to the last line of the run.  */
688             i += length - 1;
689 
690             /* Same thing, from end.  */
691             for (j = 0, consec = 0; j < length; j++)
692               {
693             if (j >= 8 && discards[i - j] == 1)
694               break;
695             if (discards[i - j] == 2) {
696               consec = 0; discards[i - j] = 0;
697             }
698             else if (discards[i - j] == 0)
699               consec = 0;
700             else
701               consec++;
702             if (consec == 3)
703               break;
704               }
705           }
706           }
707       }
708       }
709 
710     /** Actually discard the lines.
711       @param discards flags lines to be discarded
712      */
713     private void discard(final byte[] discards) {
714       final int end = buffered_lines;
715       int j = 0;
716       for (int i = 0; i < end; ++i)
717     if (no_discards || discards[i] == 0)
718       {
719         undiscarded[j] = equivs[i];
720         realindexes[j++] = i;
721       }
722     else
723       changed_flag[1+i] = true;
724       nondiscarded_lines = j;
725     }
726 
727     file_data(Object  [] data,HashMap   h) {
728       buffered_lines = data.length;
729 
730       equivs = new int[buffered_lines];
731       undiscarded = new int[buffered_lines];
732       realindexes = new int[buffered_lines];
733 
734       for (int i = 0; i < data.length; ++i) {
735         Integer   ir = (Integer  )h.get(data[i]);
736     if (ir == null)
737       h.put(data[i],new Integer  (equivs[i] = equiv_max++));
738     else
739       equivs[i] = ir.intValue();
740       }
741     }
742 
743     /** Adjust inserts/deletes of blank lines to join changes
744        as much as possible.
745 
746        We do something when a run of changed lines include a blank
747        line at one end and have an excluded blank line at the other.
748        We are free to choose which blank line is included.
749        `compareseq' always chooses the one at the beginning,
750        but usually it is cleaner to consider the following blank line
751        to be the "change".  The only exception is if the preceding blank line
752        would join this change to other changes.
753       @param f the file being compared against
754     */
755 
756     void shift_boundaries(file_data f) {
757       final boolean[] changed = changed_flag;
758       final boolean[] other_changed = f.changed_flag;
759       int i = 0;
760       int j = 0;
761       int i_end = buffered_lines;
762       int preceding = -1;
763       int other_preceding = -1;
764 
765       for (;;)
766     {
767       int start, end, other_start;
768 
769       /* Scan forwards to find beginning of another run of changes.
770          Also keep track of the corresponding point in the other file.  */
771 
772       while (i < i_end && !changed[1+i])
773         {
774           while (other_changed[1+j++])
775         /* Non-corresponding lines in the other file
776            will count as the preceding batch of changes.  */
777         other_preceding = j;
778           i++;
779         }
780 
781       if (i == i_end)
782         break;
783 
784       start = i;
785       other_start = j;
786 
787       for (;;)
788         {
789           /* Now find the end of this run of changes.  */
790 
791           while (i < i_end && changed[1+i]) i++;
792           end = i;
793 
794           /* If the first changed line matches the following unchanged one,
795          and this run does not follow right after a previous run,
796          and there are no lines deleted from the other file here,
797          then classify the first changed line as unchanged
798          and the following line as changed in its place.  */
799 
800           /* You might ask, how could this run follow right after another?
801          Only because the previous run was shifted here.  */
802 
803           if (end != i_end
804           && equivs[start] == equivs[end]
805           && !other_changed[1+j]
806           && end != i_end
807           && !((preceding >= 0 && start == preceding)
808                || (other_preceding >= 0
809                && other_start == other_preceding)))
810         {
811           changed[1+end++] = true;
812           changed[1+start++] = false;
813           ++i;
814           /* Since one line-that-matches is now before this run
815              instead of after, we must advance in the other file
816              to keep in synch.  */
817           ++j;
818         }
819           else
820         break;
821         }
822 
823       preceding = i;
824       other_preceding = j;
825     }
826     }
827 
828     /** Number of elements (lines) in this file. */
829     final int buffered_lines;
830 
831     /** Vector, indexed by line number, containing an equivalence code for
832        each line.  It is this vector that is actually compared with that
833        of another file to generate differences. */
834     private final int[]     equivs;
835 
836     /** Vector, like the previous one except that
837        the elements for discarded lines have been squeezed out.  */
838     final int[]    undiscarded;
839 
840     /** Vector mapping virtual line numbers (not counting discarded lines)
841        to real ones (counting those lines).  Both are origin-0.  */
842     final int[]    realindexes;
843 
844     /** Total number of nondiscarded lines. */
845     int         nondiscarded_lines;
846 
847     /** Array, indexed by real origin-1 line number,
848        containing true for a line that is an insertion or a deletion.
849        The results of comparison are stored here.  */
850     boolean[]       changed_flag;
851 
852   }
853 }
854
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags