KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > snipsnap > versioning > cookbook > CookbookDiff


1 /*
2  * This file is part of "SnipSnap Wiki/Weblog".
3  *
4  * Adapted from <a HREF="http://javacook.darwinsys.com/">Ian Darwin's Java Cookbook</a>
5  * See Diff.java for more information
6  *
7  * Result was printed, now the result is a list of ChangeInfo objects.
8  * Input was from two files, now diff takes two Strings.
9  * Changed to Java coding style.
10  *
11  * diff Text file difference utility.
12  * ---- Copyright 1987, 1989 by Donald C. Lindsay,
13  * School of Computer Science, Carnegie Mellon University.
14  * Copyright 1982 by Symbionics.
15  * Use without fee is permitted when not for direct commercial
16  * advantage, and when credit to the source is given. Other uses
17  * require specific permission.
18  *
19  * Adaption Copyright (c) 2002 Stephan J. Schmidt, Matthias L. Jugel
20  * All Rights Reserved.
21  *
22  * Please visit http://snipsnap.org/ for updates and contact.
23  *
24  * --LICENSE NOTICE--
25  * This program is free software; you can redistribute it and/or
26  * modify it under the terms of the GNU General Public License
27  * as published by the Free Software Foundation; either version 2
28  * of the License, or (at your option) any later version.
29  *
30  * This program is distributed in the hope that it will be useful,
31  * but WITHOUT ANY WARRANTY; without even the implied warranty of
32  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33  * GNU General Public License for more details.
34  *
35  * You should have received a copy of the GNU General Public License
36  * along with this program; if not, write to the Free Software
37  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
38  * --LICENSE NOTICE--
39  */

40
41 package org.snipsnap.versioning.cookbook;
42
43 import org.snipsnap.versioning.ChangeInfo;
44
45 import java.util.ArrayList JavaDoc;
46 import java.util.List JavaDoc;
47 import java.util.StringTokenizer JavaDoc;
48
49 /**
50  * Returns differences between two text strings
51  *
52  * @author Stephan J. Schmidt
53  * @version $Id: CookbookDiff.java 1257 2003-12-11 13:36:55Z leo $
54  *
55  * @author Ian F. Darwin, Java version
56  * @author D. C. Lindsay, C version (1982-1987)
57  */

58
59 public class CookbookDiff {
60   private ChangeInfo last;
61   private List JavaDoc lines = new ArrayList JavaDoc();
62
63   /** block len > any possible real block len */
64   private final int UNREAL = Integer.MAX_VALUE;
65
66   /** Keeps track of information about file1 and file2 */
67   private SourceInfo oldInfo, newInfo;
68
69   /** blocklen is the info about found blocks. It will be set to 0, except
70    * at the line#s where blocks start in the old file. At these places it
71    * will be set to the # of lines in the block. During printout ,
72    * this # will be reset to -1 if the block is printed as a MOVE block
73    * (because the printout phase will encounter the block twice, but
74    * must only print it once.)
75    * The array declarations are to MAXLINECOUNT+2 so that we can have two
76    * extra lines (pseudolines) at line# 0 and line# MAXLINECOUNT+1
77    * (or less).
78    */

79   private int blocklen[];
80
81   /** Do one string comparison. Called with both strings. */
82   public List JavaDoc diff(String JavaDoc oldText, String JavaDoc newText) {
83     oldInfo = new SourceInfo();
84     newInfo = new SourceInfo();
85     /* we don't process until we know both files really do exist. */
86
87     inputScan(oldText, oldInfo);
88     inputScan(newText, newInfo);
89
90     /* Now that we've read all the lines, allocate some arrays.
91      */

92     oldInfo.alloc();
93     newInfo.alloc();
94
95     blocklen = new int[(oldInfo.maxLine > newInfo.maxLine ?
96       oldInfo.maxLine : newInfo.maxLine) + 2];
97
98     /* Now do the work, and print the results. */
99     transform();
100     return printOut();
101   }
102
103   /**
104    * inputscan Reads the file specified by pinfo.file.
105    * --------- Places the lines of that file in the symbol table.
106    * Sets pinfo.maxLine to the number of lines found.
107    */

108   void inputScan(String JavaDoc input, SourceInfo pinfo) {
109     pinfo.maxLine = 0;
110     StringTokenizer JavaDoc tokenizer = new StringTokenizer JavaDoc(input, "\n\r");
111     while (tokenizer.hasMoreTokens()) {
112       String JavaDoc line = tokenizer.nextToken();
113       storeLine(line, pinfo);
114     }
115   }
116
117   /**
118    * storeline Places line into symbol table.
119    * --------- Expects pinfo.maxLine initted: increments.
120    * Places symbol table handle in pinfo.ymbol.
121    * Expects pinfo is either oldinfo or newinfo.
122    */

123   void storeLine(String JavaDoc linebuffer, SourceInfo pinfo) {
124     int linenum = ++pinfo.maxLine; /* note, no line zero */
125     if (linenum > SourceInfo.MAXLINECOUNT) {
126       System.err.println("MAXLINECOUNT exceeded, must stop.");
127     }
128     pinfo.symbol[linenum] =
129       Node.addSymbol(linebuffer, pinfo == oldInfo, linenum);
130   }
131
132   /*
133    * transform
134    * Analyzes the file differences and leaves its findings in
135    * the global arrays oldinfo.other, newinfo.other, and blocklen.
136    * Expects both files in symtab.
137    * Expects valid "maxLine" and "symbol" in oldinfo and newinfo.
138    */

139   void transform() {
140
141     int oldline, newline;
142     int oldmax = oldInfo.maxLine + 2; /* Count pseudolines at */
143     int newmax = newInfo.maxLine + 2; /* ..front and rear of file */
144
145     for (oldline = 0; oldline < oldmax; oldline++)
146       oldInfo.other[oldline] = -1;
147     for (newline = 0; newline < newmax; newline++)
148       newInfo.other[newline] = -1;
149
150     scanUnique(); /* scan for lines used once in both files */
151     scanAfter(); /* scan past sure-matches for non-unique blocks */
152     scanBefore(); /* scan backwards from sure-matches */
153     scanBlocks(); /* find the fronts and lengths of blocks */
154   }
155
156   /*
157    * scanunique
158    * Scans for lines which are used exactly once in each file.
159    * Expects both files in symtab, and oldinfo and newinfo valid.
160    * The appropriate "other" array entries are set to the line# in
161    * the other file.
162    * Claims pseudo-lines at 0 and XXXinfo.maxLine+1 are unique.
163    */

164   void scanUnique() {
165     int oldline, newline;
166     Node psymbol;
167
168     for (newline = 1; newline <= newInfo.maxLine; newline++) {
169       psymbol = newInfo.symbol[newline];
170       if (psymbol.symbolIsUnique()) { // 1 use in each file
171
oldline = psymbol.linenum;
172         newInfo.other[newline] = oldline; // record 1-1 map
173
oldInfo.other[oldline] = newline;
174       }
175     }
176     newInfo.other[0] = 0;
177     oldInfo.other[0] = 0;
178     newInfo.other[newInfo.maxLine + 1] = oldInfo.maxLine + 1;
179     oldInfo.other[oldInfo.maxLine + 1] = newInfo.maxLine + 1;
180   }
181
182   /*
183    * scanafter
184    * Expects both files in symtab, and oldinfo and newinfo valid.
185    * Expects the "other" arrays contain positive #s to indicate
186    * lines that are unique in both files.
187    * For each such pair of places, scans past in each file.
188    * Contiguous groups of lines that match non-uniquely are
189    * taken to be good-enough matches, and so marked in "other".
190    * Assumes each other[0] is 0.
191    */

192   void scanAfter() {
193     int oldline, newline;
194
195     for (newline = 0; newline <= newInfo.maxLine; newline++) {
196       oldline = newInfo.other[newline];
197       if (oldline >= 0) { /* is unique in old & new */
198         for (; ;) { /* scan after there in both files */
199           if (++oldline > oldInfo.maxLine) break;
200           if (oldInfo.other[oldline] >= 0) break;
201           if (++newline > newInfo.maxLine) break;
202           if (newInfo.other[newline] >= 0) break;
203
204           /* oldline & newline exist, and
205           aren't already matched */

206
207           if (newInfo.symbol[newline] !=
208             oldInfo.symbol[oldline])
209             break; // not same
210

211           newInfo.other[newline] = oldline; // record a match
212
oldInfo.other[oldline] = newline;
213         }
214       }
215     }
216   }
217
218   /**
219    * scanbefore
220    * As scanafter, except scans towards file fronts.
221    * Assumes the off-end lines have been marked as a match.
222    */

223   void scanBefore() {
224     int oldline, newline;
225
226     for (newline = newInfo.maxLine + 1; newline > 0; newline--) {
227       oldline = newInfo.other[newline];
228       if (oldline >= 0) { /* unique in each */
229         for (; ;) {
230           if (--oldline <= 0) break;
231           if (oldInfo.other[oldline] >= 0) break;
232           if (--newline <= 0) break;
233           if (newInfo.other[newline] >= 0) break;
234
235           /* oldline and newline exist,
236           and aren't marked yet */

237
238           if (newInfo.symbol[newline] !=
239             oldInfo.symbol[oldline])
240             break; // not same
241

242           newInfo.other[newline] = oldline; // record a match
243
oldInfo.other[oldline] = newline;
244         }
245       }
246     }
247   }
248
249   /**
250    * scanblocks - Finds the beginnings and lengths of blocks of matches.
251    * Sets the blocklen array (see definition).
252    * Expects oldinfo valid.
253    */

254   void scanBlocks() {
255     int oldline, newline;
256     int oldfront = 0; // line# of front of a block in old, or 0
257
int newlast = -1; // newline's value during prev. iteration
258

259     for (oldline = 1; oldline <= oldInfo.maxLine; oldline++)
260       blocklen[oldline] = 0;
261     blocklen[oldInfo.maxLine + 1] = UNREAL; // starts a mythical blk
262

263     for (oldline = 1; oldline <= oldInfo.maxLine; oldline++) {
264       newline = oldInfo.other[oldline];
265       if (newline < 0)
266         oldfront = 0; /* no match: not in block */
267       else { /* match. */
268         if (oldfront == 0) oldfront = oldline;
269         if (newline != (newlast + 1)) oldfront = oldline;
270         ++blocklen[oldfront];
271       }
272       newlast = newline;
273     }
274   }
275
276   /* The following are global to printout's subsidiary routines */
277   // enum{ idle, delete, insert, movenew, moveold,
278
// same, change } printstatus;
279
public static final int
280     idle = 0, delete = 1, insert = 2, movenew = 3, moveold = 4,
281   same = 5, change = 6;
282   int printstatus;
283   boolean anyprinted;
284   int printoldline, printnewline; // line numbers in old & new file
285

286   /**
287    * printout - Prints summary to stdout.
288    * Expects all data structures have been filled out.
289    */

290   private List JavaDoc printOut() {
291     List JavaDoc result = new ArrayList JavaDoc();
292
293     printstatus = idle;
294     anyprinted = false;
295     for (printoldline = printnewline = 1; ;) {
296       if (printoldline > oldInfo.maxLine) {
297         newConsume(result);
298         break;
299       }
300       if (printnewline > newInfo.maxLine) {
301         oldConsume(result);
302         break;
303       }
304       if (newInfo.other[printnewline] < 0) {
305         if (oldInfo.other[printoldline] < 0)
306           showChange(result);
307         else
308           showInsert(result);
309       } else if (oldInfo.other[printoldline] < 0)
310         showDelete(result);
311       else if (blocklen[printoldline] < 0)
312         skipOld();
313       else if (oldInfo.other[printoldline] == printnewline)
314         showSame(result);
315       else
316         showMove(result);
317     }
318     setLast(result);
319     return result;
320   }
321
322
323   // Stores an info object
324
// and adds all 'printed' lines to the
325
// last one
326
private void setLast(ChangeInfo info, List JavaDoc result) {
327     setLast(result);
328     last = info;
329   }
330
331   private void setLast(List JavaDoc result) {
332     if (null != last) {
333       last.setLines((String JavaDoc[]) lines.toArray(new String JavaDoc[0]));
334       result.add(last);
335     }
336     lines = new ArrayList JavaDoc();
337   }
338
339   /*
340    * newconsume Part of printout. Have run out of old file.
341    * Print the rest of the new file, as inserts and/or moves.
342    */

343   private void newConsume(List JavaDoc result) {
344     for (; ;) {
345       if (printnewline > newInfo.maxLine)
346         break; /* end of file */
347       if (newInfo.other[printnewline] < 0)
348         showInsert(result);
349       else
350         showMove(result);
351     }
352   }
353
354   /**
355    * oldconsume Part of printout. Have run out of new file.
356    * Process the rest of the old file, printing any
357    * parts which were deletes or moves.
358    */

359   private void oldConsume(List JavaDoc result) {
360     for (; ;) {
361       if (printoldline > oldInfo.maxLine)
362         break; /* end of file */
363       printnewline = oldInfo.other[printoldline];
364       if (printnewline < 0)
365         showDelete(result);
366       else if (blocklen[printoldline] < 0)
367         skipOld();
368       else
369         showMove(result);
370     }
371   }
372
373   /**
374    * showdelete Part of printout.
375    * Expects printoldline is at a deletion.
376    */

377   private void showDelete(List JavaDoc result) {
378     if (printstatus != delete) {
379       ChangeInfo info = new ChangeInfo(ChangeInfo.DELETE, printoldline, printoldline);
380       setLast(info, result);
381     }
382     printstatus = delete;
383     lines.add(oldInfo.symbol[printoldline].getSymbol());
384     anyprinted = true;
385     printoldline++;
386   }
387
388   /*
389    * showinsert Part of printout.
390    * Expects printnewline is at an insertion.
391    */

392   private void showInsert(List JavaDoc result) {
393     if (printstatus == change) {
394       // result.add(">>>> CHANGED TO");
395
} else if (printstatus != insert) {
396       ChangeInfo info = new ChangeInfo(ChangeInfo.INSERT, printoldline, printoldline);
397       setLast(info, result);
398     }
399     printstatus = insert;
400     lines.add(newInfo.symbol[printnewline].getSymbol());
401     anyprinted = true;
402     printnewline++;
403   }
404
405   /**
406    * showchange Part of printout.
407    * Expects printnewline is an insertion.
408    * Expects printoldline is a deletion.
409    */

410   private void showChange(List JavaDoc result) {
411     if (printstatus != change) {
412       ChangeInfo info = new ChangeInfo(ChangeInfo.CHANGE, printoldline, printoldline);
413       setLast(info, result);
414     }
415     printstatus = change;
416     lines.add(oldInfo.symbol[printoldline].getSymbol());
417     anyprinted = true;
418     printoldline++;
419   }
420
421   /**
422    * skipold Part of printout.
423    * Expects printoldline at start of an old block that has
424    * already been announced as a move.
425    * Skips over the old block.
426    */

427   private void skipOld() {
428     printstatus = idle;
429     for (; ;) {
430       if (++printoldline > oldInfo.maxLine)
431         break; /* end of file */
432       if (oldInfo.other[printoldline] < 0)
433         break; /* end of block */
434       if (blocklen[printoldline] != 0)
435         break; /* start of another */
436     }
437   }
438
439   /**
440    * skipnew Part of printout.
441    * Expects printnewline is at start of a new block that has
442    * already been announced as a move.
443    * Skips over the new block.
444    */

445   private void skipNew() {
446     int oldline;
447     printstatus = idle;
448     for (; ;) {
449       if (++printnewline > newInfo.maxLine)
450         break; /* end of file */
451       oldline = newInfo.other[printnewline];
452       if (oldline < 0)
453         break; /* end of block */
454       if (blocklen[oldline] != 0)
455         break; /* start of another */
456     }
457   }
458
459   /**
460    * showsame Part of printout.
461    * Expects printnewline and printoldline at start of
462    * two blocks that aren't to be displayed.
463    */

464   private void showSame(List JavaDoc result) {
465     int count;
466     printstatus = idle;
467     if (newInfo.other[printnewline] != printoldline) {
468       System.err.println("BUG IN LINE REFERENCING");
469     }
470     count = blocklen[printoldline];
471     printoldline += count;
472     printnewline += count;
473   }
474
475   /**
476    * showmove Part of printout.
477    * Expects printoldline, printnewline at start of
478    * two different blocks ( a move was done).
479    */

480   private void showMove(List JavaDoc result) {
481     int oldblock = blocklen[printoldline];
482     int newother = newInfo.other[printnewline];
483     int newblock = blocklen[newother];
484
485     if (newblock < 0)
486       skipNew(); // already printed.
487
else if (oldblock >= newblock) { // assume new's blk moved.
488
blocklen[newother] = -1; // stamp block as "printed".
489
ChangeInfo info = new ChangeInfo(ChangeInfo.MOVE, newother, printoldline);
490       setLast(info, result);
491       for (; newblock > 0; newblock--, printnewline++)
492         lines.add(newInfo.symbol[printnewline].getSymbol());
493       anyprinted = true;
494       printstatus = idle;
495
496     } else /* assume old's block moved */
497       skipOld(); /* target line# not known, display later */
498   }
499 }
500
501 ;
502
503 /**
504  * Class "node". The symbol table routines in this class all
505  * understand the symbol table format, which is a binary tree.
506  * The methods are: addSymbol, symbolIsUnique, showSymbol.
507  */

508 class Node { /* the tree is made up of these nodes */
509   Node pleft, pright;
510   int linenum;
511
512   static final int freshnode = 0,
513   oldonce = 1, newonce = 2, bothonce = 3, other = 4;
514
515   int /* enum linestates */ linestate;
516   String JavaDoc line;
517
518   static Node panchor = null; /* symtab is a tree hung from this */
519
520   Node(String JavaDoc pline) {
521     pleft = pright = null;
522     linestate = freshnode;
523     /* linenum field is not always valid */
524     line = pline;
525   }
526
527   /**
528    * matchsymbol Searches tree for a match to the line.
529    * If node's linestate == freshnode, then created the node.
530    */

531   static Node matchsymbol(String JavaDoc pline) {
532     int comparison;
533     Node pnode = panchor;
534     if (panchor == null) return panchor = new Node(pline);
535     for (; ;) {
536       comparison = pnode.line.compareTo(pline);
537       if (comparison == 0) return pnode; /* found */
538
539       if (comparison < 0) {
540         if (pnode.pleft == null) {
541           pnode.pleft = new Node(pline);
542           return pnode.pleft;
543         }
544         pnode = pnode.pleft;
545       }
546       if (comparison > 0) {
547         if (pnode.pright == null) {
548           pnode.pright = new Node(pline);
549           return pnode.pright;
550         }
551         pnode = pnode.pright;
552       }
553     }
554     /* NOTE: There are return stmts, so control does not get here. */
555   }
556
557   /**
558    * addSymbol(String pline) - Saves line into the symbol table.
559    * Returns a handle to the symtab entry for that unique line.
560    * If inoldfile nonzero, then linenum is remembered.
561    */

562   static Node addSymbol(String JavaDoc pline, boolean inoldfile, int linenum) {
563     Node pnode;
564     pnode = matchsymbol(pline); /* find the node in the tree */
565     if (pnode.linestate == freshnode) {
566       pnode.linestate = inoldfile ? oldonce : newonce;
567     } else {
568       if ((pnode.linestate == oldonce && !inoldfile) ||
569         (pnode.linestate == newonce && inoldfile))
570         pnode.linestate = bothonce;
571       else
572         pnode.linestate = other;
573     }
574     if (inoldfile) pnode.linenum = linenum;
575     return pnode;
576   }
577
578   /**
579    * symbolIsUnique Arg is a ptr previously returned by addSymbol.
580    * -------------- Returns true if the line was added to the
581    * symbol table exactly once with inoldfile true,
582    * and exactly once with inoldfile false.
583    */

584   public boolean symbolIsUnique() {
585     return (linestate == bothonce);
586   }
587
588   /**
589    * showSymbol Prints the line to stdout.
590    */

591   public String JavaDoc getSymbol() {
592     return line;
593   }
594 }
595
596 /** This is the info kept per-source. */
597 class SourceInfo {
598   static final int MAXLINECOUNT = 20000;
599
600   public int maxLine; /* After input done, # lines in file. */
601   Node symbol[]; /* The symtab handle of each line. */
602   int other[]; /* Map of line# to line# in other file */
603   /* ( -1 means don't-know ). */
604   /* Allocated AFTER the lines are read. */
605
606   /**
607    * Normal constructor
608    */

609   SourceInfo() {
610     symbol = new Node[MAXLINECOUNT + 2];
611     other = null; // allocated later!
612
}
613
614   // This is done late, to be same size as # lines in input file.
615
void alloc() {
616     other = new int[symbol.length + 2];
617   }
618 }
619
620 ;
Popular Tags