KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jahia > sqlprofiler > StringDiff


1 /*
2  * Copyright (C) Jahia Ltd. All rights reserved.
3  *
4  * This software is published under the terms of the Jahia Open Software
5  * License version 1.1, a copy of which has been included with this
6  * distribution in the LICENSE.txt file.
7  */

8 package org.jahia.sqlprofiler;
9
10 import java.util.ArrayList JavaDoc;
11
12 /**
13  * <p>Title: StringDiff</p>
14  * <p>Description: This class implements a string comparison algorithm that
15  * allows to know the similirity sequences, etc. The main method is the diff
16  * method which performs the difference algorithm and sets all the other
17  * variables.</p>
18  * <p>Copyright: Copyright (c) 2003</p>
19  * <p>Company: Jahia Ltd</p>
20  * @author Serge Huber
21  * @version 1.0
22  */

23
24 public class StringDiff {
25
26     private java.util.ArrayList JavaDoc leftSequences;
27     private java.util.ArrayList JavaDoc rightSequences;
28     private String JavaDoc left;
29     private String JavaDoc right;
30     private int sequenceCount;
31     private int sameCharCount;
32
33     public StringDiff() {
34     }
35
36     /**
37      * <p>Title: Sequence position</p>
38      * <p>Description: This class is used to store sequence positions in a
39      * string. A sequence position is compose of two coordinates, a starting
40      * and an ending position for a sequence that is present in both strings.
41      * These coordinates can then be used to build visual differentiators for
42      * the strings, or whatever one can imagine.</p>
43      * <p>Copyright: Copyright (c) 2003</p>
44      * <p>Company: </p>
45      * @author Serge Huber
46      * @version 1.0
47      */

48     public class SequencePos {
49         private int startPos;
50         private int endPos;
51         public SequencePos(int startPos, int endPos) {
52             this.startPos = startPos;
53             this.endPos = endPos;
54         }
55
56         public int getStartPos() {
57             return this.startPos;
58         }
59
60         public int getEndPos() {
61             return this.endPos;
62         }
63     }
64
65     /**
66      * This is the main method of this class, that actually performs the
67      * matching between the two strings. The result of this operation is stored
68      * in class fields such as leftSequences and rightSequences, sameCharCount,
69      * sequenceCount, etc... These results stay available until the next call
70      * to the diff method is done.
71      * @param left the first string to compare
72      * @param right the second string to compare the first one with
73      */

74     public void diff(String JavaDoc left, String JavaDoc right) {
75         int curLeftPos = 0;
76         int curRightPos = 0;
77         this.sameCharCount = 0;
78         this.sequenceCount = 0;
79
80         this.leftSequences = new ArrayList JavaDoc();
81         this.rightSequences = new ArrayList JavaDoc();
82
83         this.left = left;
84         this.right = right;
85
86         // first let's handle all the special cases...
87

88         if (left == null) {
89             return;
90         }
91         if (right == null) {
92             return;
93         }
94
95         if (left.length() < 2) {
96             if (right.startsWith(left)) {
97                 SequencePos leftSequencePos = new SequencePos(0, left.length());
98                 leftSequences.add(leftSequencePos);
99                 rightSequences.add(leftSequencePos);
100                 return;
101             }
102         }
103         if (right.length() < 2) {
104             if (left.startsWith(right)) {
105                 SequencePos rightSequencePos = new SequencePos(0, right.length());
106                 leftSequences.add(rightSequencePos);
107                 rightSequences.add(rightSequencePos);
108                 return;
109             }
110         }
111
112         // special cases have been handled, now to the general algorithm...
113

114         // first we follow all the similar sequences if they are similar for
115
// more than one character.
116
if (
117             (
118             ( (curLeftPos + 1) < left.length()) &&
119             ( (curRightPos + 1) < right.length()) &&
120             (left.charAt(curLeftPos + 1) == (right.charAt(curRightPos + 1)))
121             ) ||
122             (
123             ( (curLeftPos + 1) == left.length()) &&
124             ( (curRightPos + 1) == right.length())
125             )
126             ) {
127
128             int leftSequenceStartPos = curLeftPos;
129             int rightSequenceStartPos = curRightPos;
130
131             while ( (curLeftPos < left.length()) &&
132                    (curRightPos < right.length()) &&
133                    (left.charAt(curLeftPos) == right.charAt(curRightPos))) {
134                 System.out.print(left.charAt(curLeftPos));
135                 sameCharCount++;
136                 curLeftPos++;
137                 curRightPos++;
138             }
139             if (curLeftPos != 0) {
140                 sequenceCount++;
141                 SequencePos newLeftSequencePos = new SequencePos(
142                     leftSequenceStartPos, curLeftPos);
143                 SequencePos newRightSequencePos = new SequencePos(
144                     rightSequenceStartPos, curRightPos);
145                 leftSequences.add(newLeftSequencePos);
146                 rightSequences.add(newRightSequencePos);
147             }
148
149         }
150
151         while (curLeftPos < left.length()) {
152             // we now have differences in the two strings, lets try to find
153
// another simitude join point.
154
int tempRightPos = curRightPos;
155             while ( (tempRightPos < right.length() &&
156                      (left.charAt(curLeftPos) != right.charAt(tempRightPos)))) {
157                 tempRightPos++;
158             }
159             // if we arrived here it means we either found a new similitude or
160
// we looked through the whole right string.
161
if (tempRightPos == right.length()) {
162                 // we arrived at the end of the right string without finding
163
// anything, let's move one character in the left string and
164
// continue searching...
165
curLeftPos++;
166             } else {
167                 // we found similitudes again, let's eat them up only if we
168
// have more than one similar character.
169
if (
170                     (
171                     ( (curLeftPos + 1) < left.length()) &&
172                     ( (tempRightPos + 1) < right.length()) &&
173                     (left.charAt(curLeftPos + 1) ==
174                      (right.charAt(tempRightPos + 1)))
175                     ) ||
176                     (
177                     ( (curLeftPos + 1) == left.length()) &&
178                     ( (tempRightPos + 1) == right.length())
179                     )
180                     ) {
181                     System.out.print("?");
182                     curRightPos = tempRightPos;
183                     int leftSequenceStartPos = curLeftPos;
184                     int rightSequenceStartPos = curRightPos;
185                     while ( (curLeftPos < left.length()) &&
186                            (curRightPos < right.length()) &&
187                            (left.charAt(curLeftPos) == right.charAt(curRightPos))) {
188                         System.out.print(left.charAt(curLeftPos));
189                         sameCharCount++;
190                         curLeftPos++;
191                         curRightPos++;
192                     }
193                     SequencePos newLeftSequencePos = new SequencePos(
194                         leftSequenceStartPos, curLeftPos);
195                     SequencePos newRightSequencePos = new SequencePos(
196                         rightSequenceStartPos, curRightPos);
197                     leftSequences.add(newLeftSequencePos);
198                     rightSequences.add(newRightSequencePos);
199                     sequenceCount++;
200                 } else {
201                     curLeftPos++;
202                 }
203             }
204         }
205
206         // ok we finished looking through the left string, but the right
207
// string might be longer, let's make sure we add the characters to the
208
// diff list.
209

210     }
211
212     /**
213      * Returns the array of sequence positions of similar chars for the left
214      * string
215      * @return an ArrayList of SequencePos objects that contain the position of
216      * the sequences that were found in both strings.
217      */

218     public java.util.ArrayList JavaDoc getLeftSequences() {
219         return leftSequences;
220     }
221
222     /**
223      * Returns the array of sequence positions of similar chars for the right
224      * string
225      * @return an ArrayList of SequencePos objects that contain the position of
226      * the sequences that were found in both strings.
227      */

228     public java.util.ArrayList JavaDoc getRightSequences() {
229         return rightSequences;
230     }
231
232     /**
233      * @return the last used left string
234      */

235     public String JavaDoc getLeft() {
236         return left;
237     }
238
239     /**
240      * @return the last used right string
241      */

242     public String JavaDoc getRight() {
243         return right;
244     }
245
246     /**
247      * @return the number of sequences that are present in both strings
248      */

249     public int getSequenceCount() {
250         return sequenceCount;
251     }
252
253     /**
254      * @return the full count of characters that are in all the sequences that
255      * are the same between both strings
256      */

257     public int getSameCharCount() {
258         return sameCharCount;
259     }
260
261     private static void displayResults(StringDiff stringDiff) {
262         System.out.println("\nsameCharCount=" + stringDiff.getSameCharCount() +
263                            " sequenceCount=" +
264                            stringDiff.getSequenceCount());
265         for (int i = 0; i < stringDiff.getLeftSequences().size(); i++) {
266             SequencePos leftSequencePos = (SequencePos) stringDiff.
267                 getLeftSequences().get(i);
268             SequencePos rightSequencePos = (SequencePos) stringDiff.
269                 getRightSequences().get(i);
270             System.out.println("sequence " + i + " : left=[" +
271                                leftSequencePos.getStartPos() + "," +
272                                leftSequencePos.getEndPos() +
273                                "]=[" +
274                                stringDiff.getLeft().substring(leftSequencePos.
275                 getStartPos(),
276                 leftSequencePos.getEndPos()) +
277                                "] right=[" + rightSequencePos.getStartPos() +
278                                "," +
279                                rightSequencePos.getEndPos() + "]=[" +
280                                stringDiff.getRight().substring(rightSequencePos.
281                 getStartPos(),
282                 rightSequencePos.getEndPos()) +
283                                "]");
284         }
285
286     }
287
288     /**
289      * Runs a few tests cases to make sure the diff algorithm is correct.
290      * @param args
291      */

292     public static void main(String JavaDoc[] args) {
293         StringDiff stringDiff = new StringDiff();
294
295         // test cases.
296
stringDiff.diff("select * from jahia_fields_data where iasdfd_jahia_fields_data=10 order by id_jahia_fields_datba",
297                         "select * from jahia_fkjahdflields_data where id_jahiadsfa_fields_data=23 order by id_jahia_fields_data");
298         displayResults(stringDiff);
299         stringDiff.diff(
300             "select * from jahia_fields_data where id_jahia_fields_data=10 order by",
301             "select * from jahia_fields_data where id_jahia_fields_data=10 order by id_jahia_fields_data");
302         displayResults(stringDiff);
303         stringDiff.diff("a",
304                         "a");
305         displayResults(stringDiff);
306         stringDiff.diff("",
307                         "");
308         displayResults(stringDiff);
309         stringDiff.diff("",
310                         "a");
311         displayResults(stringDiff);
312         stringDiff.diff("a",
313                         "");
314         displayResults(stringDiff);
315         stringDiff.diff("aa",
316                         "a");
317         displayResults(stringDiff);
318         stringDiff.diff("a",
319                         "aa");
320         displayResults(stringDiff);
321         stringDiff.diff("aa",
322                         "aa");
323         displayResults(stringDiff);
324         stringDiff.diff(null,
325                         null);
326         displayResults(stringDiff);
327         stringDiff.diff("a",
328                         null);
329         displayResults(stringDiff);
330         stringDiff.diff(null,
331                         "a");
332         displayResults(stringDiff);
333
334     }
335
336 }
Popular Tags