BlockComparator


1   /*
2    * Copyright 2004 Outerthought bvba and Schaubroeck nv
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package org.outerj.daisy.diff;
17  
18  import org.eclipse.compare.rangedifferencer.IRangeComparator;
19  
20  import java.util.ArrayList  ;
21  import java.util.List  ;
22  
23  /**
24   * A Comparator for diffing corresponding changes resulting from a line-based diff.
25   * For example, if a line-based diff says "these lines were replaced by those lines",
26   * then this comparator can be used to compare the content in those lines.
27   *
28   * <p>This comparator has also a little bit of special treatment for HTML/XML tags,
29   * ie it tries to treat tags as single entities to be compared.
30   */
31  public class BlockComparator implements IRangeComparator {
32      private List   tokens;
33  
34      /**
35       * @param text should contain the lines of text concatenated with a "\n" in between
36       *        them.
37       */
38      public BlockComparator(StringBuffer   text) {
39          this.tokens = splitLineTokens(text);
40      }
41  
42      public int getRangeCount() {
43          return tokens.size();
44      }
45  
46      public boolean rangesEqual(int thisIndex, IRangeComparator other, int otherIndex) {
47          String   thisToken = getToken(thisIndex);
48          String   otherToken = ((BlockComparator)other).getToken(otherIndex);
49  
50          // treating newlines and spaces the same gives a good effect
51          if ((thisToken.equals(" ") && otherToken.equals("\n")) || (thisToken.equals("\n") && otherToken.equals(" ")))
52              return true;
53  
54          return thisToken.equals(otherToken);
55      }
56  
57      public boolean skipRangeComparison(int length, int maxLength, IRangeComparator other) {
58          return false;
59      }
60  
61      private String   getToken(int i) {
62          if (i < tokens.size())
63              return (String  )tokens.get(i);
64          return "";
65      }
66  
67      public String   substring(int startToken) {
68          return substring(startToken, tokens.size());
69      }
70  
71      public String   substring(int startToken, int endToken) {
72          if (startToken == endToken) {
73              return (String  )tokens.get(startToken);
74          } else {
75              StringBuffer   result = new StringBuffer  ();
76              for (int i = startToken; i < endToken; i++) {
77                  result.append((String  )tokens.get(i));
78              }
79              return result.toString();
80          }
81      }
82  
83      public String  [] substringSplitted(int startToken) {
84          return substringSplitted(startToken, tokens.size());
85      }
86  
87      /**
88       * Returns the substring as an array of strings, each array entry
89       * corresponding to one line. The newlines themselves are also
90       * entries in the array.
91       */
92      public String  [] substringSplitted(int startToken, int endToken) {
93          if (startToken == endToken) {
94              return new String  [] { (String  )tokens.get(startToken) };
95          } else {
96              int resultPos = -1;
97              String  [] result = null;
98              StringBuffer   resultBuffer = new StringBuffer  ();
99              for (int i = startToken; i < endToken; i++) {
100                 String   token = (String  )tokens.get(i);
101                 if (token.equals("\n")) {
102                     if (resultBuffer.length() > 0) {
103                         result = grow(result, 2);
104                         result[++resultPos] = resultBuffer.toString();
105                         result[++resultPos] = "\n";
106                         resultBuffer.setLength(0);
107                     } else {
108                         result = grow(result, 1);
109                         result[++resultPos] = "\n";
110                     }
111                 } else {
112                     resultBuffer.append(token);
113                 }
114             }
115             if (resultBuffer.length() > 0) {
116                 result = grow(result, 1);
117                 result[++resultPos] = resultBuffer.toString();
118             } else if (result == null) {
119                 result = new String  [0];
120             }
121             return result;
122         }
123     }
124 
125     private String  [] grow(String  [] strings, int count) {
126         if (strings == null) {
127             return new String  [count];
128         } else {
129             String  [] result = new String  [strings.length + count];
130             System.arraycopy(strings, 0, result, 0, strings.length);
131             return result;
132         }
133     }
134 
135     private ArrayList   splitLineTokens(StringBuffer   text) {
136         ArrayList   tokens = new ArrayList  (100);
137         StringBuffer   currentWord = new StringBuffer  (100);
138 
139         for (int i = 0; i < text.length(); i++) {
140             char c = text.charAt(i);
141             switch (c) {
142                 case '<': // begin of a HTML/XML tag: let it stick to the next word
143                     if (currentWord.length() > 0) {
144                         tokens.add(currentWord.toString());
145                         currentWord.setLength(0);
146                     }
147                     currentWord.append(c);
148                     break;
149                 case '/':
150                     // special handling for (possible) closing HTML/XML tag
151                     if (currentWord.length() == 1 && currentWord.charAt(0) == '<') {
152                         currentWord.append(c);
153                         break;
154                     }
155                     // else: no break so that code below gets executed
156                 case '>':
157                     if (currentWord.length() > 2 && currentWord.charAt(0) == '<' && currentWord.charAt(1) == '/') {
158                         currentWord.append(c);
159                         break;
160                     }
161                 case '.':
162                 case '!':
163                 case ',':
164                 case ';':
165                 case '?':
166                 case ' ':
167                 case '=':
168                 case '\'':
169                 case '"':
170                 case '\t':
171                 case '\r':
172                 case '\n':
173                     if (currentWord.length() > 0) {
174                         tokens.add(currentWord.toString());
175                         currentWord.setLength(0);
176                     }
177                     tokens.add(String.valueOf(c));
178                     break;
179                 default:
180                     currentWord.append(c);
181             }
182         }
183 
184         if (currentWord.length() > 0) {
185             tokens.add(currentWord.toString());
186         }
187         return tokens;
188     }
189 }
190
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags