1 package com.puppycrawl.tools.checkstyle.checks.duplicates; 20 21 import java.io.File ; 22 import java.io.IOException ; 23 import java.util.Arrays ; 24 25 import com.puppycrawl.tools.checkstyle.api.AbstractFileSetCheck; 26 import com.puppycrawl.tools.checkstyle.api.Utils; 27 import com.puppycrawl.tools.checkstyle.api.MessageDispatcher; 28 import org.apache.commons.logging.Log; 29 import org.apache.commons.logging.LogFactory; 30 31 41 public final class StrictDuplicateCodeCheck extends AbstractFileSetCheck 42 { 43 47 private interface ChecksumGenerator 48 { 49 56 long[] convertLines(String [] aOriginalLines); 57 } 58 59 60 64 private class TextfileChecksumGenerator implements ChecksumGenerator 65 { 66 67 public long[] convertLines(String [] aOriginalLines) 68 { 69 final long[] checkSums = new long[aOriginalLines.length]; 70 for (int i = 0; i < aOriginalLines.length; i++) { 71 final String line = aOriginalLines[i].trim(); 72 checkSums[i] = calcChecksum(line); 73 } 74 return checkSums; 75 } 76 77 82 protected long calcChecksum(String aLine) 83 { 84 final int bigPrime = 317; 87 88 long result = 0; 91 for (int i = 0; i < aLine.length(); i++) { 92 final long c = aLine.charAt(i); 93 result += bigPrime * i + c; 94 } 95 return result; 96 } 97 } 98 99 102 private class JavaChecksumGenerator extends TextfileChecksumGenerator 103 { 104 107 110 111 117 protected long calcChecksum(String aLine) 118 { 119 if (aLine.startsWith("import ")) { 120 return IGNORE; 121 } 122 return super.calcChecksum(aLine); 123 } 124 } 125 126 127 private static final Log LOG = 128 LogFactory.getLog(StrictDuplicateCodeCheck.class); 129 130 131 private static final long IGNORE = Long.MIN_VALUE; 132 133 134 private static final int DEFAULT_MIN_DUPLICATE_LINES = 12; 135 136 137 private int mMin = DEFAULT_MIN_DUPLICATE_LINES; 138 139 140 private String mBasedir; 141 142 143 private long[][] mLineChecksums; 144 145 146 private long[][] mSortedRelevantChecksums; 147 148 149 private File [] mFiles; 150 151 153 154 private int mDuplicates; 155 156 157 private int mLoc; 158 159 160 private long mCacheMisses; 161 162 163 private long mCacheHits; 164 165 166 public StrictDuplicateCodeCheck() 167 { 168 } 169 170 177 public void setMin(int aMin) 178 { 179 mMin = aMin; 180 } 181 182 183 public void setBasedir(String aBasedir) 184 { 185 mBasedir = aBasedir; 186 } 187 188 191 public synchronized void process(File [] aFiles) 192 { 193 final long start = System.currentTimeMillis(); 194 mLoc = 0; 195 mDuplicates = 0; 196 mFiles = filter(aFiles); 197 mLineChecksums = new long[mFiles.length][]; 198 mSortedRelevantChecksums = new long[mFiles.length][]; 199 200 if (LOG.isDebugEnabled()) { 201 LOG.debug("Reading input files"); 202 } 203 204 for (int i = 0; i < mFiles.length; i++) { 205 try { 206 final File file = mFiles[i]; 207 final String [] lines = 208 Utils.getLines(file.getPath(), getCharset()); 209 final ChecksumGenerator transformer = 210 findChecksumGenerator(file); 211 mLineChecksums[i] = transformer.convertLines(lines); 212 } 213 catch (final IOException ex) { 214 LOG.error("Cannot access files to check, giving up: " 215 + ex.getMessage(), ex); 216 mLineChecksums = new long[0][0]; 220 } 221 } 222 fillSortedRelevantChecksums(); 223 224 final long endReading = System.currentTimeMillis(); 225 findDuplicates(); 226 final long endSearching = System.currentTimeMillis(); 227 228 dumpStats(start, endReading, endSearching); 229 230 mLineChecksums = null; 231 mSortedRelevantChecksums = null; 232 } 233 234 240 private ChecksumGenerator findChecksumGenerator(File aFile) 241 { 242 if (aFile.getName().endsWith(".java")) { 243 return new JavaChecksumGenerator(); 244 } 245 return new TextfileChecksumGenerator(); 247 } 248 249 255 private void dumpStats(long aStart, long aEndReading, long aEndSearching) 256 { 257 if (LOG.isDebugEnabled()) { 258 final long cacheLookups = mCacheHits + mCacheMisses; 259 final long initTime = aEndReading - aStart; 260 final long workTime = aEndSearching - aEndReading; 261 LOG.debug("cache hits = " + mCacheHits + "/" + cacheLookups); 262 LOG.debug("files = " + mFiles.length); 263 LOG.debug("loc = " + mLoc); 264 LOG.debug("duplicates = " + mDuplicates); 265 LOG.debug("Runtime = " + initTime + " + " + workTime); 266 } 267 } 268 269 277 private void fillSortedRelevantChecksums() 278 { 279 for (int i = 0; i < mLineChecksums.length; i++) { 280 int count = 0; 281 final long[] checksums = mLineChecksums[i]; 282 final long[] relevant = new long[checksums.length]; 283 for (int j = 0; j < checksums.length; j++) { 284 final long checksum = checksums[j]; 285 if (checksum != IGNORE) { 286 relevant[count++] = checksum; 287 } 288 } 289 Arrays.sort(relevant, 0, count); 290 final long[] result = new long[count]; 291 System.arraycopy(relevant, 0, result, 0, count); 292 mSortedRelevantChecksums[i] = result; 293 } 294 } 295 296 301 private void findDuplicates() 302 { 303 if (LOG.isDebugEnabled()) { 304 LOG.debug("Analysis phase"); 305 } 306 307 310 313 315 for (int i = 0; i < mFiles.length; i++) { 316 317 final String path = mFiles[i].getPath(); 318 319 getMessageCollector().reset(); 320 final MessageDispatcher dispatcher = getMessageDispatcher(); 321 dispatcher.fireFileStarted(path); 322 323 mLoc += mLineChecksums[i].length; 324 for (int j = 0; j <= i; j++) { 325 findDuplicatesInFiles(i, j); 326 } 327 328 fireErrors(path); 329 dispatcher.fireFileFinished(path); 330 } 331 } 332 333 338 private void findDuplicatesInFiles(int aI, int aJ) 339 { 340 final int iFileLength = mLineChecksums[aI].length; 341 342 final boolean[] iLineOccurInJ = new boolean[iFileLength]; 344 for (int iLine = 0; iLine < iFileLength; iLine++) { 345 iLineOccurInJ[iLine] = (Arrays.binarySearch( 346 mSortedRelevantChecksums[aJ], mLineChecksums[aI][iLine]) >= 0); 347 } 348 349 for (int iLine = 0; iLine < iFileLength - mMin; iLine++) { 352 353 boolean fastExit = false; 355 final int kLimit = iFileLength - iLine; 356 for (int k = 0; k < Math.min(mMin, kLimit); k++) { 357 if (!iLineOccurInJ[iLine + k]) { 358 fastExit = true; 359 break; 360 } 361 } 362 363 if (!fastExit) { 364 mCacheMisses += 1; 366 iLine = findDuplicateFromLine(aI, aJ, iLine); 367 } 368 else { 369 mCacheHits += 1; 370 } 371 } 372 } 373 374 383 private int findDuplicateFromLine(int aI, int aJ, int aILine) 384 { 385 388 final int iFileLength = mLineChecksums[aI].length; 389 final int jFileLength = mLineChecksums[aJ].length; 390 391 for (int jLine = 0; jLine < jFileLength - mMin; jLine++) { 392 393 if ((aI == aJ) && (aILine == jLine)) { 394 continue; 395 } 396 397 int equivalent = 0; 398 while ((aILine + equivalent < iFileLength) 399 && (jLine + equivalent < jFileLength) 400 && (mLineChecksums[aI][aILine + equivalent] != IGNORE) 401 && (mLineChecksums[aI][aILine + equivalent] 402 == mLineChecksums[aJ][jLine + equivalent])) 403 { 404 equivalent += 1; 405 } 406 407 if (((aI != aJ) || (aILine < jLine)) && (equivalent >= mMin)) { 408 reportDuplicate(equivalent, aILine, mFiles[aJ], jLine); 409 aILine += equivalent; } 411 } 412 return aILine; 413 } 414 415 423 private void reportDuplicate( 424 int aEquivalent, int aILine, File aJFile, int aJLine) 425 { 426 final Integer dupLines = new Integer (aEquivalent); 427 final Integer startLine = new Integer (aJLine + 1); 428 final String fileName = 429 Utils.getStrippedFileName(mBasedir, aJFile.getPath()); 430 log(aILine + 1, "duplicates.lines", 431 new Object []{dupLines, fileName, startLine}); 432 mDuplicates += 1; 433 } 434 435 } 436 | Popular Tags |