1 4 package net.sourceforge.pmd.cpd; 5 6 import java.util.ArrayList ; 7 import java.util.Collections ; 8 import java.util.HashMap ; 9 import java.util.HashSet ; 10 import java.util.Iterator ; 11 import java.util.List ; 12 import java.util.Map ; 13 import java.util.Set ; 14 15 public class MatchCollector { 16 17 private MatchAlgorithm ma; 18 private Map startMap = new HashMap (); 19 private Map fileMap = new HashMap (); 20 21 public MatchCollector(MatchAlgorithm ma) { 22 this.ma = ma; 23 } 24 25 public void collect(List marks) { 26 for (int i = 0; i < marks.size() - 1; i++) { 28 TokenEntry mark1 = (TokenEntry) marks.get(i); 29 for (int j = i + 1; j < marks.size(); j++) { 30 TokenEntry mark2 = (TokenEntry) marks.get(j); 31 int diff = mark1.getIndex() - mark2.getIndex(); 32 if (-diff < ma.getMinimumTileSize()) { 33 continue; 34 } 35 if (hasPreviousDupe(mark1, mark2)) { 36 continue; 37 } 38 39 int dupes = countDuplicateTokens(mark1, mark2); 41 if (dupes < ma.getMinimumTileSize()) { 42 continue; 43 } 44 if (diff + dupes >= 1) { 46 continue; 47 } 48 determineMatch(mark1, mark2, dupes); 49 } 50 } 51 } 52 53 public List getMatches() { 54 List matchList = new ArrayList (startMap.values()); 55 Collections.sort(matchList); 56 Set matchSet = new HashSet (); 57 Match.MatchCode matchCode = new Match.MatchCode(); 58 for (int i = matchList.size(); i > 1; i--) { 59 Match match1 = (Match) matchList.get(i - 1); 60 TokenEntry mark1 = (TokenEntry) match1.getMarkSet().iterator().next(); 61 matchSet.clear(); 62 matchSet.add(match1.getMatchCode()); 63 for (int j = i - 1; j > 0; j--) { 64 Match match2 = (Match) matchList.get(j - 1); 65 if (match1.getTokenCount() != match2.getTokenCount()) { 66 break; 67 } 68 TokenEntry mark2 = null; 69 for (Iterator iter = match2.getMarkSet().iterator(); iter.hasNext();) { 70 mark2 = (TokenEntry) iter.next(); 71 if (mark2 != mark1) { 72 break; 73 } 74 } 75 int dupes = countDuplicateTokens(mark1, mark2); 76 if (dupes < match1.getTokenCount()) { 77 break; 78 } 79 matchSet.add(match2.getMatchCode()); 80 match1.getMarkSet().addAll(match2.getMarkSet()); 81 matchList.remove(i - 2); 82 i--; 83 } 84 if (matchSet.size() == 1) { 85 continue; 86 } 87 Set pruned = match1.getMarkSet(); 89 boolean done = false; 90 ArrayList a1 = new ArrayList (match1.getMarkSet()); 91 Collections.sort(a1); 92 for (int outer = 0; outer < a1.size() - 1 && !done; outer++) { 93 TokenEntry cmark1 = (TokenEntry) a1.get(outer); 94 for (int inner = outer + 1; inner < a1.size() && !done; inner++) { 95 TokenEntry cmark2 = (TokenEntry) a1.get(inner); 96 matchCode.setFirst(cmark1.getIndex()); 97 matchCode.setSecond(cmark2.getIndex()); 98 if (!matchSet.contains(matchCode)) { 99 if (pruned.size() > 2) { 100 pruned.remove(cmark2); 101 } 102 if (pruned.size() == 2) { 103 done = true; 104 } 105 } 106 } 107 } 108 } 109 return matchList; 110 } 111 112 115 private void determineMatch(TokenEntry mark1, TokenEntry mark2, int dupes) { 116 Match match = new Match(dupes, mark1, mark2); 117 String fileKey = mark1.getTokenSrcID() + mark2.getTokenSrcID(); 118 List pairMatches = (ArrayList ) fileMap.get(fileKey); 119 if (pairMatches == null) { 120 pairMatches = new ArrayList (); 121 fileMap.put(fileKey, pairMatches); 122 } 123 boolean add = true; 124 for (int i = 0; i < pairMatches.size(); i++) { 125 Match other = (Match) pairMatches.get(i); 126 if (other.getFirstMark().getIndex() + other.getTokenCount() - mark1.getIndex() 127 > 0) { 128 boolean ordered = other.getSecondMark().getIndex() - mark2.getIndex() < 0; 129 if ((ordered && (other.getEndIndex() - mark2.getIndex() > 0)) 130 || (!ordered && (match.getEndIndex() - other.getSecondMark().getIndex()) > 0)) { 131 if (other.getTokenCount() >= match.getTokenCount()) { 132 add = false; 133 break; 134 } else { 135 pairMatches.remove(i); 136 startMap.remove(other.getMatchCode()); 137 } 138 } 139 } 140 } 141 if (add) { 142 pairMatches.add(match); 143 startMap.put(match.getMatchCode(), match); 144 } 145 } 146 147 private boolean hasPreviousDupe(TokenEntry mark1, TokenEntry mark2) { 148 if (mark1.getIndex() == 0) { 149 return false; 150 } 151 return !matchEnded(ma.tokenAt(-1, mark1), ma.tokenAt(-1, mark2)); 152 } 153 154 private int countDuplicateTokens(TokenEntry mark1, TokenEntry mark2) { 155 int index = 0; 156 while (!matchEnded(ma.tokenAt(index, mark1), ma.tokenAt(index, mark2))) { 157 index++; 158 } 159 return index; 160 } 161 162 private boolean matchEnded(TokenEntry token1, TokenEntry token2) { 163 return token1.getIdentifier() != token2.getIdentifier() || token1 == TokenEntry.EOF || token2 == TokenEntry.EOF; 164 } 165 } | Popular Tags |