KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > sourceforge > pmd > cpd > MatchCollector


1 /**
2  * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3  */

4 package net.sourceforge.pmd.cpd;
5
6 import java.util.ArrayList JavaDoc;
7 import java.util.Collections JavaDoc;
8 import java.util.HashMap JavaDoc;
9 import java.util.HashSet JavaDoc;
10 import java.util.Iterator JavaDoc;
11 import java.util.List JavaDoc;
12 import java.util.Map JavaDoc;
13 import java.util.Set JavaDoc;
14
15 public class MatchCollector {
16
17     private MatchAlgorithm ma;
18     private Map JavaDoc startMap = new HashMap JavaDoc();
19     private Map JavaDoc fileMap = new HashMap JavaDoc();
20
21     public MatchCollector(MatchAlgorithm ma) {
22         this.ma = ma;
23     }
24
25     public void collect(List JavaDoc marks) {
26         //first get a pairwise collection of all maximal matches
27
for (int i = 0; i < marks.size() - 1; i++) {
28             TokenEntry mark1 = (TokenEntry) marks.get(i);
29             for (int j = i + 1; j < marks.size(); j++) {
30                 TokenEntry mark2 = (TokenEntry) marks.get(j);
31                 int diff = mark1.getIndex() - mark2.getIndex();
32                 if (-diff < ma.getMinimumTileSize()) {
33                     continue;
34                 }
35                 if (hasPreviousDupe(mark1, mark2)) {
36                     continue;
37                 }
38
39                 // "match too small" check
40
int dupes = countDuplicateTokens(mark1, mark2);
41                 if (dupes < ma.getMinimumTileSize()) {
42                     continue;
43                 }
44                 // is it still too close together
45
if (diff + dupes >= 1) {
46                     continue;
47                 }
48                 determineMatch(mark1, mark2, dupes);
49             }
50         }
51     }
52
53     public List JavaDoc getMatches() {
54         List JavaDoc matchList = new ArrayList JavaDoc(startMap.values());
55         Collections.sort(matchList);
56         Set JavaDoc matchSet = new HashSet JavaDoc();
57         Match.MatchCode matchCode = new Match.MatchCode();
58         for (int i = matchList.size(); i > 1; i--) {
59             Match match1 = (Match) matchList.get(i - 1);
60             TokenEntry mark1 = (TokenEntry) match1.getMarkSet().iterator().next();
61             matchSet.clear();
62             matchSet.add(match1.getMatchCode());
63             for (int j = i - 1; j > 0; j--) {
64                 Match match2 = (Match) matchList.get(j - 1);
65                 if (match1.getTokenCount() != match2.getTokenCount()) {
66                     break;
67                 }
68                 TokenEntry mark2 = null;
69                 for (Iterator JavaDoc iter = match2.getMarkSet().iterator(); iter.hasNext();) {
70                     mark2 = (TokenEntry) iter.next();
71                     if (mark2 != mark1) {
72                         break;
73                     }
74                 }
75                 int dupes = countDuplicateTokens(mark1, mark2);
76                 if (dupes < match1.getTokenCount()) {
77                     break;
78                 }
79                 matchSet.add(match2.getMatchCode());
80                 match1.getMarkSet().addAll(match2.getMarkSet());
81                 matchList.remove(i - 2);
82                 i--;
83             }
84             if (matchSet.size() == 1) {
85                 continue;
86             }
87             //prune the mark set
88
Set JavaDoc pruned = match1.getMarkSet();
89             boolean done = false;
90             ArrayList JavaDoc a1 = new ArrayList JavaDoc(match1.getMarkSet());
91             Collections.sort(a1);
92             for (int outer = 0; outer < a1.size() - 1 && !done; outer++) {
93                 TokenEntry cmark1 = (TokenEntry) a1.get(outer);
94                 for (int inner = outer + 1; inner < a1.size() && !done; inner++) {
95                     TokenEntry cmark2 = (TokenEntry) a1.get(inner);
96                     matchCode.setFirst(cmark1.getIndex());
97                     matchCode.setSecond(cmark2.getIndex());
98                     if (!matchSet.contains(matchCode)) {
99                         if (pruned.size() > 2) {
100                             pruned.remove(cmark2);
101                         }
102                         if (pruned.size() == 2) {
103                             done = true;
104                         }
105                     }
106                 }
107             }
108         }
109         return matchList;
110     }
111
112     /**
113      * A greedy algorithm for determining non-overlapping matches
114      */

115     private void determineMatch(TokenEntry mark1, TokenEntry mark2, int dupes) {
116         Match match = new Match(dupes, mark1, mark2);
117         String JavaDoc fileKey = mark1.getTokenSrcID() + mark2.getTokenSrcID();
118         List JavaDoc pairMatches = (ArrayList JavaDoc) fileMap.get(fileKey);
119         if (pairMatches == null) {
120             pairMatches = new ArrayList JavaDoc();
121             fileMap.put(fileKey, pairMatches);
122         }
123         boolean add = true;
124         for (int i = 0; i < pairMatches.size(); i++) {
125             Match other = (Match) pairMatches.get(i);
126             if (other.getFirstMark().getIndex() + other.getTokenCount() - mark1.getIndex()
127                     > 0) {
128                 boolean ordered = other.getSecondMark().getIndex() - mark2.getIndex() < 0;
129                 if ((ordered && (other.getEndIndex() - mark2.getIndex() > 0))
130                         || (!ordered && (match.getEndIndex() - other.getSecondMark().getIndex()) > 0)) {
131                     if (other.getTokenCount() >= match.getTokenCount()) {
132                         add = false;
133                         break;
134                     } else {
135                         pairMatches.remove(i);
136                         startMap.remove(other.getMatchCode());
137                     }
138                 }
139             }
140         }
141         if (add) {
142             pairMatches.add(match);
143             startMap.put(match.getMatchCode(), match);
144         }
145     }
146
147     private boolean hasPreviousDupe(TokenEntry mark1, TokenEntry mark2) {
148         if (mark1.getIndex() == 0) {
149             return false;
150         }
151         return !matchEnded(ma.tokenAt(-1, mark1), ma.tokenAt(-1, mark2));
152     }
153
154     private int countDuplicateTokens(TokenEntry mark1, TokenEntry mark2) {
155         int index = 0;
156         while (!matchEnded(ma.tokenAt(index, mark1), ma.tokenAt(index, mark2))) {
157             index++;
158         }
159         return index;
160     }
161
162     private boolean matchEnded(TokenEntry token1, TokenEntry token2) {
163         return token1.getIdentifier() != token2.getIdentifier() || token1 == TokenEntry.EOF || token2 == TokenEntry.EOF;
164     }
165 }
Popular Tags