KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > mmbase > applications > xmlimporter > BasicFinder


1 /*
2
3 This software is OSI Certified Open Source Software.
4 OSI Certified is a certification mark of the Open Source Initiative.
5
6 The license (Mozilla version 1.0) can be read at the MMBase site.
7 See http://www.MMBase.org/license
8
9 */

10
11 package org.mmbase.applications.xmlimporter;
12
13 import java.util.*;
14 import org.mmbase.module.core.MMObjectBuilder;
15 import org.mmbase.module.core.MMObjectNode;
16 import org.mmbase.util.logging.Logger;
17 import org.mmbase.util.logging.Logging;
18
19 /**
20  * A basic implementation of SimilarObjectFinder.
21  * @since MMBase-1.5
22  * @version $Id: BasicFinder.java,v 1.4 2005/01/25 12:45:18 pierre Exp $
23  */

24 public abstract class BasicFinder implements SimilarObjectFinder {
25
26     /** Logger instance. */
27     private static Logger log
28     = Logging.getLoggerInstance(BasicFinder.class.getName());
29
30     /**
31      * Convenience method: finds MMBase id's for all objects in the
32      * persistent cloud of a given type and satisfying a criterium.
33      * @param builder The builder for this type.
34      * @param criterium The criterium: SQL where-clause, but
35      * without the "where ".
36      * @return List of (Integer) MMBase id's.
37      */

38     protected static List findPersistentObjects(
39     MMObjectBuilder builder, String JavaDoc criterium) {
40
41         Enumeration en = builder.search("WHERE " + criterium);
42         List result = new ArrayList();
43         while (en.hasMoreElements()) {
44             MMObjectNode node = (MMObjectNode) en.nextElement();
45             result.add(node.getIntegerValue("number"));
46         }
47         return result;
48     }
49
50     /** Creates new BasicFinder */
51     public BasicFinder() {}
52
53     /**
54      * Initializes this instance.
55      * @param params The initialization parameters, provided as
56      * name/value pairs (both String).
57      * @throws TransactionHandlerException if a failure occurred.
58      */

59     public void init(HashMap params) throws TransactionHandlerException {
60     }
61
62     /**
63      * Searches for similar object. Objects found in the
64      * persistent cloud will be accessed in the transaction.
65      * @return List of the similar objects found.
66      * @param transaction The transaction.
67      * @param tmpObj The object to search for.
68      * @throws TransactionHandlerException If a failure occurred.
69      */

70     public List findSimilarObject(Transaction transaction, TmpObject tmpObj)
71     throws TransactionHandlerException {
72         Set exactMatches = new HashSet();
73         Set closeMatches = new HashSet();
74
75         MMObjectNode node1 = tmpObj.getNode();
76         int otype = node1.getOType();
77         Integer JavaDoc mmBaseId1 = new Integer JavaDoc(tmpObj.getMMBaseId());
78
79         // Search temporary cloud for matching nodes,
80
// add exact matches to exactMatches, close matches to closeMatches.
81
Iterator iTmpObjects = transaction.getTmpObjects().iterator();
82         while (iTmpObjects.hasNext()) {
83             TmpObject tmpObj2 = (TmpObject)iTmpObjects.next();
84             if (tmpObj2 == tmpObj) {
85                 // Traversal stops at this object (tmpnode1).
86
// This is important, because
87
// 1: all pairs get matched only once
88
// 2: all objects it is compared with are in the part
89
// of the transaction that is already merged.
90
break;
91             }
92             if (!tmpObj2.isRelation()) {
93                 if (tmpObj2.getNode().getOType() == otype) {
94                     evaluateMatch(tmpObj2, tmpObj, exactMatches, closeMatches);
95                 }
96             }
97         }
98
99         // Search persistent cloud for exactly matching nodes,
100
// add these to exactMatches.
101
Iterator iPersistentObjects
102             = getExactPersistentObjects(tmpObj).iterator();
103         while (iPersistentObjects.hasNext()) {
104             Integer JavaDoc mmBaseId2 = ((Integer JavaDoc) iPersistentObjects.next());
105
106             // Ignore if this is the node to match to.
107
if (mmBaseId2.equals(mmBaseId1)) {
108                 continue;
109             }
110
111             // Access the object in the transaction context.
112
TmpObject persObj2
113             = transaction.getAccessObject(mmBaseId2.intValue());
114
115             // Add to exact matches.
116
exactMatches.add(persObj2);
117         }
118
119         // When exact matches are found, return these.
120
if (exactMatches.size() > 0) {
121             if (log.isDebugEnabled()) {
122                 log.debug("Matches (exact) found for " + tmpObj + ":\n"
123                 + exactMatches);
124             }
125             return new ArrayList(exactMatches);
126         }
127
128         // When no exact matches found, search persistent cloud for
129
// close matching nodes as well.
130
Iterator iCloseObjects
131             = getClosePersistentObjects(tmpObj).iterator();
132         while (iCloseObjects.hasNext()) {
133             Integer JavaDoc mmBaseId2 = ((Integer JavaDoc) iCloseObjects.next());
134
135             // Ignore if this is the node to match to.
136
if (mmBaseId2.equals(mmBaseId1)) {
137                 continue;
138             }
139
140             // Access the object in the transaction context.
141
TmpObject persObj2
142             = transaction.getAccessObject(mmBaseId2.intValue());
143
144             // Evaluate matching rate, and add to exactMatches
145
// or closeMatches accordingly.
146
evaluateMatch(persObj2, tmpObj, exactMatches, closeMatches);
147         }
148
149         // Return the close matches.
150
if (log.isDebugEnabled()) {
151             log.debug("Matches (close) found for " + tmpObj + ":\n"
152             + closeMatches);
153         }
154         return new ArrayList(closeMatches);
155     }
156
157     /**
158      * Calculates matching rate for two objects.
159      * e.g. the rate in which tmpObj1 matches tmpObj2, represented by a value ranging
160      * from 0 to 1: <ul>
161      * <li>1.0 for exact match,
162      * <li>between 1.0 and 0.0 for not-exact but qualifying match,
163      * <li>0.0 for match that is not close enough to qualify.
164      * </ul>
165      * @param tmpObj1 The object for which the matching rate is wanted.
166      * @param tmpObj2 The object to match with.
167      * @return Matching rate.
168      */

169     public abstract float scoreNode(TmpObject tmpObj1, TmpObject tmpObj2);
170
171     /**
172      * Gets MMBase id's for all objects from persistent cloud that
173      * produce an exact match with the given object (possibly
174      * including the object itself).
175      * This can be used to prevent a more extensive search for close
176      * matches when exact matches are possible.
177      * @param tmpObj The object to match with.
178      * @return Collection of (Integer) MMBase id's for objects from the
179      * persistent cloud that produce an exact match with the given
180      * object.
181      */

182     public abstract Collection getExactPersistentObjects(TmpObject tmpObj);
183
184     /**
185      * Gets MMBase id's for all objects from persistent cloud that
186      * might produce a qualifying match with the given object
187      * (possibly including the object itself).
188      * When looking for a fuzzy match, this can be used to make a
189      * pre-selection from all the objects in the persistent cloud,
190      * to reduce the total number of objects to be inspected closer.
191      * @param tmpObj The object to match with.
192      * @return Collection of (Integer) MMBase id's for objects from the
193      * persistent cloud that might produce a qualifying match with the
194      * given object.
195      */

196     public abstract Collection getClosePersistentObjects(TmpObject tmpObj);
197
198     /**
199      * Calculates and evaluates matching rate of an object with respect
200      * to a given object, and adds the object/match rate to a list of
201      * exact matches - when the match is exact, or a list of close matches
202      * - when the match is qualifying but not exact.
203      * @param tmpObj1 The object for which the matching rate is wanted.
204      * @param tmpObj2 The object to match with.
205      * @param exactMatches Set of exact matching objects.
206      * @param closeMatches Set of close matching objects.
207      */

208     private void evaluateMatch(
209     TmpObject tmpObj1, TmpObject tmpObj2, Set exactMatches, Set closeMatches) {
210         float matchingRate = scoreNode(tmpObj1, tmpObj2);
211         if (matchingRate == 1.0) {
212             // Exact match.
213
exactMatches.add(tmpObj1);
214         } else if (matchingRate > 0.0) {
215             // Close match.
216
closeMatches.add(tmpObj1);
217         }
218     }
219
220 }
221
Popular Tags