KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > contineo > searchengine > comparision > Searcher


1 package org.contineo.searchengine.comparision;
2
3 import java.util.ArrayList JavaDoc;
4 import java.util.Collection JavaDoc;
5 import java.util.Collections JavaDoc;
6 import java.util.Hashtable JavaDoc;
7 import java.util.Iterator JavaDoc;
8 import java.util.List JavaDoc;
9
10 import org.contineo.admin.Menu;
11 import org.contineo.admin.dao.MenuDAO;
12 import org.contineo.documan.Term;
13 import org.contineo.documan.dao.TermDAO;
14
15 /**
16  * Class for finding similar documents.
17  * Created on 21.03.2004
18  * @author Michael Scholz
19  */

20 public class Searcher {
21
22     /**
23      *
24      */

25     public Searcher() {
26     }
27
28     /**
29      * This method finds documents, which are similar to a reference document.
30      * All documents are valued by dice-coefficient.
31      * dice-coefficient = 2*scalar product (doc1,doc2) / (absolute value(doc1) + absoulute value(doc2))
32      * @param docid - ID of the reference document.
33      * @param minScore - Minimum score value (between 0 and 1)
34      * @return Collection of similar documents sorted by score value.
35      */

36     public Collection JavaDoc findSimilarDocuments(int menuid, double minScore, String JavaDoc username) {
37         TermDAO termsDao = new TermDAO();
38         Collection JavaDoc basicTerms = termsDao.findByMenuid(menuid);
39         // select all documents having a keyword a the basic document
40
Collection JavaDoc terms = termsDao.findByStem(menuid);
41         Collection JavaDoc<SearchResult> result = new ArrayList JavaDoc<SearchResult>();
42         Iterator JavaDoc iter = terms.iterator();
43         MenuDAO mdao = new MenuDAO();
44         Collection JavaDoc coll2 = mdao.findMenuIdByUserName(username);
45         while (iter.hasNext()) {
46             // calculate the score for ranking
47
Term term = (Term)iter.next();
48             if (coll2.contains(String.valueOf(term.getMenuid()))) {
49                 Collection JavaDoc docTerms = termsDao.findByMenuid(term.getMenuid());
50                 double score = calculateScore(basicTerms, docTerms);
51                 if (score >= minScore) {
52                     SearchResult sres = new SearchResult();
53                     Menu menu = mdao.findByPrimaryKey(term.getMenuid());
54                     sres.setScore(score);
55                     sres.setIcon(menu.getMenuIcon());
56                     sres.setMenuid(String.valueOf(menu.getMenuId()));
57                     sres.setName(menu.getMenuText());
58                     sres.setPath(menu.getMenuPath());
59                     result.add(sres);
60                 }
61             }
62         }
63         Collections.sort((List JavaDoc)result, new SearchResultComparator());
64         return result;
65     }
66         
67     private double calculateScore(Collection JavaDoc refTerms, Collection JavaDoc terms) {
68         double score = 0.0d;
69         double abs1 = 0.0d;
70         double abs2 = 0.0d;
71         Hashtable JavaDoc table = convert(terms);
72         Iterator JavaDoc iter = refTerms.iterator();
73         while (iter.hasNext()) {
74             Term term = (Term)iter.next();
75             abs1 += term.getValue() * term.getValue();
76             if (table.containsKey(term.getStem())) {
77                 Double JavaDoc value = (Double JavaDoc)table.get(term.getStem());
78                 abs2 += value.doubleValue() * value.doubleValue();
79                 score += value.doubleValue() * term.getValue();
80             }
81         }
82         return (2*score) / (abs1+abs2);
83     }
84     
85     private Hashtable JavaDoc convert(Collection JavaDoc coll) {
86         Hashtable JavaDoc<String JavaDoc, Double JavaDoc> table = new Hashtable JavaDoc<String JavaDoc, Double JavaDoc>(coll.size());
87         Iterator JavaDoc iter = coll.iterator();
88         while (iter.hasNext()) {
89             Term term = (Term)iter.next();
90             table.put(term.getStem(), new Double JavaDoc(term.getValue()));
91         }
92         return table;
93     }
94 }
95
Popular Tags