KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > contineo > searchengine > util > TermSelector


1 package org.contineo.searchengine.util;
2
3 import java.util.ArrayList JavaDoc;
4 import java.util.Collection JavaDoc;
5 import java.util.Enumeration JavaDoc;
6 import java.util.Hashtable JavaDoc;
7 import java.util.Iterator JavaDoc;
8
9 import org.contineo.documan.Term;
10 import org.contineo.documan.dao.TermDAO;
11 import org.contineo.searchengine.search.Result;
12
13 /**
14  * Created on 15.11.2004
15  */

16 public class TermSelector {
17
18     private Hashtable JavaDoc<String JavaDoc, TermEntry> terms;
19     
20     /**
21      * Creates a new TermSelector.
22      */

23     public TermSelector() {
24         terms = new Hashtable JavaDoc<String JavaDoc, TermEntry>();
25     }
26
27     /**
28      * Selects n terms the mostly occured in all document being in a given search result.
29      * @param searchResult
30      * @param count Count of terms to be returned.
31      * @return
32      */

33     public Collection JavaDoc getTopTerms(Collection JavaDoc searchResult, int count) {
34         Iterator JavaDoc iter = searchResult.iterator();
35         TermDAO termDao = new TermDAO();
36         while (iter.hasNext()) {
37             Result rs = (Result)iter.next();
38             Collection JavaDoc termcoll = termDao.findByMenuid(Integer.parseInt(rs.getMenuid()));
39             Iterator JavaDoc termiter = termcoll.iterator();
40             while (termiter.hasNext()) {
41                 Term term = (Term)termiter.next();
42                 if (terms.containsKey(term.getStem())) {
43                     TermEntry entry = (TermEntry)terms.get(term.getStem());
44                     //entry.setValue(entry.getValue() + (term.getValue() / term.getWordCount()));
45
//entry.setWordCount(entry.getWordCount() + term.getWordCount());
46
entry.setValue(entry.getValue() + term.getValue());
47                     if (term.getOriginWord().length() < entry.getOriginWord().length())
48                         entry.setOriginWord(term.getOriginWord());
49                     double value = term.getValue();
50                     Edge edge = new Edge();
51                     if (value > 30.0)
52                         edge.setThickness(3);
53                     else
54                         if (value > 10.0)
55                             edge.setThickness(2);
56                         else
57                             edge.setThickness(1);
58                     edge.setId(term.getMenuid());
59                     entry.addDocument(edge);
60                 } else {
61                     TermEntry entry = new TermEntry();
62                     entry.setName(term.getStem());
63                     //entry.setWordCount(term.getWordCount());
64
//entry.setValue(term.getValue() * term.getWordCount());
65
entry.setValue(term.getValue());
66                     entry.setOriginWord(term.getOriginWord());
67                     double value = term.getValue();
68                     Edge edge = new Edge();
69                     if (value > 30.0)
70                         edge.setThickness(3);
71                     else
72                         if (value > 10.0)
73                             edge.setThickness(2);
74                         else
75                             edge.setThickness(1);
76                     edge.setId(term.getMenuid());
77                     entry.addDocument(edge);
78                     terms.put(term.getStem(), entry);
79                 }
80             }
81         }
82         Collection JavaDoc<TermEntry> coll = new ArrayList JavaDoc<TermEntry>(count);
83         if (terms.size() > 0) {
84             for (int i=0; i<count; i++) {
85                 TermEntry e = getTopWord();
86                 coll.add(e);
87             }
88         }
89         return coll;
90     }
91     
92     protected TermEntry getTopWord() {
93         TermEntry entry = new TermEntry();
94         Enumeration JavaDoc enum1 = terms.keys();
95         String JavaDoc topterm = "";
96         double topvalue = 0d;
97         while (enum1.hasMoreElements()) {
98             String JavaDoc term = (String JavaDoc)enum1.nextElement();
99             TermEntry te = (TermEntry)terms.get(term);
100             //double val = (double)te.getWordCount() / te.getValue();
101
double val = te.getValue();
102             if (val > topvalue) {
103                 topvalue = val;
104                 topterm = term;
105                 entry = te;
106             }
107         }
108         terms.remove(topterm);
109         return entry;
110     }
111 }
112
Popular Tags