1 package org.contineo.core.text.analyze; 2 3 import java.util.ArrayList ; 4 import java.util.Collection ; 5 import java.util.Enumeration ; 6 import java.util.Hashtable ; 7 8 13 public abstract class WordRanker { 14 15 protected Hashtable stoptable = new Hashtable (); 16 17 21 protected Hashtable <String , WordEntry> wordtable = new Hashtable <String , WordEntry>(); 22 23 protected long wordcount = 0; 24 protected int minlen = 2; 25 26 27 protected Entry getTopWord(Hashtable table) { 28 Entry entry = new Entry(); 29 Enumeration enum1 = table.keys(); 30 int topvalue = -1; 31 String topword = ""; 32 String topOriginWord = ""; 33 while (enum1.hasMoreElements()) { 34 String key = (String )enum1.nextElement(); 35 WordEntry termEntry = (WordEntry)table.get(key); 36 int val = termEntry.getValue(); 37 if (val > topvalue) { 38 topvalue = val; 39 topword = key; 40 topOriginWord = termEntry.getOriginWord(); 41 } 42 } 43 entry.setWord(topword); 44 entry.setNumber(topvalue); 45 entry.setOriginWord(topOriginWord); 46 return entry; 47 } 48 49 53 public void setMinLen(int len) { 54 minlen = len; 55 } 56 57 62 public Collection getTopWords(int hits) { 63 Hashtable table = new Hashtable <String , WordEntry>(wordtable); 64 Collection <Entry> coll = new ArrayList <Entry>(hits); 65 if (hits > table.size()) 66 hits = table.size(); 67 for (int i = 0; i < hits; i++) { 68 Entry e = getTopWord(table); 69 if (!e.getWord().equals("")) { 70 coll.add(e); 71 table.remove(e.getWord()); 72 } 73 } 74 return coll; 75 } 76 77 80 public int relevantWords() { 81 return wordtable.size(); 82 } 83 84 87 public long getWordCount() { 88 return wordcount; 89 } 90 } | Popular Tags |