1 package org.contineo.core.text.analyze; 2 3 import java.text.BreakIterator ; 4 import java.util.Hashtable ; 5 6 11 public class WordTable { 12 13 public final static AnalyseResult fillWordTable(BreakIterator boundary, StringBuffer source, Hashtable stopwords, int minlen, Stemmer stemmer) { 14 int start = boundary.first(); 15 long wordcount = 0; 16 String prestem = ""; 17 String preword = ""; 18 AnalyseResult result = new AnalyseResult(); 19 Hashtable <String , WordEntry> wordtable = new Hashtable <String , WordEntry>(source.length()/6); 20 for (int end = boundary.next();end != BreakIterator.DONE;start = end, end = boundary.next()) { 21 String word = source.substring(start,end).trim(); 22 char next = ' '; 23 try { 24 next = source.charAt(end); 25 } catch (Exception e) { 26 } 27 if (word.length() > minlen) { 28 String stem = stemmer.stem(word.toLowerCase()); 29 WordEntry entry = new WordEntry(); 30 if (word.length() >= minlen && !stopwords.containsKey(word) && !stopwords.containsKey(stem)) { 31 wordcount++; 32 if (wordtable.containsKey(stem)) { 33 entry = (WordEntry)wordtable.get(stem); 35 entry.incValue(); 36 if (word.length() < entry.getOriginWord().length() && next != (char)45) 37 entry.setOriginWord(word); 38 } else { 39 entry.incValue(); 40 entry.setOriginWord(word); 41 } 42 wordtable.put(stem,entry); 43 if (!prestem.equals("")) { 44 WordEntry gentry = new WordEntry(); 45 String groupstem = prestem + " " + stem; 46 String groupword = preword + " " + word; 47 if (wordtable.containsKey(groupstem)) { 48 gentry = (WordEntry)wordtable.get(groupstem); 50 gentry.incValue(); 51 if (groupword.length() < gentry.getOriginWord().length() && next != (char)45) 52 gentry.setOriginWord(groupword); 53 } else { 54 gentry.setOriginWord(groupword); 55 } 56 wordtable.put(groupstem, gentry); 57 58 try { 59 WordEntry tempEntry = (WordEntry)wordtable.get(stem); 60 if (tempEntry.getValue() > 0) { 61 tempEntry.decValue(); 62 wordtable.put(stem,tempEntry); 63 } 64 tempEntry = (WordEntry)wordtable.get(prestem); 65 if (tempEntry.getValue() > 0) { 66 tempEntry.decValue(); 67 wordtable.put(prestem,tempEntry); 68 } 69 } catch (Exception e) { 70 } 71 } 72 prestem = stem; 73 preword = word; 74 } 75 } else if (word.length() > 1 || word.equals(".") || word.equals("!") || 76 word.equals("?") || word.equals(",") || word.equals(";")) { 77 prestem = ""; 78 preword = ""; 79 } 80 } 81 result.setWordCount(wordcount); 82 result.setWordTable(wordtable); 83 return result; 84 } 85 86 } | Popular Tags |