1 package org.contineo.core.text.analyze.en; 2 3 import java.text.BreakIterator ; 4 5 import org.contineo.core.text.analyze.AnalyseResult; 6 import org.contineo.core.text.analyze.Analyzer; 7 import org.contineo.core.text.analyze.Stemmer; 8 import org.contineo.core.text.analyze.StopTable; 9 import org.contineo.core.text.analyze.Stopwords; 10 import org.contineo.core.text.analyze.WordRanker; 11 import org.contineo.core.text.analyze.WordTable; 12 13 19 public class EnglishAnalyzer extends WordRanker implements Analyzer { 20 21 private String [] englishStopwords; 22 23 26 public EnglishAnalyzer() { 27 englishStopwords = Stopwords.getStopwords("en"); 28 stoptable = StopTable.setStopWords(englishStopwords); 29 } 30 31 35 public EnglishAnalyzer(String stopwords[]) { 36 stoptable = StopTable.setStopWords(stopwords); 37 } 38 39 43 public EnglishAnalyzer(int len) { 44 minlen = len; 45 englishStopwords = Stopwords.getStopwords("en"); 46 stoptable = StopTable.setStopWords(englishStopwords); 47 } 48 49 53 public void analyze(String text) { 54 BreakIterator boundary = BreakIterator.getWordInstance(); 55 boundary.setText(text); 56 Stemmer stemmer = new EnglishStemmer(); 57 AnalyseResult result = WordTable.fillWordTable(boundary, new StringBuffer (text), stoptable, minlen, stemmer); 58 wordcount = result.getWordCount(); 59 wordtable = result.getWordTable(); 60 } 61 62 } 63 | Popular Tags |