KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > contineo > core > text > analyze > en > EnglishAnalyzer


1 package org.contineo.core.text.analyze.en;
2
3 import java.text.BreakIterator;
4
5 import org.contineo.core.text.analyze.AnalyseResult;
6 import org.contineo.core.text.analyze.Analyzer;
7 import org.contineo.core.text.analyze.Stemmer;
8 import org.contineo.core.text.analyze.StopTable;
9 import org.contineo.core.text.analyze.Stopwords;
10 import org.contineo.core.text.analyze.WordRanker;
11 import org.contineo.core.text.analyze.WordTable;
12
13 /**
14  * This class can analyze English texts by using the english stop word list.
15  * Created on 24. Juli 2003, 22:46
16  * @author Michael Scholz
17  * @version 1.0
18  */

19 public class EnglishAnalyzer extends WordRanker implements Analyzer {
20
21     private String JavaDoc[] englishStopwords;
22     
23     /**
24      * Creates a new instance of EnglishAnalyzer.
25      */

26     public EnglishAnalyzer() {
27         englishStopwords = Stopwords.getStopwords("en");
28         stoptable = StopTable.setStopWords(englishStopwords);
29     }
30     
31     /**
32      * Creates a new instance of EnglishAnalyzer.
33      * @param stopwords - Array of user specific stop words.
34      */

35     public EnglishAnalyzer(String JavaDoc stopwords[]) {
36         stoptable = StopTable.setStopWords(stopwords);
37     }
38     
39     /**
40      * Creates a new instance of EnglishAnalyzer.
41      * @param len - Minimum length of words which should analyzed.
42      */

43     public EnglishAnalyzer(int len) {
44         minlen = len;
45         englishStopwords = Stopwords.getStopwords("en");
46         stoptable = StopTable.setStopWords(englishStopwords);
47     }
48     
49     /**
50      * This method analyzes a given text an fills a hitlist.
51      * @param text - Text which should analyzed.
52      */

53     public void analyze(String JavaDoc text) {
54         BreakIterator JavaDoc boundary = BreakIterator.getWordInstance();
55         boundary.setText(text);
56         Stemmer stemmer = new EnglishStemmer();
57         AnalyseResult result = WordTable.fillWordTable(boundary, new StringBuffer JavaDoc(text), stoptable, minlen, stemmer);
58         wordcount = result.getWordCount();
59         wordtable = result.getWordTable();
60     }
61     
62 }
63
Popular Tags