package org.apache.lucene.analysis.de;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Set;

/**
 * Analyzer for German text.
 *
 * <p>Token streams produced by this analyzer run the input through, in order,
 * a {@link StandardTokenizer}, a {@link StandardFilter}, a
 * {@link LowerCaseFilter}, a {@link StopFilter} using this analyzer's stop
 * set, and finally a {@link GermanStemFilter} that is given this analyzer's
 * exclusion set.
 */
public class GermanAnalyzer extends Analyzer {

    /**
     * Stop words used by the no-argument constructor.
     *
     * <p>A few entries appear more than once; that has no effect once the
     * array has been turned into a set.
     */
    public static final String[] GERMAN_STOP_WORDS = {
        "einer", "eine", "eines", "einem", "einen",
        "der", "die", "das", "dass", "daß",
        "du", "er", "sie", "es",
        "was", "wer", "wie", "wir",
        "und", "oder", "ohne", "mit",
        "am", "im", "in", "aus", "auf",
        "ist", "sein", "war", "wird",
        "ihr", "ihre", "ihres",
        "als", "für", "von", "mit",
        "dich", "dir", "mich", "mir",
        "mein", "sein", "kein",
        "durch", "wegen", "wird"
    };

    /**
     * Words handed to the {@link StopFilter}. Starts out empty and is
     * replaced by every constructor.
     */
    private Set stopSet = new HashSet();

    /**
     * Words handed to the {@link GermanStemFilter} as its exclusion set.
     * Empty until one of the {@code setStemExclusionTable} methods is called.
     */
    private Set exclusionSet = new HashSet();

    /**
     * Creates an analyzer that uses {@link #GERMAN_STOP_WORDS} as its stop
     * words.
     */
    public GermanAnalyzer() {
        this(GERMAN_STOP_WORDS);
    }

    /**
     * Creates an analyzer with the given stop words.
     *
     * @param stopwords the stop words, converted to a set with
     *                  {@link StopFilter#makeStopSet(String[])}
     */
    public GermanAnalyzer(String[] stopwords) {
        this.stopSet = StopFilter.makeStopSet(stopwords);
    }

    /**
     * Creates an analyzer whose stop words are the keys of the given table.
     * The keys are copied; the table's values are not used.
     *
     * @param stopwords table whose key set supplies the stop words
     */
    public GermanAnalyzer(Hashtable stopwords) {
        this.stopSet = new HashSet(stopwords.keySet());
    }

    /**
     * Creates an analyzer whose stop words are loaded from a file via
     * {@link WordlistLoader#getWordSet(File)}.
     *
     * @param stopwords the file holding the stop words (the expected file
     *                  format is defined by {@code WordlistLoader})
     * @throws IOException if loading the word list fails
     */
    public GermanAnalyzer(File stopwords) throws IOException {
        this.stopSet = WordlistLoader.getWordSet(stopwords);
    }

    /**
     * Replaces the exclusion set passed to {@link GermanStemFilter} with the
     * given words. Only token streams created after this call see the new
     * set.
     *
     * @param exclusionlist the words to put into the exclusion set
     */
    public void setStemExclusionTable(String[] exclusionlist) {
        this.exclusionSet = StopFilter.makeStopSet(exclusionlist);
    }

    /**
     * Replaces the exclusion set passed to {@link GermanStemFilter} with a
     * copy of the keys of the given table. Only token streams created after
     * this call see the new set.
     *
     * @param exclusionlist table whose key set supplies the excluded words
     */
    public void setStemExclusionTable(Hashtable exclusionlist) {
        this.exclusionSet = new HashSet(exclusionlist.keySet());
    }

    /**
     * Replaces the exclusion set passed to {@link GermanStemFilter} with the
     * words loaded from a file via {@link WordlistLoader#getWordSet(File)}.
     * Only token streams created after this call see the new set.
     *
     * @param exclusionlist the file holding the excluded words
     * @throws IOException if loading the word list fails
     */
    public void setStemExclusionTable(File exclusionlist) throws IOException {
        this.exclusionSet = WordlistLoader.getWordSet(exclusionlist);
    }

    /**
     * Builds the filter chain for the given reader.
     *
     * @param fieldName name of the field being analyzed; not used here
     * @param reader    source of the text to tokenize
     * @return a stream built from {@link StandardTokenizer} and filtered by
     *         {@link StandardFilter}, {@link LowerCaseFilter},
     *         {@link StopFilter} and {@link GermanStemFilter}, in that order
     */
    public TokenStream tokenStream(String fieldName, Reader reader) {
        TokenStream tokenized = new StandardTokenizer(reader);
        TokenStream standardized = new StandardFilter(tokenized);
        TokenStream lowerCased = new LowerCaseFilter(standardized);
        TokenStream withoutStopWords = new StopFilter(lowerCased, stopSet);
        return new GermanStemFilter(withoutStopWords, exclusionSet);
    }
}