package org.apache.lucene.analysis.nl;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

import java.io.File;
import java.io.Reader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import java.util.Map;

/**
 * Analyzer for Dutch text.
 *
 * <p>The token stream it produces is a {@link StandardTokenizer} wrapped, in
 * this order, by a {@link StandardFilter}, a {@link StopFilter} driven by the
 * configured stop words, and a {@link DutchStemFilter} that receives the
 * configured stem exclusion set and stem dictionary.
 *
 * <p>The exclusion set and the stem dictionary can be replaced after
 * construction through the {@code setStem...} methods; the new values are
 * picked up by token streams created afterwards.
 */
public class DutchAnalyzer extends Analyzer {

  /** Dutch stop words used when the analyzer is built with the no-argument constructor. */
  public static final String[] DUTCH_STOP_WORDS = {
    "de", "en", "van", "ik", "te", "dat", "die", "in", "een",
    "hij", "het", "niet", "zijn", "is", "was", "op", "aan", "met", "als", "voor", "had",
    "er", "maar", "om", "hem", "dan", "zou", "of", "wat", "mijn", "men", "dit", "zo",
    "door", "over", "ze", "zich", "bij", "ook", "tot", "je", "mij", "uit", "der", "daar",
    "haar", "naar", "heb", "hoe", "heeft", "hebben", "deze", "u", "want", "nog", "zal",
    "me", "zij", "nu", "ge", "geen", "omdat", "iets", "worden", "toch", "al", "waren",
    "veel", "meer", "doen", "toen", "moet", "ben", "zonder", "kan", "hun", "dus",
    "alles", "onder", "ja", "eens", "hier", "wie", "werd", "altijd", "doch", "wordt",
    "wezen", "kunnen", "ons", "zelf", "tegen", "na", "reeds", "wil", "kon", "niets",
    "uw", "iemand", "geweest", "andere"
  };

  /** Stop words handed to the {@link StopFilter}; assigned by every constructor. */
  private Set stopWords;

  /**
   * Set handed to the {@link DutchStemFilter} as its second argument; empty
   * until one of the {@code setStemExclusionTable} methods is called.
   * Presumably the words the stemmer must leave untouched -- confirm against
   * DutchStemFilter.
   */
  private Set stemExclusions = new HashSet();

  /**
   * Map handed to the {@link DutchStemFilter} as its third argument.
   * Presumably a word-to-stem override table -- confirm against DutchStemFilter.
   */
  private Map stemDictionary = new HashMap();

  /**
   * Builds an analyzer that uses {@link #DUTCH_STOP_WORDS} and a small
   * built-in stem dictionary. Only this constructor pre-populates the stem
   * dictionary; the other constructors leave it empty.
   */
  public DutchAnalyzer() {
    this(DUTCH_STOP_WORDS);
    stemDictionary.put("fiets", "fiets");
    stemDictionary.put("bromfiets", "bromfiets");
    stemDictionary.put("ei", "eier");
    stemDictionary.put("kind", "kinder");
  }

  /**
   * Builds an analyzer with the given stop words.
   *
   * @param stopwords words converted to a stop set via
   *     {@link StopFilter#makeStopSet(String[])}
   */
  public DutchAnalyzer(String[] stopwords) {
    stopWords = StopFilter.makeStopSet(stopwords);
  }

  /**
   * Builds an analyzer with the given stop words.
   *
   * @param stopwords stop set to use; it is stored by reference, not copied
   */
  public DutchAnalyzer(HashSet stopwords) {
    stopWords = stopwords;
  }

  /**
   * Builds an analyzer whose stop words are read from a file.
   *
   * @param stopwords file passed to {@code WordlistLoader.getWordtable(File)};
   *     the keys of the returned table become the stop words
   */
  public DutchAnalyzer(File stopwords) {
    stopWords = loadWords(stopwords);
  }

  /**
   * Replaces the stem exclusion set.
   *
   * @param exclusionlist words converted to a set via
   *     {@link StopFilter#makeStopSet(String[])}
   */
  public void setStemExclusionTable(String[] exclusionlist) {
    stemExclusions = StopFilter.makeStopSet(exclusionlist);
  }

  /**
   * Replaces the stem exclusion set.
   *
   * @param exclusionlist set to use; it is stored by reference, not copied
   */
  public void setStemExclusionTable(HashSet exclusionlist) {
    stemExclusions = exclusionlist;
  }

  /**
   * Replaces the stem exclusion set with words read from a file.
   *
   * @param exclusionlist file passed to {@code WordlistLoader.getWordtable(File)};
   *     the keys of the returned table become the exclusion set
   */
  public void setStemExclusionTable(File exclusionlist) {
    stemExclusions = loadWords(exclusionlist);
  }

  /**
   * Replaces the stem dictionary, discarding any entries installed earlier
   * (including the built-in ones from the no-argument constructor).
   *
   * @param stemdict file passed to {@code WordlistLoader.getStemDict(File)};
   *     the returned map becomes the new dictionary
   */
  public void setStemDictionary(File stemdict) {
    stemDictionary = WordlistLoader.getStemDict(stemdict);
  }

  /**
   * Creates the analysis chain for the text supplied by {@code reader}.
   *
   * @param fieldName name of the field being analyzed; not used by this analyzer
   * @param reader source of the text to tokenize
   * @return a {@link StandardTokenizer} wrapped by {@link StandardFilter},
   *     {@link StopFilter} and {@link DutchStemFilter}, in that order
   */
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream tokens = new StandardFilter(new StandardTokenizer(reader));
    tokens = new StopFilter(tokens, stopWords);
    return new DutchStemFilter(tokens, stemExclusions, stemDictionary);
  }

  /** Copies the keys of the table that WordlistLoader reads from {@code wordfile} into a new set. */
  private static Set loadWords(File wordfile) {
    return new HashSet(WordlistLoader.getWordtable(wordfile).keySet());
  }
}