1 package org.apache.lucene.analysis.cjk; 2 3 18 19 import org.apache.lucene.analysis.Analyzer; 20 import org.apache.lucene.analysis.StopFilter; 21 import org.apache.lucene.analysis.TokenStream; 22 23 import java.io.Reader ; 24 import java.util.Set ; 25 26 27 32 public class CJKAnalyzer extends Analyzer { 33 35 39 public final static String [] STOP_WORDS = { 40 "a", "and", "are", "as", "at", "be", 41 "but", "by", "for", "if", "in", 42 "into", "is", "it", "no", "not", 43 "of", "on", "or", "s", "such", "t", 44 "that", "the", "their", "then", 45 "there", "these", "they", "this", 46 "to", "was", "will", "with", "", 47 "www" 48 }; 49 50 52 55 private Set stopTable; 56 57 59 62 public CJKAnalyzer() { 63 stopTable = StopFilter.makeStopSet(STOP_WORDS); 64 } 65 66 71 public CJKAnalyzer(String [] stopWords) { 72 stopTable = StopFilter.makeStopSet(stopWords); 73 } 74 75 77 84 public final TokenStream tokenStream(String fieldName, Reader reader) { 85 return new StopFilter(new CJKTokenizer(reader), stopTable); 86 } 87 } 88 | Popular Tags |