package org.apache.lucene.analysis;

import java.io.IOException;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Locale;
import java.util.Set;

/**
 * A token filter that drops every token whose term text is contained in a
 * set of stop words.
 *
 * <p>When case is ignored, stop words built by this class and the text of each
 * incoming token are lower-cased with a fixed locale before they are compared,
 * so the result does not depend on the JVM's default locale.
 */
public final class StopFilter extends TokenFilter {

  /** Words to remove; looked up with {@link Set#contains(Object)}. */
  private final Set stopWords;

  /** Whether token text is lower-cased before it is looked up in {@link #stopWords}. */
  private final boolean ignoreCase;

  /**
   * Constructs a case-sensitive filter that removes the given words from
   * {@code input}.
   *
   * @param input     the stream to filter
   * @param stopWords the words to remove
   */
  public StopFilter(TokenStream input, String[] stopWords) {
    this(input, stopWords, false);
  }

  /**
   * Constructs a filter that removes the given words from {@code in}.
   *
   * @param in         the stream to filter
   * @param stopWords  the words to remove
   * @param ignoreCase if {@code true}, stop words and token text are compared
   *                   in lower case
   */
  public StopFilter(TokenStream in, String[] stopWords, boolean ignoreCase) {
    super(in);
    this.ignoreCase = ignoreCase;
    this.stopWords = makeStopSet(stopWords, ignoreCase);
  }

  /**
   * Constructs a case-sensitive filter whose stop words are the keys of
   * {@code stopTable}.
   *
   * @param in        the stream to filter
   * @param stopTable a table whose keys are the words to remove
   */
  public StopFilter(TokenStream in, Hashtable stopTable) {
    this(in, stopTable, false);
  }

  /**
   * Constructs a filter whose stop words are the keys of {@code stopTable}.
   * The table's key set is used directly; it is not copied or re-cased.
   *
   * @param in         the stream to filter
   * @param stopTable  a table whose keys are the words to remove; when
   *                   {@code ignoreCase} is {@code true} only lower-case keys
   *                   can ever match (see {@link #makeStopTable(String[], boolean)})
   * @param ignoreCase if {@code true}, token text is lower-cased before lookup
   */
  public StopFilter(TokenStream in, Hashtable stopTable, boolean ignoreCase) {
    this(in, stopTable.keySet(), ignoreCase);
  }

  /**
   * Constructs a filter that removes the members of {@code stopWords} from
   * {@code input}. The set is stored as given; it is not copied or re-cased.
   *
   * @param input      the stream to filter
   * @param stopWords  the words to remove; when {@code ignoreCase} is
   *                   {@code true} only lower-case entries can ever match,
   *                   because token text is lower-cased before the lookup
   *                   (see {@link #makeStopSet(String[], boolean)})
   * @param ignoreCase if {@code true}, token text is lower-cased before lookup
   */
  public StopFilter(TokenStream input, Set stopWords, boolean ignoreCase) {
    super(input);
    this.ignoreCase = ignoreCase;
    this.stopWords = stopWords;
  }

  /**
   * Constructs a case-sensitive filter that removes the members of
   * {@code stopWords} from {@code in}.
   *
   * @param in        the stream to filter
   * @param stopWords the words to remove
   */
  public StopFilter(TokenStream in, Set stopWords) {
    this(in, stopWords, false);
  }

  /**
   * Builds a case-sensitive stop-word table from an array of words.
   *
   * @param stopWords the words to put in the table
   * @return a table mapping each word to itself
   */
  public static final Hashtable makeStopTable(String[] stopWords) {
    return makeStopTable(stopWords, false);
  }

  /**
   * Builds a stop-word table from an array of words.
   *
   * @param stopWords  the words to put in the table
   * @param ignoreCase if {@code true}, every word is lower-cased before it is
   *                   stored
   * @return a table mapping each (possibly lower-cased) word to itself
   */
  public static final Hashtable makeStopTable(String[] stopWords, boolean ignoreCase) {
    Hashtable stopTable = new Hashtable(stopWords.length);
    for (int i = 0; i < stopWords.length; i++) {
      String stopWord = normalize(stopWords[i], ignoreCase);
      stopTable.put(stopWord, stopWord);
    }
    return stopTable;
  }

  /**
   * Builds a case-sensitive stop-word set from an array of words.
   *
   * @param stopWords the words to put in the set
   * @return a set containing every given word
   */
  public static final Set makeStopSet(String[] stopWords) {
    return makeStopSet(stopWords, false);
  }

  /**
   * Builds a stop-word set from an array of words.
   *
   * @param stopWords  the words to put in the set
   * @param ignoreCase if {@code true}, every word is lower-cased before it is
   *                   stored
   * @return a set containing every (possibly lower-cased) word
   */
  public static final Set makeStopSet(String[] stopWords, boolean ignoreCase) {
    HashSet stopSet = new HashSet(stopWords.length);
    for (int i = 0; i < stopWords.length; i++) {
      stopSet.add(normalize(stopWords[i], ignoreCase));
    }
    return stopSet;
  }

  /**
   * Returns the next token from the wrapped stream that is not a stop word.
   *
   * @return the next non-stop-word token, or {@code null} once the wrapped
   *         stream returns {@code null}
   * @throws IOException if the wrapped stream throws it
   */
  public final Token next() throws IOException {
    for (Token token = input.next(); token != null; token = input.next()) {
      String termText = normalize(token.termText, ignoreCase);
      if (!stopWords.contains(termText)) {
        return token;
      }
    }
    // The wrapped stream is exhausted.
    return null;
  }

  /**
   * Returns the form of {@code word} used for stop-word comparison.
   *
   * <p>Lower-casing uses a fixed locale: with the default-locale overload a
   * Turkish or Azeri JVM would map "I" to a dotless "i", so a stop word such
   * as "IT" would no longer match the token "it". {@link Locale#ENGLISH} is
   * used rather than {@code Locale.ROOT} because it exists on every Java
   * version.
   */
  private static String normalize(String word, boolean ignoreCase) {
    return ignoreCase ? word.toLowerCase(Locale.ENGLISH) : word;
  }
}