KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > analysis > nl > DutchStemFilter


1 package org.apache.lucene.analysis.nl;
2
3 /**
4  * Copyright 2004 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
19 import org.apache.lucene.analysis.Token;
20 import org.apache.lucene.analysis.TokenFilter;
21 import org.apache.lucene.analysis.TokenStream;
22
23 import java.io.IOException JavaDoc;
24 import java.util.HashMap JavaDoc;
25 import java.util.HashSet JavaDoc;
26 import java.util.Set JavaDoc;
27 import java.util.Map JavaDoc;
28
29 /**
30  * A filter that stems Dutch words. It supports a table of words that should
31  * not be stemmed at all. The stemmer used can be changed at runtime after the
32  * filter object is created (as long as it is a DutchStemmer).
33  *
34  * @author Edwin de Jonge
35  */

36 public final class DutchStemFilter extends TokenFilter {
37   /**
38    * The actual token in the input stream.
39    */

40   private Token token = null;
41   private DutchStemmer stemmer = null;
42   private Set JavaDoc exclusions = null;
43
44   public DutchStemFilter(TokenStream _in) {
45     super(_in);
46     stemmer = new DutchStemmer();
47   }
48
49   /**
50    * Builds a DutchStemFilter that uses an exclusiontable.
51    */

52   public DutchStemFilter(TokenStream _in, Set JavaDoc exclusiontable) {
53     this(_in);
54     exclusions = exclusiontable;
55   }
56
57   /**
58    * @param stemdictionary Dictionary of word stem pairs, that overrule the algorithm
59    */

60   public DutchStemFilter(TokenStream _in, Set JavaDoc exclusiontable, Map JavaDoc stemdictionary) {
61     this(_in, exclusiontable);
62     stemmer.setStemDictionary(stemdictionary);
63   }
64
65   /**
66    * @return Returns the next token in the stream, or null at EOS
67    */

68   public Token next() throws IOException JavaDoc {
69     if ((token = input.next()) == null) {
70       return null;
71     }
72
73     // Check the exclusiontable
74
else if (exclusions != null && exclusions.contains(token.termText())) {
75       return token;
76     } else {
77       String JavaDoc s = stemmer.stem(token.termText());
78       // If not stemmed, dont waste the time creating a new token
79
if (!s.equals(token.termText())) {
80         return new Token(s, token.startOffset(),
81             token.endOffset(), token.type());
82       }
83       return token;
84     }
85   }
86
87   /**
88    * Set a alternative/custom DutchStemmer for this filter.
89    */

90   public void setStemmer(DutchStemmer stemmer) {
91     if (stemmer != null) {
92       this.stemmer = stemmer;
93     }
94   }
95
96   /**
97    * Set an alternative exclusion list for this filter.
98    */

99   public void setExclusionTable(HashSet JavaDoc exclusiontable) {
100     exclusions = exclusiontable;
101   }
102
103   /**
104    * Set dictionary for stemming, this dictionary overrules the algorithm,
105    * so you can correct for a particular unwanted word-stem pair.
106    */

107   public void setStemDictionary(HashMap JavaDoc dict) {
108     if (stemmer != null)
109       stemmer.setStemDictionary(dict);
110   }
111 }
Popular Tags