KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > analysis > de > GermanStemFilter


1 package org.apache.lucene.analysis.de;
2
3 /**
4  * Copyright 2004 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
19 import org.apache.lucene.analysis.Token;
20 import org.apache.lucene.analysis.TokenFilter;
21 import org.apache.lucene.analysis.TokenStream;
22 import java.io.IOException JavaDoc;
23 import java.util.Hashtable JavaDoc;
24 import java.util.Set JavaDoc;
25 import java.util.HashSet JavaDoc;
26
27 /**
28  * A filter that stems German words. It supports a table of words that should
29  * not be stemmed at all. The stemmer used can be changed at runtime after the
30  * filter object is created (as long as it is a GermanStemmer).
31  *
32  * @author Gerhard Schwarz
33  * @version $Id: GermanStemFilter.java 150998 2004-08-16 20:30:46Z dnaber $
34  */

35 public final class GermanStemFilter extends TokenFilter
36 {
37     /**
38      * The actual token in the input stream.
39      */

40     private Token token = null;
41     private GermanStemmer stemmer = null;
42     private Set JavaDoc exclusionSet = null;
43
44     public GermanStemFilter( TokenStream in )
45     {
46       super(in);
47       stemmer = new GermanStemmer();
48     }
49
50     /**
51      * Builds a GermanStemFilter that uses an exclusiontable.
52      * @deprecated Use {@link #GermanStemFilter(org.apache.lucene.analysis.TokenStream, java.util.Set)} instead.
53      */

54     public GermanStemFilter( TokenStream in, Hashtable JavaDoc exclusiontable )
55     {
56       this( in );
57       exclusionSet = new HashSet JavaDoc(exclusiontable.keySet());
58     }
59
60     /**
61      * Builds a GermanStemFilter that uses an exclusiontable.
62      */

63     public GermanStemFilter( TokenStream in, Set JavaDoc exclusionSet )
64     {
65       this( in );
66       this.exclusionSet = exclusionSet;
67     }
68
69     /**
70      * @return Returns the next token in the stream, or null at EOS
71      */

72     public final Token next()
73       throws IOException JavaDoc
74     {
75       if ( ( token = input.next() ) == null ) {
76         return null;
77       }
78       // Check the exclusiontable
79
else if ( exclusionSet != null && exclusionSet.contains( token.termText() ) ) {
80         return token;
81       }
82       else {
83         String JavaDoc s = stemmer.stem( token.termText() );
84         // If not stemmed, dont waste the time creating a new token
85
if ( !s.equals( token.termText() ) ) {
86           return new Token( s, token.startOffset(),
87             token.endOffset(), token.type() );
88         }
89         return token;
90       }
91     }
92
93     /**
94      * Set a alternative/custom GermanStemmer for this filter.
95      */

96     public void setStemmer( GermanStemmer stemmer )
97     {
98       if ( stemmer != null ) {
99         this.stemmer = stemmer;
100       }
101     }
102
103     /**
104      * Set an alternative exclusion list for this filter.
105      * @deprecated Use {@link #setExclusionSet(java.util.Set)} instead.
106      */

107     public void setExclusionTable( Hashtable JavaDoc exclusiontable )
108     {
109       exclusionSet = new HashSet JavaDoc(exclusiontable.keySet());
110     }
111
112     /**
113      * Set an alternative exclusion list for this filter.
114      */

115     public void setExclusionSet( Set JavaDoc exclusionSet )
116     {
117       this.exclusionSet = exclusionSet;
118     }
119 }
120
Popular Tags