KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > analysis > fr > FrenchStemFilter


1 package org.apache.lucene.analysis.fr;
2
3 /**
4  * Copyright 2004-2005 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
19 import org.apache.lucene.analysis.Token;
20 import org.apache.lucene.analysis.TokenFilter;
21 import org.apache.lucene.analysis.TokenStream;
22 import java.io.IOException JavaDoc;
23 import java.util.Hashtable JavaDoc;
24 import java.util.HashSet JavaDoc;
25 import java.util.Set JavaDoc;
26
27 /**
28  * A filter that stemms french words. It supports a table of words that should
29  * not be stemmed at all. The used stemmer can be changed at runtime after the
30  * filter object is created (as long as it is a FrenchStemmer).
31  *
32  * @author Patrick Talbot (based on Gerhard Schwarz work for German)
33  */

34 public final class FrenchStemFilter extends TokenFilter {
35
36     /**
37      * The actual token in the input stream.
38      */

39     private Token token = null;
40     private FrenchStemmer stemmer = null;
41     private Set JavaDoc exclusions = null;
42
43     public FrenchStemFilter( TokenStream in ) {
44     super(in);
45         stemmer = new FrenchStemmer();
46     }
47
48     /**
49      * Builds a FrenchStemFilter that uses an exclusiontable.
50    *
51    * @deprecated
52      */

53     public FrenchStemFilter( TokenStream in, Hashtable JavaDoc exclusiontable ) {
54         this( in );
55         exclusions = new HashSet JavaDoc(exclusiontable.keySet());
56     }
57
58     public FrenchStemFilter( TokenStream in, Set JavaDoc exclusiontable ) {
59         this( in );
60         exclusions = exclusiontable;
61     }
62
63     /**
64      * @return Returns the next token in the stream, or null at EOS
65      */

66     public final Token next()
67         throws IOException JavaDoc {
68         if ( ( token = input.next() ) == null ) {
69             return null;
70         }
71         // Check the exclusiontable
72
else if ( exclusions != null && exclusions.contains( token.termText() ) ) {
73             return token;
74         }
75         else {
76             String JavaDoc s = stemmer.stem( token.termText() );
77             // If not stemmed, dont waste the time creating a new token
78
if ( !s.equals( token.termText() ) ) {
79                return new Token( s, token.startOffset(), token.endOffset(), token.type());
80             }
81             return token;
82         }
83     }
84     /**
85      * Set a alternative/custom FrenchStemmer for this filter.
86      */

87     public void setStemmer( FrenchStemmer stemmer ) {
88         if ( stemmer != null ) {
89             this.stemmer = stemmer;
90         }
91     }
92     /**
93      * Set an alternative exclusion list for this filter.
94      */

95     public void setExclusionTable( Hashtable JavaDoc exclusiontable ) {
96         exclusions = new HashSet JavaDoc(exclusiontable.keySet());
97     }
98 }
99
100
101
Popular Tags