KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > analysis > ru > RussianStemFilter


1 package org.apache.lucene.analysis.ru;
2
3 /**
4  * Copyright 2004 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
import java.io.IOException;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
23
24 /**
25  * A filter that stems Russian words. The implementation was inspired by GermanStemFilter.
26  * The input should be filtered by RussianLowerCaseFilter before passing it to RussianStemFilter ,
27  * because RussianStemFilter only works with lowercase part of any "russian" charset.
28  *
29  * @author Boris Okner, b.okner@rogers.com
30  * @version $Id: RussianStemFilter.java 150998 2004-08-16 20:30:46Z dnaber $
31  */

32 public final class RussianStemFilter extends TokenFilter
33 {
34     /**
35      * The actual token in the input stream.
36      */

37     private Token token = null;
38     private RussianStemmer stemmer = null;
39
40     public RussianStemFilter(TokenStream in, char[] charset)
41     {
42         super(in);
43         stemmer = new RussianStemmer(charset);
44     }
45
46     /**
47      * @return Returns the next token in the stream, or null at EOS
48      */

49     public final Token next() throws IOException JavaDoc
50     {
51         if ((token = input.next()) == null)
52         {
53             return null;
54         }
55         else
56         {
57             String JavaDoc s = stemmer.stem(token.termText());
58             if (!s.equals(token.termText()))
59             {
60                 return new Token(s, token.startOffset(), token.endOffset(),
61                     token.type());
62             }
63             return token;
64         }
65     }
66
67     /**
68      * Set a alternative/custom RussianStemmer for this filter.
69      */

70     public void setStemmer(RussianStemmer stemmer)
71     {
72         if (stemmer != null)
73         {
74             this.stemmer = stemmer;
75         }
76     }
77 }
78
Popular Tags