KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > analysis > ru > RussianLowerCaseFilter


1 package org.apache.lucene.analysis.ru;
2
3 /**
4  * Copyright 2004 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
19 import org.apache.lucene.analysis.TokenFilter;
20 import org.apache.lucene.analysis.Token;
21 import org.apache.lucene.analysis.TokenStream;
22
23 /**
24  * Normalizes token text to lower case, analyzing given ("russian") charset.
25  *
26  * @author Boris Okner, b.okner@rogers.com
27  * @version $Id: RussianLowerCaseFilter.java 150998 2004-08-16 20:30:46Z dnaber $
28  */

29 public final class RussianLowerCaseFilter extends TokenFilter
30 {
31     char[] charset;
32
33     public RussianLowerCaseFilter(TokenStream in, char[] charset)
34     {
35         super(in);
36         this.charset = charset;
37     }
38
39     public final Token next() throws java.io.IOException JavaDoc
40     {
41         Token t = input.next();
42
43         if (t == null)
44             return null;
45
46         String JavaDoc txt = t.termText();
47
48         char[] chArray = txt.toCharArray();
49         for (int i = 0; i < chArray.length; i++)
50         {
51             chArray[i] = RussianCharsets.toLowerCase(chArray[i], charset);
52         }
53
54         String JavaDoc newTxt = new String JavaDoc(chArray);
55         // create new token
56
Token newToken = new Token(newTxt, t.startOffset(), t.endOffset());
57
58         return newToken;
59     }
60 }
61
Popular Tags