KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > analysis > LowerCaseTokenizer


1 package org.apache.lucene.analysis;
2
3 /**
4  * Copyright 2004 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
19 import java.io.Reader;
20
21 /**
22  * LowerCaseTokenizer performs the function of LetterTokenizer
23  * and LowerCaseFilter together. It divides text at non-letters and converts
24  * the letters to lower case. While it is functionally equivalent to the combination
25  * of LetterTokenizer and LowerCaseFilter, there is a performance advantage
26  * to doing the two tasks at once, hence this (redundant) implementation.
27  * <P>
28  * Note: this does a decent job for most European languages, but does a terrible
29  * job for some Asian languages, where words are not separated by spaces.
30  */

31 public final class LowerCaseTokenizer extends LetterTokenizer {
32   /** Construct a new LowerCaseTokenizer. */
33   public LowerCaseTokenizer(Reader JavaDoc in) {
34     super(in);
35   }
36
37   /** Collects only characters which satisfy
38    * {@link Character#isLetter(char)}.*/

39   protected char normalize(char c) {
40     return Character.toLowerCase(c);
41   }
42 }
43
Popular Tags