package org.apache.lucene.analysis;

/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Reader;

/**
 * A tokenizer that does the work of LetterTokenizer and LowerCaseFilter in a
 * single pass: text is divided at non-letters, and the resulting tokens are
 * converts to lower case.
 * <p>
 * The output is functionally equivalent to chaining LetterTokenizer with
 * LowerCaseFilter. This (redundant) implementation exists only because doing
 * both tasks at once has a performance advantage.
 * <p>
 * Note: this does a decent job for most European languages, but a terrible
 * job for some Asian languages, where words are not separated by spaces.
 */
public final class LowerCaseTokenizer extends LetterTokenizer {

  /**
   * Constructs a new LowerCaseTokenizer.
   *
   * @param in the reader handed on unchanged to the LetterTokenizer constructor
   */
  public LowerCaseTokenizer(Reader in) {
    super(in);
  }

  /**
   * Converts a character to lower case using
   * {@link Character#toLowerCase(char)}.
   *
   * @param c the character to normalize
   * @return the lower-case form of {@code c} as reported by
   *         {@link Character#toLowerCase(char)}
   */
  protected char normalize(char c) {
    final char lowered = Character.toLowerCase(c);
    return lowered;
  }
}