LowerCaseTokenizer


1   package org.apache.lucene.analysis;
2   
3   /**
4    * Copyright 2004 The Apache Software Foundation
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  import java.io.Reader  ;
20  
21  /**
22   * LowerCaseTokenizer performs the function of LetterTokenizer
23   * and LowerCaseFilter together.  It divides text at non-letters and converts
24   * them to lower case.  While it is functionally equivalent to the combination
25   * of LetterTokenizer and LowerCaseFilter, there is a performance advantage
26   * to doing the two tasks at once, hence this (redundant) implementation.
27   * <P>
28   * Note: this does a decent job for most European languages, but does a terrible
29   * job for some Asian languages, where words are not separated by spaces.
30   */
31  public final class LowerCaseTokenizer extends LetterTokenizer {
32    /** Construct a new LowerCaseTokenizer. */
33    public LowerCaseTokenizer(Reader   in) {
34      super(in);
35    }
36  
37    /** Collects only characters which satisfy
38     * {@link Character#isLetter(char)}.*/
39    protected char normalize(char c) {
40      return Character.toLowerCase(c);
41    }
42  }
43

A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame

Popular Tags