KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > analysis > LetterTokenizer


1 package org.apache.lucene.analysis;
2
3 /**
4  * Copyright 2004 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
19 import java.io.Reader JavaDoc;
20
21 /** A LetterTokenizer is a tokenizer that divides text at non-letters. That's
22   to say, it defines tokens as maximal strings of adjacent letters, as defined
23   by java.lang.Character.isLetter() predicate.
24
25   Note: this does a decent job for most European languages, but does a terrible
26   job for some Asian languages, where words are not separated by spaces. */

27
28 public class LetterTokenizer extends CharTokenizer {
29   /** Construct a new LetterTokenizer. */
30   public LetterTokenizer(Reader JavaDoc in) {
31     super(in);
32   }
33
34   /** Collects only characters which satisfy
35    * {@link Character#isLetter(char)}.*/

36   protected boolean isTokenChar(char c) {
37     return Character.isLetter(c);
38   }
39 }
40
Popular Tags