KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > analysis > cjk > CJKAnalyzer


1 package org.apache.lucene.analysis.cjk;
2
3 /**
4  * Copyright 2004-2005 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;

import java.io.Reader;
import java.util.Set;
25
26
27 /**
28  * Filters CJKTokenizer with StopFilter.
29  *
30  * @author Che, Dong
31  */

32 public class CJKAnalyzer extends Analyzer {
33   //~ Static fields/initializers ---------------------------------------------
34

35   /**
36    * An array containing some common English words that are not usually
37    * useful for searching and some double-byte interpunctions.
38    */

39   public final static String JavaDoc[] STOP_WORDS = {
40     "a", "and", "are", "as", "at", "be",
41     "but", "by", "for", "if", "in",
42     "into", "is", "it", "no", "not",
43     "of", "on", "or", "s", "such", "t",
44     "that", "the", "their", "then",
45     "there", "these", "they", "this",
46     "to", "was", "will", "with", "",
47     "www"
48   };
49
50   //~ Instance fields --------------------------------------------------------
51

52   /**
53    * stop word list
54    */

55   private Set JavaDoc stopTable;
56
57   //~ Constructors -----------------------------------------------------------
58

59   /**
60    * Builds an analyzer which removes words in {@link #STOP_WORDS}.
61    */

62   public CJKAnalyzer() {
63     stopTable = StopFilter.makeStopSet(STOP_WORDS);
64   }
65
66   /**
67    * Builds an analyzer which removes words in the provided array.
68    *
69    * @param stopWords stop word array
70    */

71   public CJKAnalyzer(String JavaDoc[] stopWords) {
72     stopTable = StopFilter.makeStopSet(stopWords);
73   }
74
75   //~ Methods ----------------------------------------------------------------
76

77   /**
78    * get token stream from input
79    *
80    * @param fieldName lucene field name
81    * @param reader input reader
82    * @return TokenStream
83    */

84   public final TokenStream tokenStream(String JavaDoc fieldName, Reader JavaDoc reader) {
85     return new StopFilter(new CJKTokenizer(reader), stopTable);
86   }
87 }
88
Popular Tags