KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > eclipse > help > internal > search > Analyzer_en


1 /*******************************************************************************
2  * Copyright (c) 2000, 2006 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Eclipse Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/epl-v10.html
7  *
8  * Contributors:
9  * IBM Corporation - initial API and implementation
10  *******************************************************************************/

11 package org.eclipse.help.internal.search;
12 import java.io.*;
13
14 import org.apache.lucene.analysis.*;
15 /**
16  * Lucene Analyzer for English. LowerCaseTokenizer->StopFilter->PorterStemFilter
17  */

18 public class Analyzer_en extends Analyzer {
19     /**
20      * Constructor for Analyzer_en.
21      */

22     public Analyzer_en() {
23         super();
24     }
25     /**
26      * Creates a TokenStream which tokenizes all the text in the provided
27      * Reader.
28      */

29     public final TokenStream tokenStream(String JavaDoc fieldName, Reader reader) {
30         return new PorterStemFilter(new StopFilter(new LowerCaseAndDigitsTokenizer(reader), STOP_WORDS));
31     }
32     /**
33      * Array of English stop words. Differs from StandardAnalyzer's default stop
34      * words by not having "for", "if", and "this" that are java keywords.
35      */

36     private final static String JavaDoc[] STOP_WORDS = {"a", //$NON-NLS-1$
37
"and", //$NON-NLS-1$
38
"are", //$NON-NLS-1$
39
"as", //$NON-NLS-1$
40
"at", //$NON-NLS-1$
41
"be", //$NON-NLS-1$
42
"but", //$NON-NLS-1$
43
"by", //$NON-NLS-1$
44
"in", //$NON-NLS-1$
45
"into", //$NON-NLS-1$
46
"is", //$NON-NLS-1$
47
"it", //$NON-NLS-1$
48
"no", //$NON-NLS-1$
49
"not", //$NON-NLS-1$
50
"of", //$NON-NLS-1$
51
"on", //$NON-NLS-1$
52
"or", //$NON-NLS-1$
53
"s", //$NON-NLS-1$
54
"such", //$NON-NLS-1$
55
"t", //$NON-NLS-1$
56
"that", //$NON-NLS-1$
57
"the", //$NON-NLS-1$
58
"their", //$NON-NLS-1$
59
"then", //$NON-NLS-1$
60
"there", //$NON-NLS-1$
61
"these", //$NON-NLS-1$
62
"they", //$NON-NLS-1$
63
"to", //$NON-NLS-1$
64
"was", //$NON-NLS-1$
65
"will", //$NON-NLS-1$
66
"with"}; //$NON-NLS-1$
67

68 }
69
Popular Tags