// KickJava   Java API By Example, From Geeks To Geeks.

// Java > Open Source Codes > org > dspace > search > DSAnalyzer


/*
 * DSAnalyzer.java
 *
 * Version: $Revision: 1.8 $
 *
 * Date: $Date: 2005/04/20 14:23:23 $
 *
 * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
 * Institute of Technology. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * - Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * - Neither the name of the Hewlett-Packard Company nor the name of the
 * Massachusetts Institute of Technology nor the names of their
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

package org.dspace.search;

import java.io.Reader;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.PorterStemFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardFilter;

52 /**
53  * Custom Lucene Analyzer that combines the standard filter, lowercase filter,
54  * stemming and stopword filters.
55  */

56 public class DSAnalyzer extends Analyzer
57 {
58     /*
59      * An array containing some common words that are not usually useful for
60      * searching.
61      */

62     private static final String JavaDoc[] STOP_WORDS =
63     {
64
65     // new stopwords (per MargretB)
66
"a", "am", "and", "are", "as", "at", "be", "but", "by", "for",
67             "if", "in", "into", "is", "it", "no", "not", "of", "on", "or",
68             "the", "to", "was"
69     // old stopwords (Lucene default)
70
/*
71      * "a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in",
72      * "into", "is", "it", "no", "not", "of", "on", "or", "s", "such", "t",
73      * "that", "the", "their","then", "there","these", "they", "this", "to",
74      * "was", "will", "with"
75      */

76     };
77
78     /*
79      * Stop table
80      */

81     final static private Set JavaDoc stopSet = StopFilter.makeStopSet(STOP_WORDS);
82
83     /*
84      * Create a token stream for this analyzer.
85      */

86     public final TokenStream tokenStream(String JavaDoc fieldName, final Reader JavaDoc reader)
87     {
88         TokenStream result = new DSTokenizer(reader);
89
90         result = new StandardFilter(result);
91         result = new LowerCaseFilter(result);
92         result = new StopFilter(result, stopSet);
93         result = new PorterStemFilter(result);
94
95         return result;
96     }
97 }
98
// Popular Tags