KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > blandware > atleap > search > analysis > PortugueseAnalyzer


1 /*
2  * Copyright 2004 Blandware (http://www.blandware.com)
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package com.blandware.atleap.search.analysis;
17
18 import org.apache.lucene.analysis.Analyzer;
19 import org.apache.lucene.analysis.TokenStream;
20 import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
21
22 import java.io.Reader JavaDoc;
23
24 /**
25  * <p>Analyzer for Portuguese language</p>
26  * <p><a HREF="PortugueseAnalyzer.java.htm"><i>View Source</i></a></p>
27  * <p/>
28  *
29  * @author Andrey Grebnev <a HREF="mailto:andrey.grebnev@blandware.com">&lt;andrey.grebnev@blandware.com&gt;</a>
30  * @version $Revision: 1.3 $ $Date: 2005/02/24 19:51:22 $
31  */

32 public class PortugueseAnalyzer extends Analyzer {
33
34     private static SnowballAnalyzer analyzer;
35
36     private String JavaDoc PORTUGUESE_STOP_WORDS[] = {
37
38         "a", "ainda", "alem", "ambas", "ambos", "antes",
39         "ao", "aonde", "aos", "apos", "aquele", "aqueles",
40         "as", "assim", "com", "como", "contra", "contudo",
41         "cuja", "cujas", "cujo", "cujos", "da", "das", "de",
42         "dela", "dele", "deles", "demais", "depois", "desde",
43         "desta", "deste", "dispoe", "dispoem", "diversa",
44         "diversas", "diversos", "do", "dos", "durante", "e",
45         "ela", "elas", "ele", "eles", "em", "entao", "entre",
46         "essa", "essas", "esse", "esses", "esta", "estas",
47         "este", "estes", "ha", "isso", "isto", "logo", "mais",
48         "mas", "mediante", "menos", "mesma", "mesmas", "mesmo",
49         "mesmos", "na", "nas", "nao", "nas", "nem", "nesse", "neste",
50         "nos", "o", "os", "ou", "outra", "outras", "outro", "outros",
51         "pelas", "pelas", "pelo", "pelos", "perante", "pois", "por",
52         "porque", "portanto", "proprio", "propios", "quais", "qual",
53         "qualquer", "quando", "quanto", "que", "quem", "quer", "se",
54         "seja", "sem", "sendo", "seu", "seus", "sob", "sobre", "sua",
55         "suas", "tal", "tambem", "teu", "teus", "toda", "todas", "todo",
56         "todos", "tua", "tuas", "tudo", "um", "uma", "umas", "uns"};
57
58     /**
59      * Creates new instance of SpanishAnalyzer
60      */

61     public PortugueseAnalyzer() {
62         analyzer = new SnowballAnalyzer("Portuguese", PORTUGUESE_STOP_WORDS);
63     }
64
65     public PortugueseAnalyzer(String JavaDoc stopWords[]) {
66         analyzer = new SnowballAnalyzer("Portuguese", stopWords);
67     }
68
69     public TokenStream tokenStream(String JavaDoc fieldName, Reader JavaDoc reader) {
70         return analyzer.tokenStream(fieldName, reader);
71     }
72 }
73
Popular Tags