KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > analysis > standard > StandardFilter


1 package org.apache.lucene.analysis.standard;
2
3 /**
4  * Copyright 2004 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
19 import org.apache.lucene.analysis.*;
20
21 /** Normalizes tokens extracted with {@link StandardTokenizer}. */
22
23 public final class StandardFilter extends TokenFilter
24   implements StandardTokenizerConstants {
25
26
27   /** Construct filtering <i>in</i>. */
28   public StandardFilter(TokenStream in) {
29     super(in);
30   }
31
32   private static final String JavaDoc APOSTROPHE_TYPE = tokenImage[APOSTROPHE];
33   private static final String JavaDoc ACRONYM_TYPE = tokenImage[ACRONYM];
34   
35   /** Returns the next token in the stream, or null at EOS.
36    * <p>Removes <tt>'s</tt> from the end of words.
37    * <p>Removes dots from acronyms.
38    */

39   public final org.apache.lucene.analysis.Token next() throws java.io.IOException JavaDoc {
40     org.apache.lucene.analysis.Token t = input.next();
41
42     if (t == null)
43       return null;
44
45     String JavaDoc text = t.termText();
46     String JavaDoc type = t.type();
47
48     if (type == APOSTROPHE_TYPE && // remove 's
49
(text.endsWith("'s") || text.endsWith("'S"))) {
50       return new org.apache.lucene.analysis.Token
51     (text.substring(0,text.length()-2),
52      t.startOffset(), t.endOffset(), type);
53
54     } else if (type == ACRONYM_TYPE) { // remove dots
55
StringBuffer JavaDoc trimmed = new StringBuffer JavaDoc();
56       for (int i = 0; i < text.length(); i++) {
57     char c = text.charAt(i);
58     if (c != '.')
59       trimmed.append(c);
60       }
61       return new org.apache.lucene.analysis.Token
62     (trimmed.toString(), t.startOffset(), t.endOffset(), type);
63
64     } else {
65       return t;
66     }
67   }
68 }
69
Popular Tags