KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jahia > services > search > analyzer > StandardFilter


1 package org.jahia.services.search.analyzer;
2
3 /* ====================================================================
4  * The Apache Software License, Version 1.1
5  *
6  * Copyright (c) 2001 The Apache Software Foundation. All rights
7  * reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  *
16  * 2. Redistributions in binary form must reproduce the above copyright
17  * notice, this list of conditions and the following disclaimer in
18  * the documentation and/or other materials provided with the
19  * distribution.
20  *
21  * 3. The end-user documentation included with the redistribution,
22  * if any, must include the following acknowledgment:
23  * "This product includes software developed by the
24  * Apache Software Foundation (http://www.apache.org/)."
25  * Alternately, this acknowledgment may appear in the software itself,
26  * if and wherever such third-party acknowledgments normally appear.
27  *
28  * 4. The names "Apache" and "Apache Software Foundation" and
29  * "Apache Lucene" must not be used to endorse or promote products
30  * derived from this software without prior written permission. For
31  * written permission, please contact apache@apache.org.
32  *
33  * 5. Products derived from this software may not be called "Apache",
34  * "Apache Lucene", nor may "Apache" appear in their name, without
35  * prior written permission of the Apache Software Foundation.
36  *
37  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
41  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
44  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
45  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
46  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
47  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48  * SUCH DAMAGE.
49  * ====================================================================
50  *
51  * This software consists of voluntary contributions made by many
52  * individuals on behalf of the Apache Software Foundation. For more
53  * information on the Apache Software Foundation, please see
54  * <http://www.apache.org/>.
55  */

56
57 import org.apache.lucene.analysis.*;
58
59 /** Normalizes tokens extracted with {@link StandardTokenizer}. */
60
61 public final class StandardFilter extends TokenFilter
62   implements StandardTokenizerConstants {
63
64
65   /** Construct filtering <i>in</i>. */
66   public StandardFilter(TokenStream in) {
67     super(in);
68   }
69
70   private static final String JavaDoc APOSTROPHE_TYPE = tokenImage[APOSTROPHE];
71   private static final String JavaDoc ACRONYM_TYPE = tokenImage[ACRONYM];
72
73   /** Returns the next token in the stream, or null at EOS.
74    * <p>Removes <tt>'s</tt> from the end of words.
75    * <p>Removes dots from acronyms.
76    */

77   public final org.apache.lucene.analysis.Token next() throws java.io.IOException JavaDoc {
78     org.apache.lucene.analysis.Token t = input.next();
79
80     if (t == null)
81       return null;
82
83     String JavaDoc text = t.termText();
84     String JavaDoc type = t.type();
85
86     if (type == APOSTROPHE_TYPE && // remove 's
87
(text.endsWith("'s") || text.endsWith("'S"))) {
88       return new org.apache.lucene.analysis.Token
89     (text.substring(0,text.length()-2),
90      t.startOffset(), t.endOffset(), type);
91
92     } else if (type == ACRONYM_TYPE) { // remove dots
93
StringBuffer JavaDoc trimmed = new StringBuffer JavaDoc();
94       for (int i = 0; i < text.length(); i++) {
95     char c = text.charAt(i);
96     if (c != '.')
97       trimmed.append(c);
98       }
99       return new org.apache.lucene.analysis.Token
100     (trimmed.toString(), t.startOffset(), t.endOffset(), type);
101
102     } else {
103       return t;
104     }
105   }
106 }
107
Popular Tags