KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > dspace > search > DSTokenizer


1 /*
2  * DSTokenizer.java
3  *
4  * Version: $Revision: 1.7 $
5  *
6  * Date: $Date: 2005/04/20 14:23:23 $
7  *
8  * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
9  * Institute of Technology. All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions are
13  * met:
14  *
15  * - Redistributions of source code must retain the above copyright
16  * notice, this list of conditions and the following disclaimer.
17  *
18  * - Redistributions in binary form must reproduce the above copyright
19  * notice, this list of conditions and the following disclaimer in the
20  * documentation and/or other materials provided with the distribution.
21  *
22  * - Neither the name of the Hewlett-Packard Company nor the name of the
23  * Massachusetts Institute of Technology nor the names of their
24  * contributors may be used to endorse or promote products derived from
25  * this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
34  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
35  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
36  * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
37  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
38  * DAMAGE.
39  */

40 package org.dspace.search;
41
42 import java.io.Reader JavaDoc;
43
44 import org.apache.lucene.analysis.CharTokenizer;
45
46 /**
47  * Customized Lucene tokenizer: letters and digits form tokens and are
48  * lower-cased. Exists because the standard one rejects numbers from indexing/querying.
49  */

50 public final class DSTokenizer extends CharTokenizer
51 {
52     /**
53      * Construct a new DSTokenizer; the reader {@code in} is handed to the CharTokenizer constructor.
54      */

55     public DSTokenizer(Reader JavaDoc in)
56     {
57         super(in);
58     }
59
60     /**
61      * Normalizes a character by converting it with {@link Character#toLowerCase(char)}.
62      */

63     protected char normalize(char c)
64     {
65         return Character.toLowerCase(c);
66     }
67
68     /**
69      * Collects only characters which satisfy
70      * {@link Character#isLetterOrDigit(char)}.
71      */

72     protected boolean isTokenChar(char c)
73     {
74         return Character.isLetterOrDigit(c);
75     }
76 }
77
Popular Tags