KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > analysis > WordlistLoader


package org.apache.lucene.analysis;

/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;

/**
 * Loader for text files that represent a list of stopwords.
 *
 * <p>All methods are static. The {@code getWordSet} methods return the words as a
 * {@link HashSet}; the deprecated {@code getWordtable} methods return a
 * {@link Hashtable} that maps every word to itself.
 *
 * @author Gerhard Schwarz
 * @version $Id: WordlistLoader.java 192989 2005-06-22 19:59:03Z dnaber $
 */
public class WordlistLoader {

  /**
   * Loads a text file and adds every line as an entry to a HashSet (omitting
   * leading and trailing whitespace). Every line of the file should contain only
   * one word. The words need to be in lowercase if you make use of an
   * Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
   *
   * <p>The file is opened with a {@link FileReader}, so it is decoded with that
   * class's default character encoding. The file is always closed before this
   * method returns, whether it completes normally or throws.
   *
   * @param wordfile File containing the wordlist
   * @return A HashSet with the file's words
   * @throws IOException if the file cannot be opened or read
   */
  public static HashSet getWordSet(File wordfile) throws IOException {
    FileReader reader = null;
    try {
      reader = new FileReader(wordfile);
      return getWordSet(reader);
    } finally {
      // Closing again after getWordSet(Reader) has already closed it is harmless.
      if (reader != null) {
        reader.close();
      }
    }
  }

  /**
   * Reads lines from a Reader and adds every line as an entry to a HashSet (omitting
   * leading and trailing whitespace). Every line of the Reader should contain only
   * one word. The words need to be in lowercase if you make use of an
   * Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
   *
   * <p>Every line is added after trimming, so a blank line contributes a single
   * empty-string entry. The reader is closed before this method returns, whether
   * it completes normally or throws.
   *
   * @param reader Reader containing the wordlist; wrapped in a
   *               {@link BufferedReader} unless it already is one
   * @return A HashSet with the reader's words
   * @throws IOException if reading from or closing the reader fails
   */
  public static HashSet getWordSet(Reader reader) throws IOException {
    HashSet result = new HashSet();
    BufferedReader br = null;
    try {
      // Reuse the caller's buffering when present instead of double-wrapping.
      br = (reader instanceof BufferedReader)
          ? (BufferedReader) reader
          : new BufferedReader(reader);
      String word;
      while ((word = br.readLine()) != null) {
        result.add(word.trim());
      }
    } finally {
      if (br != null) {
        br.close();
      }
    }
    return result;
  }

  /**
   * Loads the wordlist found at {@code path}/{@code wordfile} into a Hashtable
   * whose keys and values are both the words.
   *
   * @param path Path to the wordlist
   * @param wordfile Name of the wordlist
   * @return A Hashtable mapping every word of the file to itself
   * @throws IOException if the file cannot be opened or read
   *
   * @deprecated Use {@link #getWordSet(File)} instead
   */
  public static Hashtable getWordtable(String path, String wordfile) throws IOException {
    return getWordtable(new File(path, wordfile));
  }

  /**
   * Loads the wordlist found at the given path into a Hashtable whose keys and
   * values are both the words.
   *
   * @param wordfile Complete path to the wordlist
   * @return A Hashtable mapping every word of the file to itself
   * @throws IOException if the file cannot be opened or read
   *
   * @deprecated Use {@link #getWordSet(File)} instead
   */
  public static Hashtable getWordtable(String wordfile) throws IOException {
    return getWordtable(new File(wordfile));
  }

  /**
   * Loads the given wordlist file into a Hashtable whose keys and values are
   * both the words. The words are read with {@link #getWordSet(File)}.
   *
   * @param wordfile File object that points to the wordlist
   * @return A Hashtable mapping every word of the file to itself
   * @throws IOException if the file cannot be opened or read
   *
   * @deprecated Use {@link #getWordSet(File)} instead
   */
  public static Hashtable getWordtable(File wordfile) throws IOException {
    return makeWordTable(getWordSet(wordfile));
  }

  /**
   * Builds a wordlist table, using words as both keys and values
   * for backward compatibility.
   *
   * @param wordSet stopword set; every element must be a String
   * @return A Hashtable in which each word of the set is mapped to itself
   */
  private static Hashtable makeWordTable(HashSet wordSet) {
    Hashtable table = new Hashtable();
    for (Iterator iter = wordSet.iterator(); iter.hasNext();) {
      String word = (String) iter.next();
      table.put(word, word);
    }
    return table;
  }
}
134
Popular Tags