KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > analysis > nl > WordlistLoader


1 package org.apache.lucene.analysis.nl;
2
3 /**
4  * Copyright 2004 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
19 import java.io.File JavaDoc;
20 import java.io.FileReader JavaDoc;
21 import java.io.IOException JavaDoc;
22 import java.io.LineNumberReader JavaDoc;
23 import java.util.HashMap JavaDoc;
24
/**
 * Loads word lists and stem dictionaries from plain text files.
 * <p/>
 * A word list contains one word per line; every line becomes an entry of the
 * returned table. If the file is not found or cannot be read, an empty table
 * is returned instead of an exception being thrown.
 *
 * @author Gerhard Schwarz
 */
public class WordlistLoader {
  /**
   * Loads the word list <code>wordfile</code> found in directory <code>path</code>.
   *
   * @param path Path to the wordlist
   * @param wordfile Name of the wordlist
   * @return Table mapping every line of the file to itself; empty if either
   *         argument is <code>null</code> or the file cannot be read
   */
  public static HashMap getWordtable(String path, String wordfile) {
    if (path == null || wordfile == null) {
      return new HashMap();
    }
    return getWordtable(new File(path, wordfile));
  }

  /**
   * Loads the word list stored at <code>wordfile</code>.
   *
   * @param wordfile Complete path to the wordlist
   * @return Table mapping every line of the file to itself; empty if
   *         <code>wordfile</code> is <code>null</code> or the file cannot be read
   */
  public static HashMap getWordtable(String wordfile) {
    if (wordfile == null) {
      return new HashMap();
    }
    return getWordtable(new File(wordfile));
  }

  /**
   * Reads a stem dictionary. Each line contains
   * <code>word \t stem</code> (i.e. tab separated); only the first tab on a
   * line acts as the separator. Lines without a tab are skipped.
   * <p/>
   * If reading fails part-way through, the entries read so far are returned.
   *
   * @param wordstemfile File containing the stem dictionary
   * @return Stem dictionary that overrules the stemming algorithm; empty if
   *         <code>wordstemfile</code> is <code>null</code> or cannot be opened
   */
  public static HashMap getStemDict(File wordstemfile) {
    HashMap result = new HashMap();
    if (wordstemfile == null) {
      return result;
    }
    LineNumberReader lnr = null;
    try {
      lnr = new LineNumberReader(new FileReader(wordstemfile));
      String line;
      while ((line = lnr.readLine()) != null) {
        String[] wordstem = line.split("\t", 2);
        if (wordstem.length < 2) {
          // No tab on this line (e.g. a blank line): there is no stem to
          // record, and indexing wordstem[1] would throw.
          continue;
        }
        result.put(wordstem[0], wordstem[1]);
      }
    } catch (IOException e) {
      // Deliberate best effort: keep whatever was read before the failure.
    } finally {
      closeQuietly(lnr);
    }
    return result;
  }

  /**
   * Loads the word list stored in <code>wordfile</code>. Every line of the
   * file, exactly as read, is used as both key and value of an entry.
   *
   * @param wordfile File containing the wordlist
   * @return Table of the words read; empty if <code>wordfile</code> is
   *         <code>null</code> or any I/O error occurs
   */
  public static HashMap getWordtable(File wordfile) {
    if (wordfile == null) {
      return new HashMap();
    }
    HashMap result = new HashMap();
    LineNumberReader lnr = null;
    try {
      lnr = new LineNumberReader(new FileReader(wordfile));
      String word;
      while ((word = lnr.readLine()) != null) {
        result.put(word, word);
      }
    } catch (IOException e) {
      // On error, use an empty table (words read before the failure are dropped).
      result = new HashMap();
    } finally {
      closeQuietly(lnr);
    }
    return result;
  }

  /**
   * Closes <code>reader</code> if it was opened, ignoring any failure to close.
   *
   * @param reader Reader to close, may be <code>null</code>
   */
  private static void closeQuietly(LineNumberReader reader) {
    if (reader == null) {
      return;
    }
    try {
      reader.close();
    } catch (IOException ignored) {
      // The data has already been read (or reading already failed); a failure
      // while closing cannot change the result handed back to the caller.
    }
  }
}
Popular Tags