KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > incava > text > SpellChecker


1 package org.incava.text;
2
3 import java.io.*;
4 import java.util.*;
5 import org.incava.io.FileExt;
6
7
8 /**
9  * Calculates the edit distance between two strings.
10  */

11 public class SpellChecker
12 {
13     public static final int DEFAULT_MAX_DISTANCE = 4;
14
15     protected static final int COMP_LEN = 20;
16
17     protected static final int ARR_SIZE = COMP_LEN + 1;
18
19     private Map _words = new HashMap();
20
21     /**
22      * Computes the Levenstein edit distance between the two words, with a
23      * maximum of 3, at which point the distance is no longer computed.
24      */

25     public int editDistance(String JavaDoc str1, String JavaDoc str2)
26     {
27         return editDistance(str1, str2, 3);
28     }
29
30     /**
31      * Computes the Levenstein edit distance between the two words.
32      */

33     public int editDistance(String JavaDoc str1, String JavaDoc str2, int maximum)
34     {
35         int len1 = Math.min(str1.length(), COMP_LEN);
36         int len2 = Math.min(str2.length(), COMP_LEN);
37         
38         // A minimum threshold of three is used for better results with short
39
// strings (A modification to the original C code.)
40

41         int threshold = Math.max(maximum, (int)Math.floor((double)1 + (len1 + 2) / 4.0));
42         
43         int diff = Math.abs(len1 - len2);
44         if (diff > threshold) {
45             return -1 * diff;
46         }
47         else {
48             return compare(str1, len1, str2, len2);
49         }
50     }
51
52     public boolean nearMatch(String JavaDoc str1, String JavaDoc str2)
53     {
54         int edist = editDistance(str1, str2);
55         
56         // the edit distance is misleading for very short words, so it must be
57
// no more than the length of either word:
58
return edist >= 0 && edist <= DEFAULT_MAX_DISTANCE && edist < str1.length() && edist < str2.length();
59     }
60
61     /**
62      * Adds the given dictionary. Returns whether it could be read and had content.
63      */

64     public boolean addDictionary(String JavaDoc dictionary)
65     {
66         tr.Ace.log("adding dictionary: " + dictionary);
67
68         try {
69             BufferedReader br = new BufferedReader(new FileReader(dictionary));
70             
71             while (true) {
72                 String JavaDoc word = br.readLine();
73                 if (word == null) {
74                     break;
75                 }
76                 else {
77                     addWord(word);
78                 }
79             }
80             return true;
81         }
82         catch (IOException ioe) {
83             tr.Ace.log("ioe", ioe);
84             return false;
85         }
86     }
87
88     public String JavaDoc getKey(String JavaDoc word)
89     {
90         char[] chars = word.toCharArray();
91         if (chars.length > 1) {
92             char c0 = chars[0];
93             char c1 = chars[1];
94             if (c0 > c1) {
95                 char t = c0;
96                 c0 = c1;
97                 c1 = t;
98             }
99             return "" + c0 + c1;
100         }
101         else if (chars.length > 0) {
102             return "" + chars[0];
103         }
104         else {
105             return null;
106         }
107     }
108
109     public void addWord(String JavaDoc word)
110     {
111         String JavaDoc key = getKey(word);
112         List atLtrs = (List)_words.get(key);
113         if (atLtrs == null) {
114             atLtrs = new ArrayList();
115             _words.put(key, atLtrs);
116         }
117         atLtrs.add(word);
118     }
119
120     public boolean hasWord(String JavaDoc word)
121     {
122         String JavaDoc key = getKey(word);
123         List atLtrs = (List)_words.get(key);
124         return atLtrs != null && atLtrs.contains(word);
125     }
126
127     /**
128      * @param nearMatches a map from edit distances to matches.
129      */

130     public boolean isCorrect(String JavaDoc word, int maxEditDistance, Map nearMatches)
131     {
132         if (hasWord(word)) {
133             return true;
134         }
135         else if (nearMatches != null) {
136             char[] wordChars = word.toCharArray();
137             int wordLen = wordChars.length;
138             String JavaDoc key = getKey(word);
139             List wds = (List)_words.get(key);
140
141             if (wds != null) {
142                 Iterator wit = wds.iterator();
143                 while (wit.hasNext()) {
144                     String JavaDoc w = (String JavaDoc)wit.next();
145
146                     int ed = editDistance(word, w, maxEditDistance);
147                     if (ed >= 0 && ed <= maxEditDistance) {
148                         Integer JavaDoc eDist = new Integer JavaDoc(ed);
149                         List matches = (List)nearMatches.get(eDist);
150                         if (matches == null) {
151                             matches = new ArrayList();
152                             nearMatches.put(eDist, matches);
153                         }
154                         matches.add(w);
155                     }
156                 }
157             }
158         }
159         return false;
160     }
161     
162     public boolean isCorrect(String JavaDoc word, Map nearMatches)
163     {
164         return isCorrect(word, DEFAULT_MAX_DISTANCE, nearMatches);
165     }
166
167     /**
168      * Compares the two characters. English words should probably be case
169      * insensitive; code should not.
170      */

171     protected int compare(String JavaDoc str1, int len1, String JavaDoc str2, int len2)
172     {
173         final int ADDITION = 1;
174         final int CHANGE = 2;
175         final int DELETION = 1;
176
177         int[][] distance = new int[ARR_SIZE][ARR_SIZE];
178         distance[0][0] = 0;
179     
180         for (int j = 1; j < ARR_SIZE; ++j) {
181             distance[0][j] = distance[0][j - 1] + ADDITION;
182             distance[j][0] = distance[j - 1][0] + DELETION;
183         }
184     
185         for (int i = 1; i <= len1; ++i) {
186             for (int j = 1; j <= len2; ++j) {
187                 distance[i][j] = min3(distance[i - 1][j - 1] + (str1.charAt(i - 1) == str2.charAt(j - 1) ? 0 : CHANGE),
188                                       distance[i][j - 1] + ADDITION,
189                                       distance[i - 1][j] + DELETION);
190             }
191         }
192         
193         return distance[len1][len2];
194     }
195
196     protected static int min3(int x, int y, int z)
197     {
198         return (x < y) ? (x < z ? x : z) : (y < z ? y : z);
199     }
200
201 }
202
Popular Tags