1 package org.incava.text; 2 3 import java.io.*; 4 import java.util.*; 5 import org.incava.io.FileExt; 6 7 8 11 public class SpellChecker 12 { 13 public static final int DEFAULT_MAX_DISTANCE = 4; 14 15 protected static final int COMP_LEN = 20; 16 17 protected static final int ARR_SIZE = COMP_LEN + 1; 18 19 private Map _words = new HashMap(); 20 21 25 public int editDistance(String str1, String str2) 26 { 27 return editDistance(str1, str2, 3); 28 } 29 30 33 public int editDistance(String str1, String str2, int maximum) 34 { 35 int len1 = Math.min(str1.length(), COMP_LEN); 36 int len2 = Math.min(str2.length(), COMP_LEN); 37 38 41 int threshold = Math.max(maximum, (int)Math.floor((double)1 + (len1 + 2) / 4.0)); 42 43 int diff = Math.abs(len1 - len2); 44 if (diff > threshold) { 45 return -1 * diff; 46 } 47 else { 48 return compare(str1, len1, str2, len2); 49 } 50 } 51 52 public boolean nearMatch(String str1, String str2) 53 { 54 int edist = editDistance(str1, str2); 55 56 return edist >= 0 && edist <= DEFAULT_MAX_DISTANCE && edist < str1.length() && edist < str2.length(); 59 } 60 61 64 public boolean addDictionary(String dictionary) 65 { 66 tr.Ace.log("adding dictionary: " + dictionary); 67 68 try { 69 BufferedReader br = new BufferedReader(new FileReader(dictionary)); 70 71 while (true) { 72 String word = br.readLine(); 73 if (word == null) { 74 break; 75 } 76 else { 77 addWord(word); 78 } 79 } 80 return true; 81 } 82 catch (IOException ioe) { 83 tr.Ace.log("ioe", ioe); 84 return false; 85 } 86 } 87 88 public String getKey(String word) 89 { 90 char[] chars = word.toCharArray(); 91 if (chars.length > 1) { 92 char c0 = chars[0]; 93 char c1 = chars[1]; 94 if (c0 > c1) { 95 char t = c0; 96 c0 = c1; 97 c1 = t; 98 } 99 return "" + c0 + c1; 100 } 101 else if (chars.length > 0) { 102 return "" + chars[0]; 103 } 104 else { 105 return null; 106 } 107 } 108 109 public void addWord(String word) 110 { 111 String key = getKey(word); 112 List atLtrs = (List)_words.get(key); 113 if (atLtrs == null) { 114 atLtrs = new ArrayList(); 115 _words.put(key, atLtrs); 116 } 117 atLtrs.add(word); 118 } 119 120 public boolean hasWord(String word) 121 { 122 String key = getKey(word); 123 List atLtrs = (List)_words.get(key); 124 return atLtrs != null && atLtrs.contains(word); 125 } 126 127 130 public boolean isCorrect(String word, int maxEditDistance, Map nearMatches) 131 { 132 if (hasWord(word)) { 133 return true; 134 } 135 else if (nearMatches != null) { 136 char[] wordChars = word.toCharArray(); 137 int wordLen = wordChars.length; 138 String key = getKey(word); 139 List wds = (List)_words.get(key); 140 141 if (wds != null) { 142 Iterator wit = wds.iterator(); 143 while (wit.hasNext()) { 144 String w = (String )wit.next(); 145 146 int ed = editDistance(word, w, maxEditDistance); 147 if (ed >= 0 && ed <= maxEditDistance) { 148 Integer eDist = new Integer (ed); 149 List matches = (List)nearMatches.get(eDist); 150 if (matches == null) { 151 matches = new ArrayList(); 152 nearMatches.put(eDist, matches); 153 } 154 matches.add(w); 155 } 156 } 157 } 158 } 159 return false; 160 } 161 162 public boolean isCorrect(String word, Map nearMatches) 163 { 164 return isCorrect(word, DEFAULT_MAX_DISTANCE, nearMatches); 165 } 166 167 171 protected int compare(String str1, int len1, String str2, int len2) 172 { 173 final int ADDITION = 1; 174 final int CHANGE = 2; 175 final int DELETION = 1; 176 177 int[][] distance = new int[ARR_SIZE][ARR_SIZE]; 178 distance[0][0] = 0; 179 180 for (int j = 1; j < ARR_SIZE; ++j) { 181 distance[0][j] = distance[0][j - 1] + ADDITION; 182 distance[j][0] = distance[j - 1][0] + DELETION; 183 } 184 185 for (int i = 1; i <= len1; ++i) { 186 for (int j = 1; j <= len2; ++j) { 187 distance[i][j] = min3(distance[i - 1][j - 1] + (str1.charAt(i - 1) == str2.charAt(j - 1) ? 0 : CHANGE), 188 distance[i][j - 1] + ADDITION, 189 distance[i - 1][j] + DELETION); 190 } 191 } 192 193 return distance[len1][len2]; 194 } 195 196 protected static int min3(int x, int y, int z) 197 { 198 return (x < y) ? (x < z ? x : z) : (y < z ? y : z); 199 } 200 201 } 202 | Popular Tags |