1 16 17 package org.apache.commons.codec.language; 18 19 import org.apache.commons.codec.EncoderException; 20 import org.apache.commons.codec.StringEncoder; 21 22 29 public class Soundex implements StringEncoder { 30 31 36 public static final Soundex US_ENGLISH = new Soundex(); 37 38 48 public static final String US_ENGLISH_MAPPING_STRING = "01230120022455012623010202"; 49 50 56 public static final char[] US_ENGLISH_MAPPING = US_ENGLISH_MAPPING_STRING.toCharArray(); 57 58 77 public int difference(String s1, String s2) throws EncoderException { 78 return SoundexUtils.difference(this, s1, s2); 79 } 80 81 86 private int maxLength = 4; 87 88 92 private char[] soundexMapping; 93 94 100 public Soundex() { 101 this(US_ENGLISH_MAPPING); 102 } 103 104 114 public Soundex(char[] mapping) { 115 this.setSoundexMapping(mapping); 116 } 117 118 131 public Object encode(Object pObject) throws EncoderException { 132 if (!(pObject instanceof String )) { 133 throw new EncoderException("Parameter supplied to Soundex encode is not of type java.lang.String"); 134 } 135 return soundex((String ) pObject); 136 } 137 138 147 public String encode(String pString) { 148 return soundex(pString); 149 } 150 151 164 private char getMappingCode(String str, int index) { 165 char mappedChar = this.map(str.charAt(index)); 166 if (index > 1 && mappedChar != '0') { 168 char hwChar = str.charAt(index - 1); 169 if ('H' == hwChar || 'W' == hwChar) { 170 char preHWChar = str.charAt(index - 2); 171 char firstCode = this.map(preHWChar); 172 if (firstCode == mappedChar || 'H' == preHWChar || 'W' == preHWChar) { 173 return 0; 174 } 175 } 176 } 177 return mappedChar; 178 } 179 180 186 public int getMaxLength() { 187 return this.maxLength; 188 } 189 190 195 private char[] getSoundexMapping() { 196 return this.soundexMapping; 197 } 198 199 208 private char map(char ch) { 209 int index = ch - 'A'; 210 if (index < 0 || index >= this.getSoundexMapping().length) { 211 throw new IllegalArgumentException ("The character is not mapped: " + ch); 212 } 213 return this.getSoundexMapping()[index]; 214 } 215 216 223 public void setMaxLength(int maxLength) { 224 this.maxLength = maxLength; 225 } 226 227 233 private void setSoundexMapping(char[] soundexMapping) { 234 this.soundexMapping = soundexMapping; 235 } 236 237 246 public String soundex(String str) { 247 if (str == null) { 248 return null; 249 } 250 str = SoundexUtils.clean(str); 251 if (str.length() == 0) { 252 return str; 253 } 254 char out[] = {'0', '0', '0', '0'}; 255 char last, mapped; 256 int incount = 1, count = 1; 257 out[0] = str.charAt(0); 258 last = getMappingCode(str, 0); 259 while ((incount < str.length()) && (count < out.length)) { 260 mapped = getMappingCode(str, incount++); 261 if (mapped != 0) { 262 if ((mapped != '0') && (mapped != last)) { 263 out[count++] = mapped; 264 } 265 last = mapped; 266 } 267 } 268 return new String (out); 269 } 270 271 } | Popular Tags |