1 16 17 package org.apache.commons.codec.language; 18 19 import org.apache.commons.codec.EncoderException; 20 import org.apache.commons.codec.StringEncoder; 21 22 36 public class Metaphone implements StringEncoder { 37 38 41 private String vowels = "AEIOU" ; 42 43 46 private String frontv = "EIY" ; 47 48 51 private String varson = "CSPTG" ; 52 53 56 private int maxCodeLen = 4 ; 57 58 61 public Metaphone() { 62 super(); 63 } 64 65 75 public String metaphone(String txt) { 76 boolean hard = false ; 77 if ((txt == null) || (txt.length() == 0)) { 78 return "" ; 79 } 80 if (txt.length() == 1) { 82 return txt.toUpperCase() ; 83 } 84 85 char[] inwd = txt.toUpperCase().toCharArray() ; 86 87 StringBuffer local = new StringBuffer (40); StringBuffer code = new StringBuffer (10) ; switch(inwd[0]) { 91 case 'K' : 92 case 'G' : 93 case 'P' : 94 if (inwd[1] == 'N') { 95 local.append(inwd, 1, inwd.length - 1); 96 } else { 97 local.append(inwd); 98 } 99 break; 100 case 'A': 101 if (inwd[1] == 'E') { 102 local.append(inwd, 1, inwd.length - 1); 103 } else { 104 local.append(inwd); 105 } 106 break; 107 case 'W' : 108 if (inwd[1] == 'R') { local.append(inwd, 1, inwd.length - 1); 110 break ; 111 } 112 if (inwd[1] == 'H') { 113 local.append(inwd, 1, inwd.length - 1); 114 local.setCharAt(0, 'W'); } else { 116 local.append(inwd); 117 } 118 break; 119 case 'X' : 120 inwd[0] = 'S'; 121 local.append(inwd); 122 break ; 123 default : 124 local.append(inwd); 125 } 127 int wdsz = local.length(); 128 int n = 0 ; 129 130 while ((code.length() < this.getMaxCodeLen()) && 131 (n < wdsz) ) { char symb = local.charAt(n) ; 133 if ((symb != 'C') && (isPreviousChar( local, n, symb )) ) { 135 n++ ; 136 } else { switch(symb) { 138 case 'A' : case 'E' : case 'I' : case 'O' : case 'U' : 139 if (n == 0) { 140 code.append(symb); 141 } 142 break ; case 'B' : 144 if ( isPreviousChar(local, n, 'M') && 145 isLastChar(wdsz, n) ) { break; 147 } 148 code.append(symb); 149 break; 150 case 'C' : 152 if ( isPreviousChar(local, n, 'S') && 153 !isLastChar(wdsz, n) && 154 (this.frontv.indexOf(local.charAt(n + 1)) >= 0) ) { 155 break; 156 } 157 if (regionMatch(local, n, "CIA")) { code.append('X'); 159 break; 160 } 161 if (!isLastChar(wdsz, n) && 162 (this.frontv.indexOf(local.charAt(n + 1)) >= 0)) { 163 code.append('S'); 164 break; } 166 if (isPreviousChar(local, n, 'S') && 167 isNextChar(local, n, 'H') ) { code.append('K') ; 169 break ; 170 } 171 if (isNextChar(local, n, 'H')) { if ((n == 0) && 173 (wdsz >= 3) && 174 isVowel(local,2) ) { code.append('K'); 176 } else { 177 code.append('X'); } 179 } else { 180 code.append('K'); 181 } 182 break ; 183 case 'D' : 184 if (!isLastChar(wdsz, n + 1) && 185 isNextChar(local, n, 'G') && 186 (this.frontv.indexOf(local.charAt(n + 2)) >= 0)) { code.append('J'); n += 2 ; 188 } else { 189 code.append('T'); 190 } 191 break ; 192 case 'G' : if (isLastChar(wdsz, n + 1) && 194 isNextChar(local, n, 'H')) { 195 break; 196 } 197 if (!isLastChar(wdsz, n + 1) && 198 isNextChar(local,n,'H') && 199 !isVowel(local,n+2)) { 200 break; 201 } 202 if ((n > 0) && 203 ( regionMatch(local, n, "GN") || 204 regionMatch(local, n, "GNED") ) ) { 205 break; } 207 if (isPreviousChar(local, n, 'G')) { 208 hard = true ; 209 } else { 210 hard = false ; 211 } 212 if (!isLastChar(wdsz, n) && 213 (this.frontv.indexOf(local.charAt(n + 1)) >= 0) && 214 (!hard)) { 215 code.append('J'); 216 } else { 217 code.append('K'); 218 } 219 break ; 220 case 'H': 221 if (isLastChar(wdsz, n)) { 222 break ; } 224 if ((n > 0) && 225 (this.varson.indexOf(local.charAt(n - 1)) >= 0)) { 226 break; 227 } 228 if (isVowel(local,n+1)) { 229 code.append('H'); } 231 break; 232 case 'F': 233 case 'J' : 234 case 'L' : 235 case 'M': 236 case 'N' : 237 case 'R' : 238 code.append(symb); 239 break; 240 case 'K' : 241 if (n > 0) { if (!isPreviousChar(local, n, 'C')) { 243 code.append(symb); 244 } 245 } else { 246 code.append(symb); } 248 break ; 249 case 'P' : 250 if (isNextChar(local,n,'H')) { 251 code.append('F'); 253 } else { 254 code.append(symb); 255 } 256 break ; 257 case 'Q' : 258 code.append('K'); 259 break; 260 case 'S' : 261 if (regionMatch(local,n,"SH") || 262 regionMatch(local,n,"SIO") || 263 regionMatch(local,n,"SIA")) { 264 code.append('X'); 265 } else { 266 code.append('S'); 267 } 268 break; 269 case 'T' : 270 if (regionMatch(local,n,"TIA") || 271 regionMatch(local,n,"TIO")) { 272 code.append('X'); 273 break; 274 } 275 if (regionMatch(local,n,"TCH")) { 276 break; 278 } 279 if (regionMatch(local,n,"TH")) { 281 code.append('0'); 282 } else { 283 code.append('T'); 284 } 285 break ; 286 case 'V' : 287 code.append('F'); break ; 288 case 'W' : case 'Y' : if (!isLastChar(wdsz,n) && 290 isVowel(local,n+1)) { 291 code.append(symb); 292 } 293 break ; 294 case 'X' : 295 code.append('K'); code.append('S'); 296 break ; 297 case 'Z' : 298 code.append('S'); break ; 299 } n++ ; 301 } if (code.length() > this.getMaxCodeLen()) { 303 code.setLength(this.getMaxCodeLen()); 304 } 305 } 306 return code.toString(); 307 } 308 309 private boolean isVowel(StringBuffer string, int index) { 310 return (this.vowels.indexOf(string.charAt(index)) >= 0); 311 } 312 313 private boolean isPreviousChar(StringBuffer string, int index, char c) { 314 boolean matches = false; 315 if( index > 0 && 316 index < string.length() ) { 317 matches = string.charAt(index - 1) == c; 318 } 319 return matches; 320 } 321 322 private boolean isNextChar(StringBuffer string, int index, char c) { 323 boolean matches = false; 324 if( index >= 0 && 325 index < string.length() - 1 ) { 326 matches = string.charAt(index + 1) == c; 327 } 328 return matches; 329 } 330 331 private boolean regionMatch(StringBuffer string, int index, String test) { 332 boolean matches = false; 333 if( index >= 0 && 334 (index + test.length() - 1) < string.length() ) { 335 String substring = string.substring( index, index + test.length()); 336 matches = substring.equals( test ); 337 } 338 return matches; 339 } 340 341 private boolean isLastChar(int wdsz, int n) { 342 return n + 1 == wdsz; 343 } 344 345 346 358 public Object encode(Object pObject) throws EncoderException { 359 if (!(pObject instanceof java.lang.String )) { 360 throw new EncoderException("Parameter supplied to Metaphone encode is not of type java.lang.String"); 361 } 362 return metaphone((String ) pObject); 363 } 364 365 371 public String encode(String pString) { 372 return metaphone(pString); 373 } 374 375 383 public boolean isMetaphoneEqual(String str1, String str2) { 384 return metaphone(str1).equals(metaphone(str2)); 385 } 386 387 391 public int getMaxCodeLen() { return this.maxCodeLen; } 392 393 397 public void setMaxCodeLen(int maxCodeLen) { this.maxCodeLen = maxCodeLen; } 398 399 } 400 | Popular Tags |