1 16 17 package org.apache.commons.codec.language; 19 20 import junit.framework.Test; 21 import junit.framework.TestSuite; 22 import org.apache.commons.codec.EncoderException; 23 import org.apache.commons.codec.StringEncoder; 24 import org.apache.commons.codec.StringEncoderAbstractTest; 25 26 32 public class SoundexTest extends StringEncoderAbstractTest { 33 34 public static Test suite() { 35 return (new TestSuite(SoundexTest.class)); 36 } 37 38 private Soundex encoder = null; 39 40 public SoundexTest(String name) { 41 super(name); 42 } 43 44 void encodeAll(String [] strings, String expectedEncoding) { 45 for (int i = 0; i < strings.length; i++) { 46 assertEquals(expectedEncoding, this.getEncoder().encode(strings[i])); 47 } 48 } 49 50 53 public Soundex getEncoder() { 54 return this.encoder; 55 } 56 57 protected StringEncoder makeEncoder() { 58 return new Soundex(); 59 } 60 61 65 public void setEncoder(Soundex encoder) { 66 this.encoder = encoder; 67 } 68 69 public void setUp() throws Exception { 70 super.setUp(); 71 this.setEncoder(new Soundex()); 72 } 73 74 public void tearDown() throws Exception { 75 super.tearDown(); 76 this.setEncoder(null); 77 } 78 79 public void testB650() { 80 this.encodeAll( 81 new String [] { 82 "BARHAM", 83 "BARONE", 84 "BARRON", 85 "BERNA", 86 "BIRNEY", 87 "BIRNIE", 88 "BOOROM", 89 "BOREN", 90 "BORN", 91 "BOURN", 92 "BOURNE", 93 "BOWRON", 94 "BRAIN", 95 "BRAME", 96 "BRANN", 97 "BRAUN", 98 "BREEN", 99 "BRIEN", 100 "BRIM", 101 "BRIMM", 102 "BRINN", 103 "BRION", 104 "BROOM", 105 "BROOME", 106 "BROWN", 107 "BROWNE", 108 "BRUEN", 109 "BRUHN", 110 "BRUIN", 111 "BRUMM", 112 "BRUN", 113 "BRUNO", 114 "BRYAN", 115 "BURIAN", 116 "BURN", 117 "BURNEY", 118 "BYRAM", 119 "BYRNE", 120 "BYRON", 121 "BYRUM" }, 122 "B650"); 123 } 124 125 public void testDifference() throws EncoderException { 126 assertEquals(0, this.getEncoder().difference(null, null)); 128 assertEquals(0, this.getEncoder().difference("", "")); 129 assertEquals(0, this.getEncoder().difference(" ", " ")); 130 assertEquals(4, this.getEncoder().difference("Smith", "Smythe")); 132 assertEquals(2, this.getEncoder().difference("Ann", "Andrew")); 133 assertEquals(1, this.getEncoder().difference("Margaret", "Andrew")); 134 assertEquals(0, this.getEncoder().difference("Janet", "Margaret")); 135 assertEquals(4, this.getEncoder().difference("Green", "Greene")); 137 assertEquals(0, this.getEncoder().difference("Blotchet-Halls", "Greene")); 138 assertEquals(4, this.getEncoder().difference("Smith", "Smythe")); 140 assertEquals(4, this.getEncoder().difference("Smithers", "Smythers")); 141 assertEquals(2, this.getEncoder().difference("Anothers", "Brothers")); 142 } 143 144 public void testEncodeBasic() { 145 assertEquals("T235", this.getEncoder().encode("testing")); 146 assertEquals("T000", this.getEncoder().encode("The")); 147 assertEquals("Q200", this.getEncoder().encode("quick")); 148 assertEquals("B650", this.getEncoder().encode("brown")); 149 assertEquals("F200", this.getEncoder().encode("fox")); 150 assertEquals("J513", this.getEncoder().encode("jumped")); 151 assertEquals("O160", this.getEncoder().encode("over")); 152 assertEquals("T000", this.getEncoder().encode("the")); 153 assertEquals("L200", this.getEncoder().encode("lazy")); 154 assertEquals("D200", this.getEncoder().encode("dogs")); 155 } 156 157 161 public void testEncodeBatch2() { 162 assertEquals("A462", this.getEncoder().encode("Allricht")); 163 assertEquals("E166", this.getEncoder().encode("Eberhard")); 164 assertEquals("E521", this.getEncoder().encode("Engebrethson")); 165 assertEquals("H512", this.getEncoder().encode("Heimbach")); 166 assertEquals("H524", this.getEncoder().encode("Hanselmann")); 167 assertEquals("H431", this.getEncoder().encode("Hildebrand")); 168 assertEquals("K152", this.getEncoder().encode("Kavanagh")); 169 assertEquals("L530", this.getEncoder().encode("Lind")); 170 assertEquals("L222", this.getEncoder().encode("Lukaschowsky")); 171 assertEquals("M235", this.getEncoder().encode("McDonnell")); 172 assertEquals("M200", this.getEncoder().encode("McGee")); 173 assertEquals("O155", this.getEncoder().encode("Opnian")); 174 assertEquals("O155", this.getEncoder().encode("Oppenheimer")); 175 assertEquals("R355", this.getEncoder().encode("Riedemanas")); 176 assertEquals("Z300", this.getEncoder().encode("Zita")); 177 assertEquals("Z325", this.getEncoder().encode("Zitzmeinn")); 178 } 179 180 184 public void testEncodeBatch3() { 185 assertEquals("W252", this.getEncoder().encode("Washington")); 186 assertEquals("L000", this.getEncoder().encode("Lee")); 187 assertEquals("G362", this.getEncoder().encode("Gutierrez")); 188 assertEquals("P236", this.getEncoder().encode("Pfister")); 189 assertEquals("J250", this.getEncoder().encode("Jackson")); 190 assertEquals("T522", this.getEncoder().encode("Tymczak")); 191 assertEquals("V532", this.getEncoder().encode("VanDeusen")); 194 } 195 196 199 public void testEncodeBatch4() { 200 assertEquals("H452", this.getEncoder().encode("HOLMES")); 201 assertEquals("A355", this.getEncoder().encode("ADOMOMI")); 202 assertEquals("V536", this.getEncoder().encode("VONDERLEHR")); 203 assertEquals("B400", this.getEncoder().encode("BALL")); 204 assertEquals("S000", this.getEncoder().encode("SHAW")); 205 assertEquals("J250", this.getEncoder().encode("JACKSON")); 206 assertEquals("S545", this.getEncoder().encode("SCANLON")); 207 assertEquals("S532", this.getEncoder().encode("SAINTJOHN")); 208 209 } 210 211 public void testBadCharacters() { 212 assertEquals("H452", this.getEncoder().encode("HOL>MES")); 213 214 } 215 216 public void testEncodeIgnoreApostrophes() { 217 this.encodeAll(new String [] { "OBrien", "'OBrien", "O'Brien", "OB'rien", "OBr'ien", "OBri'en", "OBrie'n", "OBrien'" }, "O165"); 218 } 219 220 223 public void testEncodeIgnoreHyphens() { 224 this.encodeAll( 225 new String [] { 226 "KINGSMITH", 227 "-KINGSMITH", 228 "K-INGSMITH", 229 "KI-NGSMITH", 230 "KIN-GSMITH", 231 "KING-SMITH", 232 "KINGS-MITH", 233 "KINGSM-ITH", 234 "KINGSMI-TH", 235 "KINGSMIT-H", 236 "KINGSMITH-" }, 237 "K525"); 238 } 239 240 public void testEncodeIgnoreTrimmable() { 241 assertEquals("W252", this.getEncoder().encode(" \t\n\r Washington \t\n\r ")); 242 } 243 244 248 public void testHWRuleEx1() { 249 assertEquals("A261", this.getEncoder().encode("Ashcraft")); 254 } 255 256 262 public void testHWRuleEx2() { 263 assertEquals("B312", this.getEncoder().encode("BOOTHDAVIS")); 264 assertEquals("B312", this.getEncoder().encode("BOOTH-DAVIS")); 265 } 266 267 271 public void testHWRuleEx3() { 272 assertEquals("S460", this.getEncoder().encode("Sgler")); 273 assertEquals("S460", this.getEncoder().encode("Swhgler")); 274 this.encodeAll( 276 new String [] { 277 "SAILOR", 278 "SALYER", 279 "SAYLOR", 280 "SCHALLER", 281 "SCHELLER", 282 "SCHILLER", 283 "SCHOOLER", 284 "SCHULER", 285 "SCHUYLER", 286 "SEILER", 287 "SEYLER", 288 "SHOLAR", 289 "SHULER", 290 "SILAR", 291 "SILER", 292 "SILLER" }, 293 "S460"); 294 } 295 296 public void testMaxLength() throws Exception { 297 Soundex soundex = new Soundex(); 298 soundex.setMaxLength(soundex.getMaxLength()); 299 assertEquals("S460", this.getEncoder().encode("Sgler")); 300 } 301 302 public void testMaxLengthLessThan3Fix() throws Exception { 303 Soundex soundex = new Soundex(); 304 soundex.setMaxLength(2); 305 assertEquals("S460", soundex.encode("SCHELLER")); 306 } 307 308 312 public void testMsSqlServer1() { 313 assertEquals("S530", this.getEncoder().encode("Smith")); 314 assertEquals("S530", this.getEncoder().encode("Smythe")); 315 } 316 317 321 public void testMsSqlServer2() { 322 this.encodeAll(new String []{"Erickson", "Erickson", "Erikson", "Ericson", "Ericksen", "Ericsen"}, "E625"); 323 } 324 328 public void testMsSqlServer3() { 329 assertEquals("A500", this.getEncoder().encode("Ann")); 330 assertEquals("A536", this.getEncoder().encode("Andrew")); 331 assertEquals("J530", this.getEncoder().encode("Janet")); 332 assertEquals("M626", this.getEncoder().encode("Margaret")); 333 assertEquals("S315", this.getEncoder().encode("Steven")); 334 assertEquals("M240", this.getEncoder().encode("Michael")); 335 assertEquals("R163", this.getEncoder().encode("Robert")); 336 assertEquals("L600", this.getEncoder().encode("Laura")); 337 assertEquals("A500", this.getEncoder().encode("Anne")); 338 } 339 340 345 public void testUsMappingOWithDiaeresis() { 346 assertEquals("O000", this.getEncoder().encode("o")); 347 try { 348 assertEquals("Ö000", this.getEncoder().encode("ö")); 349 fail("Expected IllegalArgumentException not thrown"); 350 } catch (IllegalArgumentException e) { 351 } 353 } 354 355 360 public void testUsMappingEWithAcute() { 361 assertEquals("E000", this.getEncoder().encode("e")); 362 try { 363 assertEquals("É000", this.getEncoder().encode("é")); 364 fail("Expected IllegalArgumentException not thrown"); 365 } catch (IllegalArgumentException e) { 366 } 368 } 369 } 370 | Popular Tags |