KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > commons > codec > language > SoundexTest


/*
 * Copyright 2001-2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


// (FYI: Formatted and sorted with Eclipse)

package org.apache.commons.codec.language;

import junit.framework.Test;
import junit.framework.TestSuite;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoder;
import org.apache.commons.codec.StringEncoderAbstractTest;

26 /**
27  * Tests {@link Soundex}
28  *
29  * @version $Id: SoundexTest.java,v 1.18 2004/06/02 00:55:38 ggregory Exp $
30  * @author Apache Software Foundation
31  */

32 public class SoundexTest extends StringEncoderAbstractTest {
33
34     public static Test suite() {
35         return (new TestSuite(SoundexTest.class));
36     }
37
38     private Soundex encoder = null;
39
40     public SoundexTest(String JavaDoc name) {
41         super(name);
42     }
43
44     void encodeAll(String JavaDoc[] strings, String JavaDoc expectedEncoding) {
45         for (int i = 0; i < strings.length; i++) {
46             assertEquals(expectedEncoding, this.getEncoder().encode(strings[i]));
47         }
48     }
49     
50     /**
51      * @return Returns the _encoder.
52      */

53     public Soundex getEncoder() {
54         return this.encoder;
55     }
56
57     protected StringEncoder makeEncoder() {
58         return new Soundex();
59     }
60
61     /**
62      * @param encoder
63      * The encoder to set.
64      */

65     public void setEncoder(Soundex encoder) {
66         this.encoder = encoder;
67     }
68
69     public void setUp() throws Exception JavaDoc {
70         super.setUp();
71         this.setEncoder(new Soundex());
72     }
73
74     public void tearDown() throws Exception JavaDoc {
75         super.tearDown();
76         this.setEncoder(null);
77     }
78
79     public void testB650() {
80         this.encodeAll(
81             new String JavaDoc[] {
82                 "BARHAM",
83                 "BARONE",
84                 "BARRON",
85                 "BERNA",
86                 "BIRNEY",
87                 "BIRNIE",
88                 "BOOROM",
89                 "BOREN",
90                 "BORN",
91                 "BOURN",
92                 "BOURNE",
93                 "BOWRON",
94                 "BRAIN",
95                 "BRAME",
96                 "BRANN",
97                 "BRAUN",
98                 "BREEN",
99                 "BRIEN",
100                 "BRIM",
101                 "BRIMM",
102                 "BRINN",
103                 "BRION",
104                 "BROOM",
105                 "BROOME",
106                 "BROWN",
107                 "BROWNE",
108                 "BRUEN",
109                 "BRUHN",
110                 "BRUIN",
111                 "BRUMM",
112                 "BRUN",
113                 "BRUNO",
114                 "BRYAN",
115                 "BURIAN",
116                 "BURN",
117                 "BURNEY",
118                 "BYRAM",
119                 "BYRNE",
120                 "BYRON",
121                 "BYRUM" },
122             "B650");
123     }
124
125     public void testDifference() throws EncoderException {
126         // Edge cases
127
assertEquals(0, this.getEncoder().difference(null, null));
128         assertEquals(0, this.getEncoder().difference("", ""));
129         assertEquals(0, this.getEncoder().difference(" ", " "));
130         // Normal cases
131
assertEquals(4, this.getEncoder().difference("Smith", "Smythe"));
132         assertEquals(2, this.getEncoder().difference("Ann", "Andrew"));
133         assertEquals(1, this.getEncoder().difference("Margaret", "Andrew"));
134         assertEquals(0, this.getEncoder().difference("Janet", "Margaret"));
135         // Examples from http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp
136
assertEquals(4, this.getEncoder().difference("Green", "Greene"));
137         assertEquals(0, this.getEncoder().difference("Blotchet-Halls", "Greene"));
138         // Examples from http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_setu-sus_3o6w.asp
139
assertEquals(4, this.getEncoder().difference("Smith", "Smythe"));
140         assertEquals(4, this.getEncoder().difference("Smithers", "Smythers"));
141         assertEquals(2, this.getEncoder().difference("Anothers", "Brothers"));
142     }
143
144     public void testEncodeBasic() {
145         assertEquals("T235", this.getEncoder().encode("testing"));
146         assertEquals("T000", this.getEncoder().encode("The"));
147         assertEquals("Q200", this.getEncoder().encode("quick"));
148         assertEquals("B650", this.getEncoder().encode("brown"));
149         assertEquals("F200", this.getEncoder().encode("fox"));
150         assertEquals("J513", this.getEncoder().encode("jumped"));
151         assertEquals("O160", this.getEncoder().encode("over"));
152         assertEquals("T000", this.getEncoder().encode("the"));
153         assertEquals("L200", this.getEncoder().encode("lazy"));
154         assertEquals("D200", this.getEncoder().encode("dogs"));
155     }
156
157     /**
158      * Examples from
159      * http://www.bradandkathy.com/genealogy/overviewofsoundex.html
160      */

161     public void testEncodeBatch2() {
162         assertEquals("A462", this.getEncoder().encode("Allricht"));
163         assertEquals("E166", this.getEncoder().encode("Eberhard"));
164         assertEquals("E521", this.getEncoder().encode("Engebrethson"));
165         assertEquals("H512", this.getEncoder().encode("Heimbach"));
166         assertEquals("H524", this.getEncoder().encode("Hanselmann"));
167         assertEquals("H431", this.getEncoder().encode("Hildebrand"));
168         assertEquals("K152", this.getEncoder().encode("Kavanagh"));
169         assertEquals("L530", this.getEncoder().encode("Lind"));
170         assertEquals("L222", this.getEncoder().encode("Lukaschowsky"));
171         assertEquals("M235", this.getEncoder().encode("McDonnell"));
172         assertEquals("M200", this.getEncoder().encode("McGee"));
173         assertEquals("O155", this.getEncoder().encode("Opnian"));
174         assertEquals("O155", this.getEncoder().encode("Oppenheimer"));
175         assertEquals("R355", this.getEncoder().encode("Riedemanas"));
176         assertEquals("Z300", this.getEncoder().encode("Zita"));
177         assertEquals("Z325", this.getEncoder().encode("Zitzmeinn"));
178     }
179
180     /**
181      * Examples from
182      * http://www.archives.gov/research_room/genealogy/census/soundex.html
183      */

184     public void testEncodeBatch3() {
185         assertEquals("W252", this.getEncoder().encode("Washington"));
186         assertEquals("L000", this.getEncoder().encode("Lee"));
187         assertEquals("G362", this.getEncoder().encode("Gutierrez"));
188         assertEquals("P236", this.getEncoder().encode("Pfister"));
189         assertEquals("J250", this.getEncoder().encode("Jackson"));
190         assertEquals("T522", this.getEncoder().encode("Tymczak"));
191         // For VanDeusen: D-250 (D, 2 for the S, 5 for the N, 0 added) is also
192
// possible.
193
assertEquals("V532", this.getEncoder().encode("VanDeusen"));
194     }
195
196     /**
197      * Examples from: http://www.myatt.demon.co.uk/sxalg.htm
198      */

199     public void testEncodeBatch4() {
200         assertEquals("H452", this.getEncoder().encode("HOLMES"));
201         assertEquals("A355", this.getEncoder().encode("ADOMOMI"));
202         assertEquals("V536", this.getEncoder().encode("VONDERLEHR"));
203         assertEquals("B400", this.getEncoder().encode("BALL"));
204         assertEquals("S000", this.getEncoder().encode("SHAW"));
205         assertEquals("J250", this.getEncoder().encode("JACKSON"));
206         assertEquals("S545", this.getEncoder().encode("SCANLON"));
207         assertEquals("S532", this.getEncoder().encode("SAINTJOHN"));
208
209     }
210
211     public void testBadCharacters() {
212         assertEquals("H452", this.getEncoder().encode("HOL>MES"));
213
214     }
215
216     public void testEncodeIgnoreApostrophes() {
217         this.encodeAll(new String JavaDoc[] { "OBrien", "'OBrien", "O'Brien", "OB'rien", "OBr'ien", "OBri'en", "OBrie'n", "OBrien'" }, "O165");
218     }
219
220     /**
221      * Test data from http://www.myatt.demon.co.uk/sxalg.htm
222      */

223     public void testEncodeIgnoreHyphens() {
224         this.encodeAll(
225             new String JavaDoc[] {
226                 "KINGSMITH",
227                 "-KINGSMITH",
228                 "K-INGSMITH",
229                 "KI-NGSMITH",
230                 "KIN-GSMITH",
231                 "KING-SMITH",
232                 "KINGS-MITH",
233                 "KINGSM-ITH",
234                 "KINGSMI-TH",
235                 "KINGSMIT-H",
236                 "KINGSMITH-" },
237             "K525");
238     }
239
240     public void testEncodeIgnoreTrimmable() {
241         assertEquals("W252", this.getEncoder().encode(" \t\n\r Washington \t\n\r "));
242     }
243
244     /**
245      * Consonants from the same code group separated by W or H are treated as
246      * one.
247      */

248     public void testHWRuleEx1() {
249         // From
250
// http://www.archives.gov/research_room/genealogy/census/soundex.html:
251
// Ashcraft is coded A-261 (A, 2 for the S, C ignored, 6 for the R, 1
252
// for the F). It is not coded A-226.
253
assertEquals("A261", this.getEncoder().encode("Ashcraft"));
254     }
255
256     /**
257      * Consonants from the same code group separated by W or H are treated as
258      * one.
259      *
260      * Test data from http://www.myatt.demon.co.uk/sxalg.htm
261      */

262     public void testHWRuleEx2() {
263         assertEquals("B312", this.getEncoder().encode("BOOTHDAVIS"));
264         assertEquals("B312", this.getEncoder().encode("BOOTH-DAVIS"));
265     }
266
267     /**
268      * Consonants from the same code group separated by W or H are treated as
269      * one.
270      */

271     public void testHWRuleEx3() {
272         assertEquals("S460", this.getEncoder().encode("Sgler"));
273         assertEquals("S460", this.getEncoder().encode("Swhgler"));
274         // Also S460:
275
this.encodeAll(
276             new String JavaDoc[] {
277                 "SAILOR",
278                 "SALYER",
279                 "SAYLOR",
280                 "SCHALLER",
281                 "SCHELLER",
282                 "SCHILLER",
283                 "SCHOOLER",
284                 "SCHULER",
285                 "SCHUYLER",
286                 "SEILER",
287                 "SEYLER",
288                 "SHOLAR",
289                 "SHULER",
290                 "SILAR",
291                 "SILER",
292                 "SILLER" },
293             "S460");
294     }
295
296     public void testMaxLength() throws Exception JavaDoc {
297         Soundex soundex = new Soundex();
298         soundex.setMaxLength(soundex.getMaxLength());
299         assertEquals("S460", this.getEncoder().encode("Sgler"));
300     }
301
302     public void testMaxLengthLessThan3Fix() throws Exception JavaDoc {
303         Soundex soundex = new Soundex();
304         soundex.setMaxLength(2);
305         assertEquals("S460", soundex.encode("SCHELLER"));
306     }
307
308     /**
309      * Examples for MS SQLServer from
310      * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_setu-sus_3o6w.asp
311      */

312     public void testMsSqlServer1() {
313         assertEquals("S530", this.getEncoder().encode("Smith"));
314         assertEquals("S530", this.getEncoder().encode("Smythe"));
315     }
316
317     /**
318      * Examples for MS SQLServer from
319      * http://support.microsoft.com/default.aspx?scid=http://support.microsoft.com:80/support/kb/articles/Q100/3/65.asp&NoWebContent=1
320      */

321     public void testMsSqlServer2() {
322         this.encodeAll(new String JavaDoc[]{"Erickson", "Erickson", "Erikson", "Ericson", "Ericksen", "Ericsen"}, "E625");
323     }
324     /**
325      * Examples for MS SQLServer from
326      * http://databases.about.com/library/weekly/aa042901a.htm
327      */

328     public void testMsSqlServer3() {
329         assertEquals("A500", this.getEncoder().encode("Ann"));
330         assertEquals("A536", this.getEncoder().encode("Andrew"));
331         assertEquals("J530", this.getEncoder().encode("Janet"));
332         assertEquals("M626", this.getEncoder().encode("Margaret"));
333         assertEquals("S315", this.getEncoder().encode("Steven"));
334         assertEquals("M240", this.getEncoder().encode("Michael"));
335         assertEquals("R163", this.getEncoder().encode("Robert"));
336         assertEquals("L600", this.getEncoder().encode("Laura"));
337         assertEquals("A500", this.getEncoder().encode("Anne"));
338     }
339
340     /**
341      * Fancy characters are not mapped by the default US mapping.
342      *
343      * http://nagoya.apache.org/bugzilla/show_bug.cgi?id=29080
344      */

345     public void testUsMappingOWithDiaeresis() {
346         assertEquals("O000", this.getEncoder().encode("o"));
347         try {
348             assertEquals("Ö000", this.getEncoder().encode("ö"));
349             fail("Expected IllegalArgumentException not thrown");
350         } catch (IllegalArgumentException JavaDoc e) {
351             // expected
352
}
353     }
354
355     /**
356      * Fancy characters are not mapped by the default US mapping.
357      *
358      * http://nagoya.apache.org/bugzilla/show_bug.cgi?id=29080
359      */

360     public void testUsMappingEWithAcute() {
361         assertEquals("E000", this.getEncoder().encode("e"));
362         try {
363             assertEquals("É000", this.getEncoder().encode("é"));
364             fail("Expected IllegalArgumentException not thrown");
365         } catch (IllegalArgumentException JavaDoc e) {
366             // expected
367
}
368     }
369 }
370
Popular Tags