KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > commons > codec > language > RefinedSoundex


1 /*
2  * Copyright 2001-2004 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16
17 package org.apache.commons.codec.language;
18
19 import org.apache.commons.codec.EncoderException;
20 import org.apache.commons.codec.StringEncoder;
21
22 /**
23  * Encodes a string into a Refined Soundex value. A refined soundex code is
24  * optimized for spell checking words. Soundex method originally developed by
25  * <CITE>Margaret Odell</CITE> and <CITE>Robert Russell</CITE>.
26  *
27  * @author Apache Software Foundation
28  * @version $Id: RefinedSoundex.java,v 1.21 2004/06/05 18:32:04 ggregory Exp $
29  */

30 public class RefinedSoundex implements StringEncoder {
31
32     /**
33      * This static variable contains an instance of the RefinedSoundex using
34      * the US_ENGLISH mapping.
35      */

36     public static final RefinedSoundex US_ENGLISH = new RefinedSoundex();
37
38     /**
39      * RefinedSoundex is *refined* for a number of reasons one being that the
40      * mappings have been altered. This implementation contains default
41      * mappings for US English.
42      */

43     public static final char[] US_ENGLISH_MAPPING = "01360240043788015936020505".toCharArray();
44
45     /**
46      * Every letter of the alphabet is "mapped" to a numerical value. This char
47      * array holds the values to which each letter is mapped. This
48      * implementation contains a default map for US_ENGLISH
49      */

50     private char[] soundexMapping;
51
52     /**
53      * Creates an instance of the RefinedSoundex object using the default US
54      * English mapping.
55      */

56     public RefinedSoundex() {
57         this(US_ENGLISH_MAPPING);
58     }
59
60     /**
61      * Creates a refined soundex instance using a custom mapping. This
62      * constructor can be used to customize the mapping, and/or possibly
63      * provide an internationalized mapping for a non-Western character set.
64      *
65      * @param mapping
66      * Mapping array to use when finding the corresponding code for
67      * a given character
68      */

69     public RefinedSoundex(char[] mapping) {
70         this.soundexMapping = mapping;
71     }
72
73     /**
74      * Returns the number of characters in the two encoded Strings that are the
75      * same. This return value ranges from 0 to the length of the shortest
76      * encoded String: 0 indicates little or no similarity, and 4 out of 4 (for
77      * example) indicates strong similarity or identical values. For refined
78      * Soundex, the return value can be greater than 4.
79      *
80      * @param s1
81      * A String that will be encoded and compared.
82      * @param s2
83      * A String that will be encoded and compared.
84      * @return The number of characters in the two encoded Strings that are the
85      * same from 0 to to the length of the shortest encoded String.
86      *
87      * @see SoundexUtils#difference(StringEncoder,String,String)
88      * @see <a HREF="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
89      * MS T-SQL DIFFERENCE</a>
90      *
91      * @throws EncoderException
92      * if an error occurs encoding one of the strings
93      * @since 1.3
94      */

95     public int difference(String JavaDoc s1, String JavaDoc s2) throws EncoderException {
96         return SoundexUtils.difference(this, s1, s2);
97     }
98
99     /**
100      * Encodes an Object using the refined soundex algorithm. This method is
101      * provided in order to satisfy the requirements of the Encoder interface,
102      * and will throw an EncoderException if the supplied object is not of type
103      * java.lang.String.
104      *
105      * @param pObject
106      * Object to encode
107      * @return An object (or type java.lang.String) containing the refined
108      * soundex code which corresponds to the String supplied.
109      * @throws EncoderException
110      * if the parameter supplied is not of type java.lang.String
111      */

112     public Object JavaDoc encode(Object JavaDoc pObject) throws EncoderException {
113         if (!(pObject instanceof java.lang.String JavaDoc)) {
114             throw new EncoderException("Parameter supplied to RefinedSoundex encode is not of type java.lang.String");
115         }
116         return soundex((String JavaDoc) pObject);
117     }
118
119     /**
120      * Encodes a String using the refined soundex algorithm.
121      *
122      * @param pString
123      * A String object to encode
124      * @return A Soundex code corresponding to the String supplied
125      */

126     public String JavaDoc encode(String JavaDoc pString) {
127         return soundex(pString);
128     }
129
130     /**
131      * Returns the mapping code for a given character. The mapping codes are
132      * maintained in an internal char array named soundexMapping, and the
133      * default values of these mappings are US English.
134      *
135      * @param c
136      * char to get mapping for
137      * @return A character (really a numeral) to return for the given char
138      */

139     char getMappingCode(char c) {
140         if (!Character.isLetter(c)) {
141             return 0;
142         }
143         return this.soundexMapping[Character.toUpperCase(c) - 'A'];
144     }
145
146     /**
147      * Retreives the Refined Soundex code for a given String object.
148      *
149      * @param str
150      * String to encode using the Refined Soundex algorithm
151      * @return A soundex code for the String supplied
152      */

153     public String JavaDoc soundex(String JavaDoc str) {
154         if (str == null) {
155             return null;
156         }
157         str = SoundexUtils.clean(str);
158         if (str.length() == 0) {
159             return str;
160         }
161
162         StringBuffer JavaDoc sBuf = new StringBuffer JavaDoc();
163         sBuf.append(str.charAt(0));
164
165         char last, current;
166         last = '*';
167
168         for (int i = 0; i < str.length(); i++) {
169
170             current = getMappingCode(str.charAt(i));
171             if (current == last) {
172                 continue;
173             } else if (current != 0) {
174                 sBuf.append(current);
175             }
176
177             last = current;
178
179         }
180
181         return sBuf.toString();
182     }
183 }
184
Popular Tags