KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > taglibs > string > util > Metaphone


/*
 * Copyright 1999,2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

16 //package com.bga.wbrogden.metaphone;
17
package org.apache.taglibs.string.util;
18
/**
 * Computes a Metaphone-style phonetic key for an English word, so that words
 * that sound alike produce the same short code.
 *
 * <p>The key is built from consonant sounds only (a vowel is kept solely when
 * it is the first letter) and is limited to {@link #maxCodeLen} characters.
 * The digit {@code '0'} stands for the "TH" sound and {@code 'X'} for "SH"/"CH".
 */
public class Metaphone {

    /** Letters treated as vowels. */
    static String vowels = "AEIOU" ;
    /** Letters that turn a preceding C into S and a preceding G into J. */
    static String frontv = "EIY" ;
    /** Consonants after which an H is never encoded on its own (CH, SH, PH, TH, GH). */
    static String varson = "CSPTG" ;

    /** Maximum number of characters in a generated code. */
    static final int maxCodeLen = 4 ;

    /**
     * Returns the phonetic key of {@code txt}.
     *
     * @param txt the word to encode; may be {@code null}
     * @return {@code ""} for {@code null} or empty input, the upper-cased
     *         character itself for one-character input, otherwise a code of at
     *         most {@link #maxCodeLen} characters (possibly empty when no
     *         letter of the word is encodable)
     */
    static public String metaPhone( String txt ){
        if( txt == null || txt.length() == 0 ) {
            return "" ;
        }
        // Upper-case with a fixed locale: with the default locale a Turkish
        // environment maps 'i' to a dotted capital I that matches no rule.
        if( txt.length() == 1 ) {
            return txt.toUpperCase( java.util.Locale.ENGLISH ) ; // single character is itself
        }

        // Working copy of the word with the initial-letter exceptions applied.
        String word = fixInitialLetters( txt.toUpperCase( java.util.Locale.ENGLISH ) );
        int wdsz = word.length();

        // The length of the output buffer is the only measure of progress; a
        // separate counter could drift when a letter turns out to be silent.
        StringBuffer code = new StringBuffer( maxCodeLen + 1 );

        int n = 0 ;
        while( code.length() < maxCodeLen && n < wdsz ){
            char symb = word.charAt( n );

            // Collapse doubled letters, except C (CC is handled by the C rules).
            if( symb != 'C' && n > 0 && word.charAt( n - 1 ) == symb ){
                n++ ;
                continue ;
            }

            switch( symb ){
                case 'A' : case 'E' : case 'I' : case 'O' : case 'U' :
                    // only use a vowel if it is the leading character
                    if( n == 0 ) {
                        code.append( symb );
                    }
                    break ;

                case 'B' :
                    // B is silent in a terminal "MB" (lamb, thumb)
                    if( n > 0 && n + 1 == wdsz && word.charAt( n - 1 ) == 'M' ) {
                        break ;
                    }
                    code.append( symb );
                    break ;

                case 'C' :
                    // discard in SCI, SCE, SCY
                    if( n > 0 && word.charAt( n - 1 ) == 'S'
                            && n + 1 < wdsz
                            && frontv.indexOf( word.charAt( n + 1 ) ) >= 0 ) {
                        break ;
                    }
                    if( word.startsWith( "CIA", n ) ) {          // CIA -> X
                        code.append( 'X' );
                        break ;
                    }
                    if( n + 1 < wdsz
                            && frontv.indexOf( word.charAt( n + 1 ) ) >= 0 ) {
                        code.append( 'S' );                      // CI, CE, CY -> S
                        break ;
                    }
                    if( n > 0 && word.startsWith( "SCH", n - 1 ) ) {
                        code.append( 'K' );                      // SCH -> SK
                        break ;
                    }
                    if( word.startsWith( "CH", n ) ) {
                        // initial CH followed by a consonant -> K, otherwise X
                        if( n == 0 && wdsz >= 3
                                && vowels.indexOf( word.charAt( 2 ) ) < 0 ) {
                            code.append( 'K' );
                        } else {
                            code.append( 'X' );
                        }
                    } else {
                        code.append( 'K' );
                    }
                    break ;

                case 'D' :
                    if( n + 2 < wdsz
                            && word.charAt( n + 1 ) == 'G'
                            && frontv.indexOf( word.charAt( n + 2 ) ) >= 0 ) {
                        code.append( 'J' );                      // DGE, DGI, DGY -> J
                        n += 2 ;                                 // also consume the G and the vowel
                    } else {
                        code.append( 'T' );
                    }
                    break ;

                case 'G' :
                    // GH is silent at the end of the word ...
                    if( n + 2 == wdsz && word.charAt( n + 1 ) == 'H' ) {
                        break ;
                    }
                    // ... and before a consonant
                    if( n + 2 < wdsz
                            && word.charAt( n + 1 ) == 'H'
                            && vowels.indexOf( word.charAt( n + 2 ) ) < 0 ) {
                        break ;
                    }
                    // Non-initial GN has a silent G; this also covers "GNED".
                    // The working word never starts with "GN" because that
                    // prefix is reduced to "N" by fixInitialLetters.
                    if( n > 0 && word.startsWith( "GN", n ) ) {
                        break ;
                    }
                    // No "previous letter is G" check is needed here: a doubled
                    // G has already been collapsed by the duplicate filter.
                    if( n + 1 < wdsz
                            && frontv.indexOf( word.charAt( n + 1 ) ) >= 0 ) {
                        code.append( 'J' );
                    } else {
                        code.append( 'K' );
                    }
                    break ;

                case 'H' :
                    if( n + 1 == wdsz ) {
                        break ;                                  // terminal H
                    }
                    if( n > 0 && varson.indexOf( word.charAt( n - 1 ) ) >= 0 ) {
                        break ;                                  // second half of CH, SH, PH, TH, GH
                    }
                    if( vowels.indexOf( word.charAt( n + 1 ) ) >= 0 ) {
                        code.append( 'H' );                      // H before a vowel
                    }
                    break ;

                case 'F' : case 'J' : case 'L' :
                case 'M' : case 'N' : case 'R' :
                    code.append( symb );
                    break ;

                case 'K' :
                    // CK: the C already produced the K sound
                    if( n > 0 && word.charAt( n - 1 ) == 'C' ) {
                        break ;
                    }
                    code.append( symb );
                    break ;

                case 'P' :
                    if( n + 1 < wdsz && word.charAt( n + 1 ) == 'H' ) {
                        code.append( 'F' );                      // PH -> F
                    } else {
                        code.append( symb );
                    }
                    break ;

                case 'Q' :
                    code.append( 'K' );
                    break ;

                case 'S' :
                    if( word.startsWith( "SH", n )
                            || word.startsWith( "SIO", n )
                            || word.startsWith( "SIA", n ) ) {
                        code.append( 'X' );
                    } else {
                        code.append( 'S' );
                    }
                    break ;

                case 'T' :
                    if( word.startsWith( "TIA", n )
                            || word.startsWith( "TIO", n ) ) {
                        code.append( 'X' );                      // TIA, TIO -> X
                        break ;
                    }
                    if( word.startsWith( "TCH", n ) ) {
                        break ;                                  // silent, the CH carries the sound
                    }
                    // substitute numeral 0 for TH (resembles theta after all)
                    if( word.startsWith( "TH", n ) ) {
                        code.append( '0' );
                    } else {
                        code.append( 'T' );
                    }
                    break ;

                case 'V' :
                    code.append( 'F' );
                    break ;

                case 'W' : case 'Y' :
                    // silent if not followed by a vowel
                    if( n + 1 < wdsz
                            && vowels.indexOf( word.charAt( n + 1 ) ) >= 0 ) {
                        code.append( symb );
                    }
                    break ;

                case 'X' :
                    code.append( 'K' ).append( 'S' );
                    break ;

                case 'Z' :
                    code.append( 'S' );
                    break ;
            } // end switch; any other character contributes nothing

            n++ ;

            // X appends two characters and may overshoot the limit by one.
            if( code.length() > maxCodeLen ) {
                code.setLength( maxCodeLen );
            }
        }
        return code.toString();
    } // end static method metaPhone()

    /**
     * Applies the rules for the first two letters of a word: KN, GN, PN, AE
     * and WR drop their first letter, WH becomes W, and an initial X becomes S.
     *
     * @param upper the upper-cased word; must contain at least two characters
     * @return the word with its initial letters normalised
     */
    private static String fixInitialLetters( String upper ){
        char second = upper.charAt( 1 );
        switch( upper.charAt( 0 ) ){
            case 'K' : case 'G' : case 'P' :                     // KN, GN, PN -> N
                return ( second == 'N' ) ? upper.substring( 1 ) : upper ;
            case 'A' :                                           // AE -> E
                return ( second == 'E' ) ? upper.substring( 1 ) : upper ;
            case 'W' :
                if( second == 'R' ) {
                    return upper.substring( 1 );                 // WR -> R
                }
                if( second == 'H' ) {
                    return "W" + upper.substring( 2 );           // WH -> W
                }
                return upper ;
            case 'X' :                                           // initial X becomes S
                return "S" + upper.substring( 1 );
            default :
                return upper ;
        }
    }

}
219
Popular Tags