1 package SnowMailClient.utils.TextSearch; 2 3 import java.util.*; 4 5 6 10 public final class EditDistance 11 { 12 private EditDistance() 13 { 14 } 15 16 17 24 25 27 static int reditexEnglish(int a, int b) 28 { 29 if(a==b) return 0; 30 if((editexGroupENGLISH((char)a) & editexGroupENGLISH((char)b))!=0) return 1; 31 return 2; 32 } 33 34 static int d(int a, int b) 35 { 36 if(a=='H' && b =='W' && a!=b) return 1; 37 if(a=='W' && b =='H' && a!=b) return 1; 38 return reditexEnglish(a,b); 39 } 40 41 46 private static int editexGroupENGLISH(char c) 47 { 48 int rep=0; 49 if("AEIOUY".indexOf(c)!=-1) rep += 1; 50 if("BP".indexOf(c)!=-1) rep += 2; 51 if("CKQ".indexOf(c)!=-1) rep += 4; 52 if("DT".indexOf(c)!=-1) rep += 8; 53 if("LR".indexOf(c)!=-1) rep += 16; 54 if("MN".indexOf(c)!=-1) rep += 32; 55 if("GJ".indexOf(c)!=-1) rep += 64; 56 if("FPV".indexOf(c)!=-1) rep += 128; 57 if("SXZ".indexOf(c)!=-1) rep += 256; 58 if("CSZ".indexOf(c)!=-1) rep += 512; 59 return rep; 60 } 61 62 63 66 public static int editexDistanceEnglish(String w1,String w2, int tolerance) 67 { 68 if(Math.abs(w1.length()-w2.length())>tolerance) return tolerance+1; 71 return editexEnglish(w1.length(), w2.length(),w1,w2.toUpperCase()); 72 } 73 74 75 77 static int editexEnglish(int i, int j, final String w1, final String w2) 78 { 79 if(i==0 && j==0) return 0; 80 if(i==0) return editexEnglish(0,j-1,w1,w2)+(j<w2.length()?d(w2.charAt(j-1),w2.charAt(j)):0); 81 if(j==0) return editexEnglish(i-1,0,w1,w2)+(i<w1.length()?d(w1.charAt(i-1),w1.charAt(i)):0); 82 int p1 = editexEnglish(i-1,j-1,w1,w2) + reditexEnglish(w1.charAt(i-1), w2.charAt(j-1)); 83 if(p1==0) return 0; 84 int p2 = editexEnglish(i-1,j,w1,w2)+(i<w1.length()?d(w1.charAt(i-1),w1.charAt(i)):0); 85 int p3 = editexEnglish(i,j-1,w1,w2)+(j<w2.length()?d(w2.charAt(j-1),w2.charAt(j)):0); 86 87 return Math.min( 88 Math.min(p2, p3), 89 p1); 90 } 91 92 private static int min (int a, int b, int c) 93 { 94 int mi = a; 95 if (b < mi) 96 { 97 mi = b; 98 } 99 if (c < mi) 100 { 101 mi = c; 102 } 103 return mi; 104 } 105 106 109 public static int editDistance(String s, String t, int maxDistance) 110 { 111 int n = s.length(); 112 int m = t.length(); 113 if (n == 0) return m; 114 if (m == 0) return n; 115 if(n-m>maxDistance) return n-m; if(m-n>maxDistance) return m-n; 117 118 int d[][] = new int[n+1][m+1]; 119 120 for (int i = 0; i <= n; i++) d[i][0] = i; 121 for (int j = 0; j <= m; j++) d[0][j] = j; 122 123 for (int i = 1; i <= n; i++) 124 { 125 char s_i = s.charAt(i-1); 126 for (int j = 1; j <= m; j++) 127 { 128 d[i][j] = min (d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1] + (s_i==t.charAt(j-1)?0:1)); 129 } 130 } 131 132 return d[n][m]; 133 } 134 135 136 public static String getRandomString(int len) 137 { 138 StringBuffer sb = new StringBuffer (); 139 for(int i=0; i<len; i++) 140 { 141 sb.append((char) ('A'+((int)(Math.random()*25)))); 142 } 143 return sb.toString(); 144 } 145 146 public static void main(String [] aaa) 147 { 148 System.out.println( editDistance("JOERG","JEORG",6)); 149 System.out.println( editDistance("WATER","WINE",6)); 150 System.out.println( editDistance("WIINE","WINE",6)); 151 System.out.println(getRandomString(12)); 152 System.out.println(getRandomString(6)); 153 154 155 long t0 = new Date().getTime(); 157 int n = 2000000; 158 for(int i=0; i<n; i++) 159 { 160 int d; 161 String s1 = getRandomString( (int) (2+Math.random()*8) ); 162 String s2 = getRandomString( (int) (2+Math.random()*8) ); 163 d = editDistance(s1,s2,2); 164 } 165 long t1 = new Date().getTime(); 166 System.out.println("time for " + n +" edit distances = "+(t1-t0)/1000.0+ " s"); 167 168 173 174 } 175 } | Popular Tags |