1 23 24 package org.jivesoftware.stringprep; 25 26 class NFKC { 27 33 public static String normalizeNFKC(String in) { 34 StringBuilder out = new StringBuilder (); 35 36 for (int i = 0; i < in.length(); i++) { 37 char code = in.charAt(i); 38 39 if (code >= 0xAC00 && code <= 0xD7AF) { 44 out.append(decomposeHangul(code)); 45 } else { 46 int index = decomposeIndex(code); 47 if (index == -1) { 48 out.append(code); 49 } else { 50 out.append(DecompositionMappings.m[index]); 51 } 52 } 53 } 54 55 canonicalOrdering(out); 57 58 int last_cc = 0; 60 int last_start = 0; 61 62 for (int i = 0; i < out.length(); i++) { 63 int cc = combiningClass(out.charAt(i)); 64 65 if (i > 0 && (last_cc == 0 || last_cc != cc)) { 66 char a = out.charAt(last_start); 68 char b = out.charAt(i); 69 70 int c = compose(a, b); 71 72 if (c != -1) { 73 out.setCharAt(last_start, (char) c); 74 out.deleteCharAt(i); 75 i--; 76 77 if (i == last_start) { 78 last_cc = 0; 79 } else { 80 last_cc = combiningClass(out.charAt(i - 1)); 81 } 82 continue; 83 } 84 } 85 86 if (cc == 0) { 87 last_start = i; 88 } 89 90 last_cc = cc; 91 } 92 93 return out.toString(); 94 } 95 96 97 104 static int decomposeIndex(char c) { 105 int start = 0; 106 int end = DecompositionKeys.k.length / 2; 107 108 while (true) { 109 int half = (start + end) / 2; 110 int code = DecompositionKeys.k[half * 2]; 111 112 if (c == code) { 113 return DecompositionKeys.k[half * 2 + 1]; 114 } 115 if (half == start) { 116 return -1; 118 } else if (c > code) { 119 start = half; 120 } else { 121 end = half; 122 } 123 } 124 } 125 126 132 static int combiningClass(char c) { 133 int h = c >> 8; 134 int l = c & 0xff; 135 136 int i = CombiningClass.i[h]; 137 if (i > -1) { 138 return CombiningClass.c[i][l]; 139 } else { 140 return 0; 141 } 142 } 143 144 150 static void canonicalOrdering(StringBuilder in) { 151 boolean isOrdered = false; 152 153 while (!isOrdered) { 154 isOrdered = true; 155 156 int lastCC = combiningClass(in.charAt(0)); 157 158 for (int i = 0; i < in.length() - 1; i++) { 159 int nextCC = combiningClass(in.charAt(i + 1)); 160 if (nextCC != 0 && lastCC > nextCC) { 161 for (int j = i + 1; j > 0; j--) { 162 if (combiningClass(in.charAt(j - 1)) <= nextCC) { 163 break; 164 } 165 char t = in.charAt(j); 166 in.setCharAt(j, in.charAt(j - 1)); 167 in.setCharAt(j - 1, t); 168 isOrdered = false; 169 } 170 nextCC = lastCC; 171 } 172 lastCC = nextCC; 173 } 174 } 175 } 176 177 183 static int composeIndex(char a) { 184 if (a >> 8 >= Composition.composePage.length) { 185 return -1; 186 } 187 int ap = Composition.composePage[a >> 8]; 188 if (ap == -1) { 189 return -1; 190 } 191 return Composition.composeData[ap][a & 0xff]; 192 } 193 194 202 static int compose(char a, char b) { 203 int h = composeHangul(a, b); 204 if (h != -1) { 205 return h; 206 } 207 208 int ai = composeIndex(a); 209 210 if (ai >= Composition.singleFirstStart && ai < Composition.singleSecondStart) { 211 if (b == Composition.singleFirst[ai - Composition.singleFirstStart][0]) { 212 return Composition.singleFirst[ai - Composition.singleFirstStart][1]; 213 } else { 214 return -1; 215 } 216 } 217 218 int bi = composeIndex(b); 219 220 if (bi >= Composition.singleSecondStart) { 221 if (a == Composition.singleSecond[bi - Composition.singleSecondStart][0]) { 222 return Composition.singleSecond[bi - Composition.singleSecondStart][1]; 223 } else { 224 return -1; 225 } 226 } 227 228 if (ai >= 0 && ai < Composition.multiSecondStart && 229 bi >= Composition.multiSecondStart && bi < Composition.singleFirstStart) { 230 char[] f = Composition.multiFirst[ai]; 231 232 if (bi - Composition.multiSecondStart < f.length) { 233 char r = f[bi - Composition.multiSecondStart]; 234 if (r == 0) { 235 return -1; 236 } else { 237 return r; 238 } 239 } 240 } 241 242 243 return -1; 244 } 245 246 252 static final int SBase = 0xAC00; 253 static final int LBase = 0x1100; 254 static final int VBase = 0x1161; 255 static final int TBase = 0x11A7; 256 static final int LCount = 19; 257 static final int VCount = 21; 258 static final int TCount = 28; 259 static final int NCount = VCount * TCount; 260 static final int SCount = LCount * NCount; 261 262 270 static String decomposeHangul(char s) { 271 int SIndex = s - SBase; 272 if (SIndex < 0 || SIndex >= SCount) { 273 return String.valueOf(s); 274 } 275 StringBuilder result = new StringBuilder (); 276 int L = LBase + SIndex / NCount; 277 int V = VBase + (SIndex % NCount) / TCount; 278 int T = TBase + SIndex % TCount; 279 result.append((char) L); 280 result.append((char) V); 281 if (T != TBase) result.append((char) T); 282 return result.toString(); 283 } 284 285 293 static int composeHangul(char a, char b) { 294 int LIndex = a - LBase; 296 if (0 <= LIndex && LIndex < LCount) { 297 int VIndex = b - VBase; 298 if (0 <= VIndex && VIndex < VCount) { 299 return SBase + (LIndex * VCount + VIndex) * TCount; 301 } 302 } 303 304 int SIndex = a - SBase; 306 if (0 <= SIndex && SIndex < SCount && (SIndex % TCount) == 0) { 307 int TIndex = b - TBase; 308 if (0 <= TIndex && TIndex <= TCount) { 309 return a + TIndex; 311 } 312 } 313 return -1; 314 } 315 316 } | Popular Tags |