/**
 * Code-point tables for Greek letters in three encodings, plus a helper that
 * folds a Greek letter to its lower-case, unaccented form.
 *
 * <p>The three tables list the same letters in the same order, so the entry at a
 * given index denotes the same letter in every table. {@link #toLowerCase}
 * identifies the encoding by comparing the array reference, so callers must
 * pass one of the arrays declared here (not a copy).
 */
public class GreekCharsets
{
    /** Greek letters as Unicode code points: lower-case forms first, then upper-case. */
    public static char[] UnicodeGreek = {
        // lower case (accented and plain)
        '\u0390',
        '\u03AC', '\u03AD', '\u03AE', '\u03AF', '\u03B0',
        '\u03B1', '\u03B2', '\u03B3', '\u03B4', '\u03B5', '\u03B6', '\u03B7',
        '\u03B8', '\u03B9', '\u03BA', '\u03BB', '\u03BC', '\u03BD', '\u03BE',
        '\u03BF', '\u03C0', '\u03C1', '\u03C2', '\u03C3', '\u03C4', '\u03C5',
        '\u03C6', '\u03C7', '\u03C8', '\u03C9',
        '\u03CA', '\u03CB', '\u03CC', '\u03CD', '\u03CE',
        // upper case (accented and plain)
        '\u0386',
        '\u0388', '\u0389', '\u038A', '\u038C', '\u038E', '\u038F',
        '\u0391', '\u0392', '\u0393', '\u0394', '\u0395', '\u0396', '\u0397',
        '\u0398', '\u0399', '\u039A', '\u039B', '\u039C', '\u039D', '\u039E',
        '\u039F', '\u03A0', '\u03A1',
        '\u03A3', '\u03A4', '\u03A5', '\u03A6', '\u03A7', '\u03A8', '\u03A9',
        '\u03AA', '\u03AB'
    };

    /** The same letters, in the same order, as single-byte codes of the {@code ISO} table. */
    public static char[] ISO = {
        // lower case (accented and plain)
        0xc0,
        0xdc, 0xdd, 0xde, 0xdf, 0xe0,
        0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
        0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee,
        0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5,
        0xf6, 0xf7, 0xf8, 0xf9,
        0xfa, 0xfb, 0xfc, 0xfd, 0xfe,
        // upper case (accented and plain)
        0xb6,
        0xb8, 0xb9, 0xba, 0xbc, 0xbe, 0xbf,
        0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
        0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce,
        0xcf, 0xd0, 0xd1,
        0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9,
        0xda, 0xdb
    };

    /**
     * The same letters, in the same order, as single-byte codes of the
     * {@code CP1253} table. It differs from {@link #ISO} only in the code of
     * the first upper-case entry (0xa2 instead of 0xb6).
     */
    public static char[] CP1253 = {
        // lower case (accented and plain)
        0xc0,
        0xdc, 0xdd, 0xde, 0xdf, 0xe0,
        0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
        0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee,
        0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5,
        0xf6, 0xf7, 0xf8, 0xf9,
        0xfa, 0xfb, 0xfc, 0xfd, 0xfe,
        // upper case (accented and plain)
        0xa2,
        0xb8, 0xb9, 0xba, 0xbc, 0xbe, 0xbf,
        0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
        0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce,
        0xcf, 0xd0, 0xd1,
        0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9,
        0xda, 0xdb
    };

    /** Code of capital alpha with tonos in the {@link #ISO} table. */
    private static final char ISO_CAPITAL_ALPHA_TONOS = 0xb6;

    /** Code of capital alpha with tonos in the {@link #CP1253} table. */
    private static final char CP1253_CAPITAL_ALPHA_TONOS = 0xa2;

    /**
     * Folds a Greek letter to its plain lower-case form in the given encoding:
     * capitals become lower case, accented letters lose their accent, and the
     * final sigma becomes the regular small sigma.
     *
     * @param letter  the character to fold, expressed in the encoding of {@code charset}
     * @param charset one of {@link #UnicodeGreek}, {@link #ISO} or {@link #CP1253};
     *                matched by reference, not by content
     * @return the folded letter in the same encoding; for a letter that has no
     *         Greek mapping, or for any other {@code charset}, the result of
     *         {@link Character#toLowerCase(char)}
     */
    public static char toLowerCase(char letter, char[] charset)
    {
        if (charset == UnicodeGreek) {
            return unicodeToLowerCase(letter);
        }
        if (charset == ISO) {
            return singleByteToLowerCase(letter, ISO_CAPITAL_ALPHA_TONOS);
        }
        if (charset == CP1253) {
            return singleByteToLowerCase(letter, CP1253_CAPITAL_ALPHA_TONOS);
        }
        return Character.toLowerCase(letter);
    }

    /** Folds a Unicode Greek letter; see {@link #toLowerCase(char, char[])}. */
    private static char unicodeToLowerCase(char letter)
    {
        // Plain lower-case letters are already folded, except the final sigma.
        if (letter >= '\u03B1' && letter <= '\u03C9') {
            return (letter == '\u03C2') ? '\u03C3' : letter;
        }
        // Plain capitals sit exactly 32 code points below their lower-case forms.
        if (letter >= '\u0391' && letter <= '\u03A9') {
            return (char) (letter + 32);
        }
        // Accented / dialytika forms, lower and upper case, grouped by base letter.
        switch (letter) {
            case '\u03AC':
            case '\u0386':
                return '\u03B1'; // alpha
            case '\u03AD':
            case '\u0388':
                return '\u03B5'; // epsilon
            case '\u03AE':
            case '\u0389':
                return '\u03B7'; // eta
            case '\u03AF':
            case '\u03CA':
            case '\u0390':
            case '\u038A':
            case '\u03AA':
                return '\u03B9'; // iota
            case '\u03CD':
            case '\u03CB':
            case '\u03B0':
            case '\u038E':
            case '\u03AB':
                return '\u03C5'; // upsilon
            case '\u03CC':
            case '\u038C':
                return '\u03BF'; // omicron
            case '\u03CE':
            case '\u038F':
                return '\u03C9'; // omega
            default:
                return Character.toLowerCase(letter);
        }
    }

    /**
     * Folds a letter of one of the single-byte tables; see
     * {@link #toLowerCase(char, char[])}. The two tables share every code
     * except capital alpha with tonos, which is passed in.
     */
    private static char singleByteToLowerCase(char letter, char capitalAlphaTonos)
    {
        // Plain lower-case letters are already folded, except the final sigma (0xf2).
        if (letter >= 0xe1 && letter <= 0xf9) {
            if (letter == 0xf2) {
                return 0xf3;
            }
            return letter;
        }
        // Plain capitals sit exactly 32 codes below their lower-case forms.
        if (letter >= 0xc1 && letter <= 0xd9) {
            return (char) (letter + 32);
        }
        if (letter == capitalAlphaTonos) {
            return 0xe1;
        }
        // Accented / dialytika forms, lower and upper case, grouped by base letter.
        switch (letter) {
            case 0xdc:
                return 0xe1; // alpha
            case 0xdd:
            case 0xb8:
                return 0xe5; // epsilon
            case 0xde:
            case 0xb9:
                return 0xe7; // eta
            case 0xdf:
            case 0xfa:
            case 0xc0:
            case 0xba:
            case 0xda:
                // Must be the single-byte iota (0xe9), not the Unicode
                // character U+03B9, so the result stays in this encoding.
                return 0xe9; // iota
            case 0xfd:
            case 0xfb:
            case 0xe0:
            case 0xbe:
            case 0xdb:
                return 0xf5; // upsilon
            case 0xfc:
            case 0xbc:
                return 0xef; // omicron
            case 0xfe:
            case 0xbf:
                return 0xf9; // omega
            default:
                return Character.toLowerCase(letter);
        }
    }
}