1 11 12 package org.eclipse.jdt.internal.ui.text.spelling.engine; 13 14 23 public final class DefaultPhoneticHashProvider implements IPhoneticHashProvider { 24 25 private static final String [] meta01= { "ACH", "" }; private static final String [] meta02= { "BACHER", "MACHER", "" }; private static final String [] meta03= { "CAESAR", "" }; private static final String [] meta04= { "CHIA", "" }; private static final String [] meta05= { "CH", "" }; private static final String [] meta06= { "CHAE", "" }; private static final String [] meta07= { "HARAC", "HARIS", "" }; private static final String [] meta08= { "HOR", "HYM", "HIA", "HEM", "" }; private static final String [] meta09= { "CHORE", "" }; private static final String [] meta10= { "VAN ", "VON ", "" }; private static final String [] meta11= { "SCH", "" }; private static final String [] meta12= { "ORCHES", "ARCHIT", "ORCHID", "" }; private static final String [] meta13= { "T", "S", "" }; private static final String [] meta14= { "A", "O", "U", "E", "" }; private static final String [] meta15= { "L", "R", "N", "M", "B", "H", "F", "V", "W", " ", "" }; private static final String [] meta16= { "MC", "" }; private static final String [] meta17= { "CZ", "" }; private static final String [] meta18= { "WICZ", "" }; private static final String [] meta19= { "CIA", "" }; private static final String [] meta20= { "CC", "" }; private static final String [] meta21= { "I", "E", "H", "" }; private static final String [] meta22= { "HU", "" }; private static final String [] meta23= { "UCCEE", "UCCES", "" }; private static final String [] meta24= { "CK", "CG", "CQ", "" }; private static final String [] meta25= { "CI", "CE", "CY", "" }; private static final String [] meta26= { "GN", "KN", "PN", "WR", "PS", "" }; private static final String [] meta27= { " C", " Q", " G", "" }; private static final String [] meta28= { "C", "K", "Q", "" }; private static final String [] meta29= { "CE", "CI", "" }; private static final String [] meta30= { "DG", "" }; private static final String [] meta31= { "I", "E", "Y", "" }; private static final String [] meta32= { "DT", "DD", "" }; private static final String [] meta33= { "B", "H", "D", "" }; private static final String [] meta34= { "B", "H", "D", "" }; private static final String [] meta35= { "B", "H", "" }; private static final String [] meta36= { "C", "G", "L", "R", "T", "" }; private static final String [] meta37= { "EY", "" }; private static final String [] meta38= { "LI", "" }; private static final String [] meta39= { "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER", "" }; private static final String [] meta40= { "ER", "" }; private static final String [] meta41= { "DANGER", "RANGER", "MANGER", "" }; private static final String [] meta42= { "E", "I", "" }; private static final String [] meta43= { "RGY", "OGY", "" }; private static final String [] meta44= { "E", "I", "Y", "" }; private static final String [] meta45= { "AGGI", "OGGI", "" }; private static final String [] meta46= { "VAN ", "VON ", "" }; private static final String [] meta47= { "SCH", "" }; private static final String [] meta48= { "ET", "" }; private static final String [] meta49= { "C", "X", "" }; private static final String [] meta50= { "JOSE", "" }; private static final String [] meta51= { "SAN ", "" }; private static final String [] meta52= { "SAN ", "" }; private static final String [] meta53= { "JOSE", "" }; private static final String [] meta54= { "L", "T", "K", "S", "N", "M", "B", "Z", "" }; private static final String [] meta55= { "S", "K", "L", "" }; private static final String [] meta56= { "ILLO", "ILLA", "ALLE", "" }; private static final String [] meta57= { "AS", "OS", "" }; private static final String [] meta58= { "A", "O", "" }; private static final String [] meta59= { "ALLE", "" }; private static final String [] meta60= { "UMB", "" }; private static final String [] meta61= { "ER", "" }; private static final String [] meta62= { "P", "B", "" }; private static final String [] meta63= { "IE", "" }; private static final String [] meta64= { "ME", "MA", "" }; private static final String [] meta65= { "ISL", "YSL", "" }; private static final String [] meta66= { "SUGAR", "" }; private static final String [] meta67= { "SH", "" }; private static final String [] meta68= { "HEIM", "HOEK", "HOLM", "HOLZ", "" }; private static final String [] meta69= { "SIO", "SIA", "" }; private static final String [] meta70= { "SIAN", "" }; private static final String [] meta71= { "M", "N", "L", "W", "" }; private static final String [] meta72= { "Z", "" }; private static final String [] meta73= { "Z", "" }; private static final String [] meta74= { "SC", "" }; private static final String [] meta75= { "OO", "ER", "EN", "UY", "ED", "EM", "" }; private static final String [] meta76= { "ER", "EN", "" }; private static final String [] meta77= { "I", "E", "Y", "" }; private static final String [] meta78= { "AI", "OI", "" }; private static final String [] meta79= { "S", "Z", "" }; private static final String [] meta80= { "TION", "" }; private static final String [] meta81= { "TIA", "TCH", "" }; private static final String [] meta82= { "TH", "" }; private static final String [] meta83= { "TTH", "" }; private static final String [] meta84= { "OM", "AM", "" }; private static final String [] meta85= { "VAN ", "VON ", "" }; private static final String [] meta86= { "SCH", "" }; private static final String [] meta87= { "T", "D", "" }; private static final String [] meta88= { "WR", "" }; private static final String [] meta89= { "WH", "" }; private static final String [] meta90= { "EWSKI", "EWSKY", "OWSKI", "OWSKY", "" }; private static final String [] meta91= { "SCH", "" }; private static final String [] meta92= { "WICZ", "WITZ", "" }; private static final String [] meta93= { "IAU", "EAU", "" }; private static final String [] meta94= { "AU", "OU", "" }; private static final String [] meta95= { "W", "K", "CZ", "WITZ" }; 121 122 private static final char[] MUTATOR_CHARACTERS= { 'A', 'B', 'X', 'S', 'K', 'J', 'T', 'F', 'H', 'L', 'M', 'N', 'P', 'R', '0' }; 123 124 125 private static final char[] VOWEL_CHARACTERS= new char[] { 'A', 'E', 'I', 'O', 'U', 'Y' }; 126 127 142 protected static final boolean hasOneOf(final String [] candidates, final char[] token, final int offset, final int length) { 143 144 if (offset < 0 || offset >= token.length || candidates.length == 0) 145 return false; 146 147 final String checkable= new String (token, offset, length); 148 for (int index= 0; index < candidates.length; index++) { 149 150 if (candidates[index].equals(checkable)) 151 return true; 152 } 153 return false; 154 } 155 156 167 protected static final boolean hasOneOf(final String [] candidates, final String token) { 168 169 for (int index= 0; index < candidates.length; index++) { 170 171 if (token.indexOf(candidates[index]) >= 0) 172 return true; 173 } 174 return false; 175 } 176 177 190 protected static final boolean hasVowel(final char[] token, final int offset, final int length) { 191 192 if (offset >= 0 && offset < length) { 193 194 final char character= token[offset]; 195 for (int index= 0; index < VOWEL_CHARACTERS.length; index++) { 196 197 if (VOWEL_CHARACTERS[index] == character) 198 return true; 199 } 200 } 201 return false; 202 } 203 204 207 public final String getHash(final String word) { 208 209 final String input= word.toUpperCase() + " "; final char[] hashable= input.toCharArray(); 211 212 final boolean has95= hasOneOf(meta95, input); 213 final StringBuffer buffer= new StringBuffer (hashable.length); 214 215 int offset= 0; 216 if (hasOneOf(meta26, hashable, 0, 2)) 217 offset += 1; 218 219 if (hashable[0] == 'X') { 220 buffer.append('S'); 221 offset += 1; 222 } 223 224 while (offset < hashable.length) { 225 226 switch (hashable[offset]) { 227 case 'A' : 228 case 'E' : 229 case 'I' : 230 case 'O' : 231 case 'U' : 232 case 'Y' : 233 if (offset == 0) 234 buffer.append('A'); 235 offset += 1; 236 break; 237 case 'B' : 238 buffer.append('P'); 239 if (hashable[offset + 1] == 'B') 240 offset += 2; 241 else 242 offset += 1; 243 break; 244 case 'C' : 245 if ((offset > 1) && !hasVowel(hashable, offset - 2, hashable.length) && hasOneOf(meta01, hashable, (offset - 1), 3) && (hashable[offset + 2] != 'I') && (hashable[offset + 2] != 'E') || hasOneOf(meta02, hashable, (offset - 2), 6)) { 246 buffer.append('K'); 247 offset += 2; 248 break; 249 } 250 if ((offset == 0) && hasOneOf(meta03, hashable, offset, 6)) { 251 buffer.append('S'); 252 offset += 2; 253 break; 254 } 255 if (hasOneOf(meta04, hashable, offset, 4)) { 256 buffer.append('K'); 257 offset += 2; 258 break; 259 } 260 if (hasOneOf(meta05, hashable, offset, 2)) { 261 if ((offset > 0) && hasOneOf(meta06, hashable, offset, 4)) { 262 buffer.append('K'); 263 offset += 2; 264 break; 265 } 266 if ((offset == 0) && hasOneOf(meta07, hashable, (offset + 1), 5) || hasOneOf(meta08, hashable, offset + 1, 3) && !hasOneOf(meta09, hashable, 0, 5)) { 267 buffer.append('K'); 268 offset += 2; 269 break; 270 } 271 if (hasOneOf(meta10, hashable, 0, 4) || hasOneOf(meta11, hashable, 0, 3) || hasOneOf(meta12, hashable, offset - 2, 6) || hasOneOf(meta13, hashable, offset + 2, 1) || (hasOneOf(meta14, hashable, offset - 1, 1) || (offset == 0)) && hasOneOf(meta15, hashable, offset + 2, 1)) { 272 buffer.append('K'); 273 } else { 274 if (offset > 0) { 275 if (hasOneOf(meta16, hashable, 0, 2)) 276 buffer.append('K'); 277 else 278 buffer.append('X'); 279 } else { 280 buffer.append('X'); 281 } 282 } 283 offset += 2; 284 break; 285 } 286 if (hasOneOf(meta17, hashable, offset, 2) && !hasOneOf(meta18, hashable, offset, 4)) { 287 buffer.append('S'); 288 offset += 2; 289 break; 290 } 291 if (hasOneOf(meta19, hashable, offset, 2)) { 292 buffer.append('X'); 293 offset += 2; 294 break; 295 } 296 if (hasOneOf(meta20, hashable, offset, 2) && !((offset == 1) && hashable[0] == 'M')) { 297 if (hasOneOf(meta21, hashable, offset + 2, 1) && !hasOneOf(meta22, hashable, offset + 2, 2)) { 298 if (((offset == 1) && (hashable[offset - 1] == 'A')) || hasOneOf(meta23, hashable, (offset - 1), 5)) 299 buffer.append("KS"); else 301 buffer.append('X'); 302 offset += 3; 303 break; 304 } else { 305 buffer.append('K'); 306 offset += 2; 307 break; 308 } 309 } 310 if (hasOneOf(meta24, hashable, offset, 2)) { 311 buffer.append('K'); 312 offset += 2; 313 break; 314 } else if (hasOneOf(meta25, hashable, offset, 2)) { 315 buffer.append('S'); 316 offset += 2; 317 break; 318 } 319 buffer.append('K'); 320 if (hasOneOf(meta27, hashable, offset + 1, 2)) 321 offset += 3; 322 else if (hasOneOf(meta28, hashable, offset + 1, 1) && !hasOneOf(meta29, hashable, offset + 1, 2)) 323 offset += 2; 324 else 325 offset += 1; 326 break; 327 case '\u00C7' : 328 buffer.append('S'); 329 offset += 1; 330 break; 331 case 'D' : 332 if (hasOneOf(meta30, hashable, offset, 2)) { 333 if (hasOneOf(meta31, hashable, offset + 2, 1)) { 334 buffer.append('J'); 335 offset += 3; 336 break; 337 } else { 338 buffer.append("TK"); offset += 2; 340 break; 341 } 342 } 343 buffer.append('T'); 344 if (hasOneOf(meta32, hashable, offset, 2)) { 345 offset += 2; 346 } else { 347 offset += 1; 348 } 349 break; 350 case 'F' : 351 if (hashable[offset + 1] == 'F') 352 offset += 2; 353 else 354 offset += 1; 355 buffer.append('F'); 356 break; 357 case 'G' : 358 if (hashable[offset + 1] == 'H') { 359 if ((offset > 0) && !hasVowel(hashable, offset - 1, hashable.length)) { 360 buffer.append('K'); 361 offset += 2; 362 break; 363 } 364 if (offset < 3) { 365 if (offset == 0) { 366 if (hashable[offset + 2] == 'I') 367 buffer.append('J'); 368 else 369 buffer.append('K'); 370 offset += 2; 371 break; 372 } 373 } 374 if ((offset > 1) && hasOneOf(meta33, hashable, offset - 2, 1) || ((offset > 2) && hasOneOf(meta34, hashable, offset - 3, 1)) || ((offset > 3) && hasOneOf(meta35, hashable, offset - 4, 1))) { 375 offset += 2; 376 break; 377 } else { 378 if ((offset > 2) && (hashable[offset - 1] == 'U') && hasOneOf(meta36, hashable, offset - 3, 1)) { 379 buffer.append('F'); 380 } else { 381 if ((offset > 0) && (hashable[offset - 1] != 'I')) 382 buffer.append('K'); 383 } 384 offset += 2; 385 break; 386 } 387 } 388 if (hashable[offset + 1] == 'N') { 389 if ((offset == 1) && hasVowel(hashable, 0, hashable.length) && !has95) { 390 buffer.append("KN"); } else { 392 if (!hasOneOf(meta37, hashable, offset + 2, 2) && (hashable[offset + 1] != 'Y') && !has95) { 393 buffer.append("N"); } else { 395 buffer.append("KN"); } 397 } 398 offset += 2; 399 break; 400 } 401 if (hasOneOf(meta38, hashable, offset + 1, 2) && !has95) { 402 buffer.append("KL"); offset += 2; 404 break; 405 } 406 if ((offset == 0) && ((hashable[offset + 1] == 'Y') || hasOneOf(meta39, hashable, offset + 1, 2))) { 407 buffer.append('K'); 408 offset += 2; 409 break; 410 } 411 if ((hasOneOf(meta40, hashable, offset + 1, 2) || (hashable[offset + 1] == 'Y')) && !hasOneOf(meta41, hashable, 0, 6) && !hasOneOf(meta42, hashable, offset - 1, 1) && !hasOneOf(meta43, hashable, offset - 1, 3)) { 412 buffer.append('K'); 413 offset += 2; 414 break; 415 } 416 if (hasOneOf(meta44, hashable, offset + 1, 1) || hasOneOf(meta45, hashable, offset - 1, 4)) { 417 if (hasOneOf(meta46, hashable, 0, 4) || hasOneOf(meta47, hashable, 0, 3) || hasOneOf(meta48, hashable, offset + 1, 2)) { 418 buffer.append('K'); 419 } else { 420 buffer.append('J'); 421 } 422 offset += 2; 423 break; 424 } 425 if (hashable[offset + 1] == 'G') 426 offset += 2; 427 else 428 offset += 1; 429 buffer.append('K'); 430 break; 431 case 'H' : 432 if (((offset == 0) || hasVowel(hashable, offset - 1, hashable.length)) && hasVowel(hashable, offset + 1, hashable.length)) { 433 buffer.append('H'); 434 offset += 2; 435 } else { 436 offset += 1; 437 } 438 break; 439 case 'J' : 440 if (hasOneOf(meta50, hashable, offset, 4) || hasOneOf(meta51, hashable, 0, 4)) { 441 if ((offset == 0) && (hashable[offset + 4] == ' ') || hasOneOf(meta52, hashable, 0, 4)) { 442 buffer.append('H'); 443 } else { 444 buffer.append('J'); 445 } 446 offset += 1; 447 break; 448 } 449 if ((offset == 0) && !hasOneOf(meta53, hashable, offset, 4)) { 450 buffer.append('J'); 451 } else { 452 if (hasVowel(hashable, offset - 1, hashable.length) && !has95 && ((hashable[offset + 1] == 'A') || hashable[offset + 1] == 'O')) { 453 buffer.append('J'); 454 } else { 455 if (offset == (hashable.length - 1)) { 456 buffer.append('J'); 457 } else { 458 if (!hasOneOf(meta54, hashable, offset + 1, 1) && !hasOneOf(meta55, hashable, offset - 1, 1)) { 459 buffer.append('J'); 460 } 461 } 462 } 463 } 464 if (hashable[offset + 1] == 'J') 465 offset += 2; 466 else 467 offset += 1; 468 break; 469 case 'K' : 470 if (hashable[offset + 1] == 'K') 471 offset += 2; 472 else 473 offset += 1; 474 buffer.append('K'); 475 break; 476 case 'L' : 477 if (hashable[offset + 1] == 'L') { 478 if (((offset == (hashable.length - 3)) && hasOneOf(meta56, hashable, offset - 1, 4)) || ((hasOneOf(meta57, hashable, (hashable.length - 1) - 1, 2) || hasOneOf(meta58, hashable, hashable.length - 1, 1)) && hasOneOf(meta59, hashable, offset - 1, 4))) { 479 buffer.append('L'); 480 offset += 2; 481 break; 482 } 483 offset += 2; 484 } else 485 offset += 1; 486 buffer.append('L'); 487 break; 488 case 'M' : 489 if ((hasOneOf(meta60, hashable, offset - 1, 3) && (((offset + 1) == (hashable.length - 1)) || hasOneOf(meta61, hashable, offset + 2, 2))) || (hashable[offset + 1] == 'M')) 490 offset += 2; 491 else 492 offset += 1; 493 buffer.append('M'); 494 break; 495 case 'N' : 496 if (hashable[offset + 1] == 'N') 497 offset += 2; 498 else 499 offset += 1; 500 buffer.append('N'); 501 break; 502 case '\u00D1' : 503 offset += 1; 504 buffer.append('N'); 505 break; 506 case 'P' : 507 if (hashable[offset + 1] == 'N') { 508 buffer.append('F'); 509 offset += 2; 510 break; 511 } 512 if (hasOneOf(meta62, hashable, offset + 1, 1)) 513 offset += 2; 514 else 515 offset += 1; 516 buffer.append('P'); 517 break; 518 case 'Q' : 519 if (hashable[offset + 1] == 'Q') 520 offset += 2; 521 else 522 offset += 1; 523 buffer.append('K'); 524 break; 525 case 'R' : 526 if (!((offset == (hashable.length - 1)) && !has95 && hasOneOf(meta63, hashable, offset - 2, 2) && !hasOneOf(meta64, hashable, offset - 4, 2))) 527 buffer.append('R'); 528 if (hashable[offset + 1] == 'R') 529 offset += 2; 530 else 531 offset += 1; 532 break; 533 case 'S' : 534 if (hasOneOf(meta65, hashable, offset - 1, 3)) { 535 offset += 1; 536 break; 537 } 538 if ((offset == 0) && hasOneOf(meta66, hashable, offset, 5)) { 539 buffer.append('X'); 540 offset += 1; 541 break; 542 } 543 if (hasOneOf(meta67, hashable, offset, 2)) { 544 if (hasOneOf(meta68, hashable, offset + 1, 4)) 545 buffer.append('S'); 546 else 547 buffer.append('X'); 548 offset += 2; 549 break; 550 } 551 if (hasOneOf(meta69, hashable, offset, 3) || hasOneOf(meta70, hashable, offset, 4)) { 552 buffer.append('S'); 553 offset += 3; 554 break; 555 } 556 if (((offset == 0) && hasOneOf(meta71, hashable, offset + 1, 1)) || hasOneOf(meta72, hashable, offset + 1, 1)) { 557 buffer.append('S'); 558 if (hasOneOf(meta73, hashable, offset + 1, 1)) 559 offset += 2; 560 else 561 offset += 1; 562 break; 563 } 564 if (hasOneOf(meta74, hashable, offset, 2)) { 565 if (hashable[offset + 2] == 'H') 566 if (hasOneOf(meta75, hashable, offset + 3, 2)) { 567 if (hasOneOf(meta76, hashable, offset + 3, 2)) { 568 buffer.append("X"); } else { 570 buffer.append("SK"); } 572 offset += 3; 573 break; 574 } else { 575 buffer.append('X'); 576 offset += 3; 577 break; 578 } 579 if (hasOneOf(meta77, hashable, offset + 2, 1)) { 580 buffer.append('S'); 581 offset += 3; 582 break; 583 } 584 buffer.append("SK"); offset += 3; 586 break; 587 } 588 if (!((offset == (hashable.length - 1)) && hasOneOf(meta78, hashable, offset - 2, 2))) 589 buffer.append('S'); 590 if (hasOneOf(meta79, hashable, offset + 1, 1)) 591 offset += 2; 592 else 593 offset += 1; 594 break; 595 case 'T' : 596 if (hasOneOf(meta80, hashable, offset, 4)) { 597 buffer.append('X'); 598 offset += 3; 599 break; 600 } 601 if (hasOneOf(meta81, hashable, offset, 3)) { 602 buffer.append('X'); 603 offset += 3; 604 break; 605 } 606 if (hasOneOf(meta82, hashable, offset, 2) || hasOneOf(meta83, hashable, offset, 3)) { 607 if (hasOneOf(meta84, hashable, (offset + 2), 2) || hasOneOf(meta85, hashable, 0, 4) || hasOneOf(meta86, hashable, 0, 3)) { 608 buffer.append('T'); 609 } else { 610 buffer.append('0'); 611 } 612 offset += 2; 613 break; 614 } 615 if (hasOneOf(meta87, hashable, offset + 1, 1)) { 616 offset += 2; 617 } else 618 offset += 1; 619 buffer.append('T'); 620 break; 621 case 'V' : 622 if (hashable[offset + 1] == 'V') 623 offset += 2; 624 else 625 offset += 1; 626 buffer.append('F'); 627 break; 628 case 'W' : 629 if (hasOneOf(meta88, hashable, offset, 2)) { 630 buffer.append('R'); 631 offset += 2; 632 break; 633 } 634 if ((offset == 0) && (hasVowel(hashable, offset + 1, hashable.length) || hasOneOf(meta89, hashable, offset, 2))) { 635 buffer.append('A'); 636 } 637 if (((offset == (hashable.length - 1)) && hasVowel(hashable, offset - 1, hashable.length)) || hasOneOf(meta90, hashable, offset - 1, 5) || hasOneOf(meta91, hashable, 0, 3)) { 638 buffer.append('F'); 639 offset += 1; 640 break; 641 } 642 if (hasOneOf(meta92, hashable, offset, 4)) { 643 buffer.append("TS"); offset += 4; 645 break; 646 } 647 offset += 1; 648 break; 649 case 'X' : 650 if (!((offset == (hashable.length - 1)) && (hasOneOf(meta93, hashable, offset - 3, 3) || hasOneOf(meta94, hashable, offset - 2, 2)))) 651 buffer.append("KS"); if (hasOneOf(meta49, hashable, offset + 1, 1)) 653 offset += 2; 654 else 655 offset += 1; 656 break; 657 case 'Z' : 658 if (hashable[offset + 1] == 'H') { 659 buffer.append('J'); 660 offset += 2; 661 break; 662 } else { 663 buffer.append('S'); 664 } 665 if (hashable[offset + 1] == 'Z') 666 offset += 2; 667 else 668 offset += 1; 669 break; 670 default : 671 offset += 1; 672 } 673 } 674 return buffer.toString(); 675 } 676 677 680 public final char[] getMutators() { 681 return MUTATOR_CHARACTERS; 682 } 683 } 684 | Popular Tags |