1 16 17 package org.apache.commons.codec.language; 18 19 import junit.framework.Test; 20 import junit.framework.TestSuite; 21 import org.apache.commons.codec.EncoderException; 22 import org.apache.commons.codec.StringEncoder; 23 import org.apache.commons.codec.StringEncoderAbstractTest; 24 25 32 public class DoubleMetaphoneTest extends StringEncoderAbstractTest { 33 34 43 private static final String [][] FIXTURE = { { "Accosinly", "Occasionally" }, { 44 "Ciculer", "Circler" }, { 45 "Circue", "Circle" }, { 46 "Maddness", "Madness" }, { 47 "Occusionaly", "Occasionally" }, { 48 "Steffen", "Stephen" }, { 49 "Thw", "The" }, { 50 "Unformanlly", "Unfortunately" }, { 51 "Unfortally", "Unfortunately" }, { 52 "abilitey", "ability" }, { 53 "abouy", "about" }, { 54 "absorbtion", "absorption" }, { 55 "accidently", "accidentally" }, { 56 "accomodate", "accommodate" }, { 57 "acommadate", "accommodate" }, { 58 "acord", "accord" }, { 59 "adultry", "adultery" }, { 60 "aggresive", "aggressive" }, { 61 "alchohol", "alcohol" }, { 62 "alchoholic", "alcoholic" }, { 63 "allieve", "alive" }, { 64 "alot", "a lot" }, { 65 "alright", "all right" }, { 66 "amature", "amateur" }, { 67 "ambivilant", "ambivalent" }, { 68 "amification", "amplification" }, { 69 "amourfous", "amorphous" }, { 70 "annoint", "anoint" }, { 71 "annonsment", "announcement" }, { 72 "annoyting", "anting" }, { 73 "annuncio", "announce" }, { 74 "anonomy", "anatomy" }, { 75 "anotomy", "anatomy" }, { 76 "antidesestablishmentarianism", "antidisestablishmentarianism" }, { 77 "antidisestablishmentarism", "antidisestablishmentarianism" }, { 78 "anynomous", "anonymous" }, { 79 "appelet", "applet" }, { 80 "appreceiated", "appreciated" }, { 81 "appresteate", "appreciate" }, { 82 "aquantance", "acquaintance" }, { 83 "aratictature", "architecture" }, { 84 "archeype", "archetype" }, { 85 "aricticure", "architecture" }, { 86 "artic", "arctic" }, { 87 "asentote", "asymptote" }, { 88 "ast", "at" }, { 89 "asterick", "asterisk" }, { 90 "asymetric", "asymmetric" }, { 91 "atentively", "attentively" }, { 92 "autoamlly", "automatically" }, { 93 "bankrot", "bankrupt" }, { 94 "basicly", "basically" }, { 95 "batallion", "battalion" }, { 96 "bbrose", "browse" }, { 97 "beauro", "bureau" }, { 98 "beaurocracy", "bureaucracy" }, { 99 "beggining", "beginning" }, { 100 "beging", "beginning" }, { 101 "behaviour", "behavior" }, { 102 "beleive", "believe" }, { 103 "belive", "believe" }, { 104 "benidifs", "benefits" }, { 105 "bigginging", "beginning" }, { 106 "blait", "bleat" }, { 107 "bouyant", "buoyant" }, { 108 "boygot", "boycott" }, { 109 "brocolli", "broccoli" }, { 110 "buch", "bush" }, { 111 "buder", "butter" }, { 112 "budr", "butter" }, { 113 "budter", "butter" }, { 114 "buracracy", "bureaucracy" }, { 115 "burracracy", "bureaucracy" }, { 116 "buton", "button" }, { 117 "byby", "by by" }, { 118 "cauler", "caller" }, { 119 "ceasar", "caesar" }, { 120 "cemetary", "cemetery" }, { 121 "changeing", "changing" }, { 122 "cheet", "cheat" }, { 123 "cicle", "circle" }, { 124 "cimplicity", "simplicity" }, { 125 "circumstaces", "circumstances" }, { 126 "clob", "club" }, { 127 "coaln", "colon" }, { 128 "cocamena", "cockamamie" }, { 129 "colleaque", "colleague" }, { 130 "colloquilism", "colloquialism" }, { 131 "columne", "column" }, { 132 "comiler", "compiler" }, { 133 "comitmment", "commitment" }, { 134 "comitte", "committee" }, { 135 "comittmen", "commitment" }, { 136 "comittmend", "commitment" }, { 137 "commerciasl", "commercials" }, { 138 "commited", "committed" }, { 139 "commitee", "committee" }, { 140 "companys", "companies" }, { 141 "compicated", "complicated" }, { 142 "comupter", "computer" }, { 143 "concensus", "consensus" }, { 144 "confusionism", "confucianism" }, { 145 "congradulations", "congratulations" }, { 146 "conibation", "contribution" }, { 147 "consident", "consistent" }, { 148 "consident", "consonant" }, { 149 "contast", "constant" }, { 150 "contastant", "constant" }, { 151 "contunie", "continue" }, { 152 "cooly", "coolly" }, { 153 "copping", "coping" }, { 154 "cosmoplyton", "cosmopolitan" }, { 155 "courst", "court" }, { 156 "crasy", "crazy" }, { 157 "cravets", "caveats" }, { 158 "credetability", "credibility" }, { 159 "criqitue", "critique" }, { 160 "croke", "croak" }, { 161 "crucifiction", "crucifixion" }, { 162 "crusifed", "crucified" }, { 163 "ctitique", "critique" }, { 164 "cumba", "combo" }, { 165 "custamisation", "customization" }, { 166 "dag", "dog" }, { 167 "daly", "daily" }, { 168 "danguages", "dangerous" }, { 169 "deaft", "draft" }, { 170 "defence", "defense" }, { 171 "defenly", "defiantly" }, { 172 "definate", "definite" }, { 173 "definately", "definitely" }, { 174 "dependeble", "dependable" }, { 175 "descrption", "description" }, { 176 "descrptn", "description" }, { 177 "desparate", "desperate" }, { 178 "dessicate", "desiccate" }, { 179 "destint", "distant" }, { 180 "develepment", "developments" }, { 181 "developement", "development" }, { 182 "develpond", "development" }, { 183 "devulge", "divulge" }, { 184 "diagree", "disagree" }, { 185 "dieties", "deities" }, { 186 "dinasaur", "dinosaur" }, { 187 "dinasour", "dinosaur" }, { 188 "direcyly", "directly" }, { 189 "discuess", "discuss" }, { 190 "disect", "dissect" }, { 191 "disippate", "dissipate" }, { 192 "disition", "decision" }, { 193 "dispair", "despair" }, { 194 "disssicion", "discussion" }, { 195 "distarct", "distract" }, { 196 "distart", "distort" }, { 197 "distroy", "destroy" }, { 198 "documtations", "documentation" }, { 199 "doenload", "download" }, { 200 "dongle", "dangle" }, { 201 "doog", "dog" }, { 202 "dramaticly", "dramatically" }, { 203 "drunkeness", "drunkenness" }, { 204 "ductioneery", "dictionary" }, { 205 "dur", "due" }, { 206 "duren", "during" }, { 207 "dymatic", "dynamic" }, { 208 "dynaic", "dynamic" }, { 209 "ecstacy", "ecstasy" }, { 210 "efficat", "efficient" }, { 211 "efficity", "efficacy" }, { 212 "effots", "efforts" }, { 213 "egsistence", "existence" }, { 214 "eitiology", "etiology" }, { 215 "elagent", "elegant" }, { 216 "elligit", "elegant" }, { 217 "embarass", "embarrass" }, { 218 "embarassment", "embarrassment" }, { 219 "embaress", "embarrass" }, { 220 "encapsualtion", "encapsulation" }, { 221 "encyclapidia", "encyclopedia" }, { 222 "encyclopia", "encyclopedia" }, { 223 "engins", "engine" }, { 224 "enhence", "enhance" }, { 225 "enligtment", "Enlightenment" }, { 226 "ennuui", "ennui" }, { 227 "enought", "enough" }, { 228 "enventions", "inventions" }, { 229 "envireminakl", "environmental" }, { 230 "enviroment", "environment" }, { 231 "epitomy", "epitome" }, { 232 "equire", "acquire" }, { 233 "errara", "error" }, { 234 "erro", "error" }, { 235 "evaualtion", "evaluation" }, { 236 "evething", "everything" }, { 237 "evtually", "eventually" }, { 238 "excede", "exceed" }, { 239 "excercise", "exercise" }, { 240 "excpt", "except" }, { 241 "excution", "execution" }, { 242 "exhileration", "exhilaration" }, { 243 "existance", "existence" }, { 244 "expleyly", "explicitly" }, { 245 "explity", "explicitly" }, { 246 "expresso", "espresso" }, { 247 "exspidient", "expedient" }, { 248 "extions", "extensions" }, { 249 "factontion", "factorization" }, { 250 "failer", "failure" }, { 251 "famdasy", "fantasy" }, { 252 "faver", "favor" }, { 253 "faxe", "fax" }, { 254 "febuary", "february" }, { 255 "firey", "fiery" }, { 256 "fistival", "festival" }, { 257 "flatterring", "flattering" }, { 258 "fluk", "flux" }, { 259 "flukse", "flux" }, { 260 "fone", "phone" }, { 261 "forsee", "foresee" }, { 262 "frustartaion", "frustrating" }, { 263 "fuction", "function" }, { 264 "funetik", "phonetic" }, { 265 "futs", "guts" }, { 266 "gamne", "came" }, { 267 "gaurd", "guard" }, { 268 "generly", "generally" }, { 269 "ghandi", "gandhi" }, { 270 "goberment", "government" }, { 271 "gobernement", "government" }, { 272 "gobernment", "government" }, { 273 "gotton", "gotten" }, { 274 "gracefull", "graceful" }, { 275 "gradualy", "gradually" }, { 276 "grammer", "grammar" }, { 277 "hallo", "hello" }, { 278 "hapily", "happily" }, { 279 "harrass", "harass" }, { 280 "havne", "have" }, { 281 "heellp", "help" }, { 282 "heighth", "height" }, { 283 "hellp", "help" }, { 284 "helo", "hello" }, { 285 "herlo", "hello" }, { 286 "hifin", "hyphen" }, { 287 "hifine", "hyphen" }, { 288 "higer", "higher" }, { 289 "hiphine", "hyphen" }, { 290 "hippie", "hippy" }, { 291 "hippopotamous", "hippopotamus" }, { 292 "hlp", "help" }, { 293 "hourse", "horse" }, { 294 "houssing", "housing" }, { 295 "howaver", "however" }, { 296 "howver", "however" }, { 297 "humaniti", "humanity" }, { 298 "hyfin", "hyphen" }, { 299 "hypotathes", "hypothesis" }, { 300 "hypotathese", "hypothesis" }, { 301 "hystrical", "hysterical" }, { 302 "ident", "indent" }, { 303 "illegitament", "illegitimate" }, { 304 "imbed", "embed" }, { 305 "imediaetly", "immediately" }, { 306 "imfamy", "infamy" }, { 307 "immenant", "immanent" }, { 308 "implemtes", "implements" }, { 309 "inadvertant", "inadvertent" }, { 310 "incase", "in case" }, { 311 "incedious", "insidious" }, { 312 "incompleet", "incomplete" }, { 313 "incomplot", "incomplete" }, { 314 "inconvenant", "inconvenient" }, { 315 "inconvience", "inconvenience" }, { 316 "independant", "independent" }, { 317 "independenent", "independent" }, { 318 "indepnends", "independent" }, { 319 "indepth", "in depth" }, { 320 "indispensible", "indispensable" }, { 321 "inefficite", "inefficient" }, { 322 "inerface", "interface" }, { 323 "infact", "in fact" }, { 324 "influencial", "influential" }, { 325 "inital", "initial" }, { 326 "initinized", "initialized" }, { 327 "initized", "initialized" }, { 328 "innoculate", "inoculate" }, { 329 "insistant", "insistent" }, { 330 "insistenet", "insistent" }, { 331 "instulation", "installation" }, { 332 "intealignt", "intelligent" }, { 333 "intejilent", "intelligent" }, { 334 "intelegent", "intelligent" }, { 335 "intelegnent", "intelligent" }, { 336 "intelejent", "intelligent" }, { 337 "inteligent", "intelligent" }, { 338 "intelignt", "intelligent" }, { 339 "intellagant", "intelligent" }, { 340 "intellegent", "intelligent" }, { 341 "intellegint", "intelligent" }, { 342 "intellgnt", "intelligent" }, { 343 "intensionality", "intensionally" }, { 344 "interate", "iterate" }, { 345 "internation", "international" }, { 346 "interpretate", "interpret" }, { 347 "interpretter", "interpreter" }, { 348 "intertes", "interested" }, { 349 "intertesd", "interested" }, { 350 "invermeantial", "environmental" }, { 351 "irregardless", "regardless" }, { 352 "irresistable", "irresistible" }, { 353 "irritible", "irritable" }, { 354 "islams", "muslims" }, { 355 "isotrop", "isotope" }, { 356 "isreal", "israel" }, { 357 "johhn", "john" }, { 358 "judgement", "judgment" }, { 359 "kippur", "kipper" }, { 360 "knawing", "knowing" }, { 361 "latext", "latest" }, { 362 "leasve", "leave" }, { 363 "lesure", "leisure" }, { 364 "liasion", "lesion" }, { 365 "liason", "liaison" }, { 366 "libary", "library" }, { 367 "likly", "likely" }, { 368 "lilometer", "kilometer" }, { 369 "liquify", "liquefy" }, { 370 "lloyer", "layer" }, { 371 "lossing", "losing" }, { 372 "luser", "laser" }, { 373 "maintanence", "maintenance" }, { 374 "majaerly", "majority" }, { 375 "majoraly", "majority" }, { 376 "maks", "masks" }, { 377 "mandelbrot", "Mandelbrot" }, { 378 "mant", "want" }, { 379 "marshall", "marshal" }, { 380 "maxium", "maximum" }, { 381 "meory", "memory" }, { 382 "metter", "better" }, { 383 "mic", "mike" }, { 384 "midia", "media" }, { 385 "millenium", "millennium" }, { 386 "miniscule", "minuscule" }, { 387 "minkay", "monkey" }, { 388 "minum", "minimum" }, { 389 "mischievious", "mischievous" }, { 390 "misilous", "miscellaneous" }, { 391 "momento", "memento" }, { 392 "monkay", "monkey" }, { 393 "mosaik", "mosaic" }, { 394 "mostlikely", "most likely" }, { 395 "mousr", "mouser" }, { 396 "mroe", "more" }, { 397 "neccessary", "necessary" }, { 398 "necesary", "necessary" }, { 399 "necesser", "necessary" }, { 400 "neice", "niece" }, { 401 "neighbour", "neighbor" }, { 402 "nemonic", "pneumonic" }, { 403 "nevade", "Nevada" }, { 404 "nickleodeon", "nickelodeon" }, { 405 "nieve", "naive" }, { 406 "noone", "no one" }, { 407 "noticably", "noticeably" }, { 408 "notin", "not in" }, { 409 "nozled", "nuzzled" }, { 410 "objectsion", "objects" }, { 411 "obsfuscate", "obfuscate" }, { 412 "ocassion", "occasion" }, { 413 "occuppied", "occupied" }, { 414 "occurence", "occurrence" }, { 415 "octagenarian", "octogenarian" }, { 416 "olf", "old" }, { 417 "opposim", "opossum" }, { 418 "organise", "organize" }, { 419 "organiz", "organize" }, { 420 "orientate", "orient" }, { 421 "oscilascope", "oscilloscope" }, { 422 "oving", "moving" }, { 423 "paramers", "parameters" }, { 424 "parametic", "parameter" }, { 425 "paranets", "parameters" }, { 426 "partrucal", "particular" }, { 427 "pataphysical", "metaphysical" }, { 428 "patten", "pattern" }, { 429 "permissable", "permissible" }, { 430 "permition", "permission" }, { 431 "permmasivie", "permissive" }, { 432 "perogative", "prerogative" }, { 433 "persue", "pursue" }, { 434 "phantasia", "fantasia" }, { 435 "phenominal", "phenomenal" }, { 436 "picaresque", "picturesque" }, { 437 "playwrite", "playwright" }, { 438 "poeses", "poesies" }, { 439 "polation", "politician" }, { 440 "poligamy", "polygamy" }, { 441 "politict", "politic" }, { 442 "pollice", "police" }, { 443 "polypropalene", "polypropylene" }, { 444 "pompom", "pompon" }, { 445 "possable", "possible" }, { 446 "practicle", "practical" }, { 447 "pragmaticism", "pragmatism" }, { 448 "preceeding", "preceding" }, { 449 "precion", "precision" }, { 450 "precios", "precision" }, { 451 "preemptory", "peremptory" }, { 452 "prefices", "prefixes" }, { 453 "prefixt", "prefixed" }, { 454 "presbyterian", "Presbyterian" }, { 455 "presue", "pursue" }, { 456 "presued", "pursued" }, { 457 "privielage", "privilege" }, { 458 "priviledge", "privilege" }, { 459 "proceedures", "procedures" }, { 460 "pronensiation", "pronunciation" }, { 461 "pronisation", "pronunciation" }, { 462 "pronounciation", "pronunciation" }, { 463 "properally", "properly" }, { 464 "proplematic", "problematic" }, { 465 "protray", "portray" }, { 466 "pscolgst", "psychologist" }, { 467 "psicolagest", "psychologist" }, { 468 "psycolagest", "psychologist" }, { 469 "quoz", "quiz" }, { 470 "radious", "radius" }, { 471 "ramplily", "rampantly" }, { 472 "reccomend", "recommend" }, { 473 "reccona", "raccoon" }, { 474 "recieve", "receive" }, { 475 "reconise", "recognize" }, { 476 "rectangeles", "rectangle" }, { 477 "redign", "redesign" }, { 478 "reoccurring", "recurring" }, { 479 "repitition", "repetition" }, { 480 "replasments", "replacement" }, { 481 "reposable", "responsible" }, { 482 "reseblence", "resemblance" }, { 483 "respct", "respect" }, { 484 "respecally", "respectfully" }, { 485 "roon", "room" }, { 486 "rought", "roughly" }, { 487 "rsx", "RSX" }, { 488 "rudemtry", "rudimentary" }, { 489 "runnung", "running" }, { 490 "sacreligious", "sacrilegious" }, { 491 "saftly", "safely" }, { 492 "salut", "salute" }, { 493 "satifly", "satisfy" }, { 494 "scrabdle", "scrabble" }, { 495 "searcheable", "searchable" }, { 496 "secion", "section" }, { 497 "seferal", "several" }, { 498 "segements", "segments" }, { 499 "sence", "sense" }, { 500 "seperate", "separate" }, { 501 "sherbert", "sherbet" }, { 502 "sicolagest", "psychologist" }, { 503 "sieze", "seize" }, { 504 "simpfilty", "simplicity" }, { 505 "simplye", "simply" }, { 506 "singal", "signal" }, { 507 "sitte", "site" }, { 508 "situration", "situation" }, { 509 "slyph", "sylph" }, { 510 "smil", "smile" }, { 511 "snuck", "sneaked" }, { 512 "sometmes", "sometimes" }, { 513 "soonec", "sonic" }, { 514 "specificialy", "specifically" }, { 515 "spel", "spell" }, { 516 "spoak", "spoke" }, { 517 "sponsered", "sponsored" }, { 518 "stering", "steering" }, { 519 "straightjacket", "straitjacket" }, { 520 "stumach", "stomach" }, { 521 "stutent", "student" }, { 522 "styleguide", "style guide" }, { 523 "subisitions", "substitutions" }, { 524 "subjecribed", "subscribed" }, { 525 "subpena", "subpoena" }, { 526 "substations", "substitutions" }, { 527 "suger", "sugar" }, { 528 "supercede", "supersede" }, { 529 "superfulous", "superfluous" }, { 530 "susan", "Susan" }, { 531 "swimwear", "swim wear" }, { 532 "syncorization", "synchronization" }, { 533 "taff", "tough" }, { 534 "taht", "that" }, { 535 "tattos", "tattoos" }, { 536 "techniquely", "technically" }, { 537 "teh", "the" }, { 538 "tem", "team" }, { 539 "teo", "two" }, { 540 "teridical", "theoretical" }, { 541 "tesst", "test" }, { 542 "tets", "tests" }, { 543 "thanot", "than or" }, { 544 "theirselves", "themselves" }, { 545 "theridically", "theoretical" }, { 546 "thredically", "theoretically" }, { 547 "thruout", "throughout" }, { 548 "ths", "this" }, { 549 "titalate", "titillate" }, { 550 "tobagan", "tobaggon" }, { 551 "tommorrow", "tomorrow" }, { 552 "tomorow", "tomorrow" }, { 553 "tradegy", "tragedy" }, { 554 "trubbel", "trouble" }, { 555 "ttest", "test" }, { 556 "tunnellike", "tunnel like" }, { 557 "tured", "turned" }, { 558 "tyrrany", "tyranny" }, { 559 "unatourral", "unnatural" }, { 560 "unaturral", "unnatural" }, { 561 "unconisitional", "unconstitutional" }, { 562 "unconscience", "unconscious" }, { 563 "underladder", "under ladder" }, { 564 "unentelegible", "unintelligible" }, { 565 "unfortunently", "unfortunately" }, { 566 "unnaturral", "unnatural" }, { 567 "upcast", "up cast" }, { 568 "upmost", "utmost" }, { 569 "uranisium", "uranium" }, { 570 "verison", "version" }, { 571 "vinagarette", "vinaigrette" }, { 572 "volumptuous", "voluptuous" }, { 573 "volunteerism", "voluntarism" }, { 574 "volye", "volley" }, { 575 "wadting", "wasting" }, { 576 "waite", "wait" }, { 577 "wan't", "won't" }, { 578 "warloord", "warlord" }, { 579 "whaaat", "what" }, { 580 "whard", "ward" }, { 581 "whimp", "wimp" }, { 582 "wicken", "weaken" }, { 583 "wierd", "weird" }, { 584 "wrank", "rank" }, { 585 "writeen", "righten" }, { 586 "writting", "writing" }, { 587 "wundeews", "windows" }, { 588 "yeild", "yield" }, { 589 "youe", "your" } 590 }; 591 592 595 private static final String [][] MATCHES = { { "Accosinly", "Occasionally" }, { 596 "Maddness", "Madness" }, { 597 "Occusionaly", "Occasionally" }, { 598 "Steffen", "Stephen" }, { 599 "Thw", "The" }, { 600 "Unformanlly", "Unfortunately" }, { 601 "Unfortally", "Unfortunately" }, { 602 "abilitey", "ability" }, { 603 "absorbtion", "absorption" }, { 604 "accidently", "accidentally" }, { 605 "accomodate", "accommodate" }, { 606 "acommadate", "accommodate" }, { 607 "acord", "accord" }, { 608 "adultry", "adultery" }, { 609 "aggresive", "aggressive" }, { 610 "alchohol", "alcohol" }, { 611 "alchoholic", "alcoholic" }, { 612 "allieve", "alive" }, { 613 "alot", "a lot" }, { 614 "alright", "all right" }, { 615 "amature", "amateur" }, { 616 "ambivilant", "ambivalent" }, { 617 "amourfous", "amorphous" }, { 618 "annoint", "anoint" }, { 619 "annonsment", "announcement" }, { 620 "annoyting", "anting" }, { 621 "annuncio", "announce" }, { 622 "anotomy", "anatomy" }, { 623 "antidesestablishmentarianism", "antidisestablishmentarianism" }, { 624 "antidisestablishmentarism", "antidisestablishmentarianism" }, { 625 "anynomous", "anonymous" }, { 626 "appelet", "applet" }, { 627 "appreceiated", "appreciated" }, { 628 "appresteate", "appreciate" }, { 629 "aquantance", "acquaintance" }, { 630 "aricticure", "architecture" }, { 631 "asterick", "asterisk" }, { 632 "asymetric", "asymmetric" }, { 633 "atentively", "attentively" }, { 634 "bankrot", "bankrupt" }, { 635 "basicly", "basically" }, { 636 "batallion", "battalion" }, { 637 "bbrose", "browse" }, { 638 "beauro", "bureau" }, { 639 "beaurocracy", "bureaucracy" }, { 640 "beggining", "beginning" }, { 641 "behaviour", "behavior" }, { 642 "beleive", "believe" }, { 643 "belive", "believe" }, { 644 "blait", "bleat" }, { 645 "bouyant", "buoyant" }, { 646 "boygot", "boycott" }, { 647 "brocolli", "broccoli" }, { 648 "buder", "butter" }, { 649 "budr", "butter" }, { 650 "budter", "butter" }, { 651 "buracracy", "bureaucracy" }, { 652 "burracracy", "bureaucracy" }, { 653 "buton", "button" }, { 654 "byby", "by by" }, { 655 "cauler", "caller" }, { 656 "ceasar", "caesar" }, { 657 "cemetary", "cemetery" }, { 658 "changeing", "changing" }, { 659 "cheet", "cheat" }, { 660 "cimplicity", "simplicity" }, { 661 "circumstaces", "circumstances" }, { 662 "clob", "club" }, { 663 "coaln", "colon" }, { 664 "colleaque", "colleague" }, { 665 "colloquilism", "colloquialism" }, { 666 "columne", "column" }, { 667 "comitmment", "commitment" }, { 668 "comitte", "committee" }, { 669 "comittmen", "commitment" }, { 670 "comittmend", "commitment" }, { 671 "commerciasl", "commercials" }, { 672 "commited", "committed" }, { 673 "commitee", "committee" }, { 674 "companys", "companies" }, { 675 "comupter", "computer" }, { 676 "concensus", "consensus" }, { 677 "confusionism", "confucianism" }, { 678 "congradulations", "congratulations" }, { 679 "contunie", "continue" }, { 680 "cooly", "coolly" }, { 681 "copping", "coping" }, { 682 "cosmoplyton", "cosmopolitan" }, { 683 "crasy", "crazy" }, { 684 "croke", "croak" }, { 685 "crucifiction", "crucifixion" }, { 686 "crusifed", "crucified" }, { 687 "cumba", "combo" }, { 688 "custamisation", "customization" }, { 689 "dag", "dog" }, { 690 "daly", "daily" }, { 691 "defence", "defense" }, { 692 "definate", "definite" }, { 693 "definately", "definitely" }, { 694 "dependeble", "dependable" }, { 695 "descrption", "description" }, { 696 "descrptn", "description" }, { 697 "desparate", "desperate" }, { 698 "dessicate", "desiccate" }, { 699 "destint", "distant" }, { 700 "develepment", "developments" }, { 701 "developement", "development" }, { 702 "develpond", "development" }, { 703 "devulge", "divulge" }, { 704 "dieties", "deities" }, { 705 "dinasaur", "dinosaur" }, { 706 "dinasour", "dinosaur" }, { 707 "discuess", "discuss" }, { 708 "disect", "dissect" }, { 709 "disippate", "dissipate" }, { 710 "disition", "decision" }, { 711 "dispair", "despair" }, { 712 "distarct", "distract" }, { 713 "distart", "distort" }, { 714 "distroy", "destroy" }, { 715 "doenload", "download" }, { 716 "dongle", "dangle" }, { 717 "doog", "dog" }, { 718 "dramaticly", "dramatically" }, { 719 "drunkeness", "drunkenness" }, { 720 "ductioneery", "dictionary" }, { 721 "ecstacy", "ecstasy" }, { 722 "egsistence", "existence" }, { 723 "eitiology", "etiology" }, { 724 "elagent", "elegant" }, { 725 "embarass", "embarrass" }, { 726 "embarassment", "embarrassment" }, { 727 "embaress", "embarrass" }, { 728 "encapsualtion", "encapsulation" }, { 729 "encyclapidia", "encyclopedia" }, { 730 "encyclopia", "encyclopedia" }, { 731 "engins", "engine" }, { 732 "enhence", "enhance" }, { 733 "ennuui", "ennui" }, { 734 "enventions", "inventions" }, { 735 "envireminakl", "environmental" }, { 736 "enviroment", "environment" }, { 737 "epitomy", "epitome" }, { 738 "equire", "acquire" }, { 739 "errara", "error" }, { 740 "evaualtion", "evaluation" }, { 741 "excede", "exceed" }, { 742 "excercise", "exercise" }, { 743 "excpt", "except" }, { 744 "exhileration", "exhilaration" }, { 745 "existance", "existence" }, { 746 "expleyly", "explicitly" }, { 747 "explity", "explicitly" }, { 748 "failer", "failure" }, { 749 "faver", "favor" }, { 750 "faxe", "fax" }, { 751 "firey", "fiery" }, { 752 "fistival", "festival" }, { 753 "flatterring", "flattering" }, { 754 "flukse", "flux" }, { 755 "fone", "phone" }, { 756 "forsee", "foresee" }, { 757 "frustartaion", "frustrating" }, { 758 "funetik", "phonetic" }, { 759 "gaurd", "guard" }, { 760 "generly", "generally" }, { 761 "ghandi", "gandhi" }, { 762 "gotton", "gotten" }, { 763 "gracefull", "graceful" }, { 764 "gradualy", "gradually" }, { 765 "grammer", "grammar" }, { 766 "hallo", "hello" }, { 767 "hapily", "happily" }, { 768 "harrass", "harass" }, { 769 "heellp", "help" }, { 770 "heighth", "height" }, { 771 "hellp", "help" }, { 772 "helo", "hello" }, { 773 "hifin", "hyphen" }, { 774 "hifine", "hyphen" }, { 775 "hiphine", "hyphen" }, { 776 "hippie", "hippy" }, { 777 "hippopotamous", "hippopotamus" }, { 778 "hourse", "horse" }, { 779 "houssing", "housing" }, { 780 "howaver", "however" }, { 781 "howver", "however" }, { 782 "humaniti", "humanity" }, { 783 "hyfin", "hyphen" }, { 784 "hystrical", "hysterical" }, { 785 "illegitament", "illegitimate" }, { 786 "imbed", "embed" }, { 787 "imediaetly", "immediately" }, { 788 "immenant", "immanent" }, { 789 "implemtes", "implements" }, { 790 "inadvertant", "inadvertent" }, { 791 "incase", "in case" }, { 792 "incedious", "insidious" }, { 793 "incompleet", "incomplete" }, { 794 "incomplot", "incomplete" }, { 795 "inconvenant", "inconvenient" }, { 796 "inconvience", "inconvenience" }, { 797 "independant", "independent" }, { 798 "independenent", "independent" }, { 799 "indepnends", "independent" }, { 800 "indepth", "in depth" }, { 801 "indispensible", "indispensable" }, { 802 "inefficite", "inefficient" }, { 803 "infact", "in fact" }, { 804 "influencial", "influential" }, { 805 "innoculate", "inoculate" }, { 806 "insistant", "insistent" }, { 807 "insistenet", "insistent" }, { 808 "instulation", "installation" }, { 809 "intealignt", "intelligent" }, { 810 "intelegent", "intelligent" }, { 811 "intelegnent", "intelligent" }, { 812 "intelejent", "intelligent" }, { 813 "inteligent", "intelligent" }, { 814 "intelignt", "intelligent" }, { 815 "intellagant", "intelligent" }, { 816 "intellegent", "intelligent" }, { 817 "intellegint", "intelligent" }, { 818 "intellgnt", "intelligent" }, { 819 "intensionality", "intensionally" }, { 820 "internation", "international" }, { 821 "interpretate", "interpret" }, { 822 "interpretter", "interpreter" }, { 823 "intertes", "interested" }, { 824 "intertesd", "interested" }, { 825 "invermeantial", "environmental" }, { 826 "irresistable", "irresistible" }, { 827 "irritible", "irritable" }, { 828 "isreal", "israel" }, { 829 "johhn", "john" }, { 830 "kippur", "kipper" }, { 831 "knawing", "knowing" }, { 832 "lesure", "leisure" }, { 833 "liasion", "lesion" }, { 834 "liason", "liaison" }, { 835 "likly", "likely" }, { 836 "liquify", "liquefy" }, { 837 "lloyer", "layer" }, { 838 "lossing", "losing" }, { 839 "luser", "laser" }, { 840 "maintanence", "maintenance" }, { 841 "mandelbrot", "Mandelbrot" }, { 842 "marshall", "marshal" }, { 843 "maxium", "maximum" }, { 844 "mic", "mike" }, { 845 "midia", "media" }, { 846 "millenium", "millennium" }, { 847 "miniscule", "minuscule" }, { 848 "minkay", "monkey" }, { 849 "mischievious", "mischievous" }, { 850 "momento", "memento" }, { 851 "monkay", "monkey" }, { 852 "mosaik", "mosaic" }, { 853 "mostlikely", "most likely" }, { 854 "mousr", "mouser" }, { 855 "mroe", "more" }, { 856 "necesary", "necessary" }, { 857 "necesser", "necessary" }, { 858 "neice", "niece" }, { 859 "neighbour", "neighbor" }, { 860 "nemonic", "pneumonic" }, { 861 "nevade", "Nevada" }, { 862 "nickleodeon", "nickelodeon" }, { 863 "nieve", "naive" }, { 864 "noone", "no one" }, { 865 "notin", "not in" }, { 866 "nozled", "nuzzled" }, { 867 "objectsion", "objects" }, { 868 "ocassion", "occasion" }, { 869 "occuppied", "occupied" }, { 870 "occurence", "occurrence" }, { 871 "octagenarian", "octogenarian" }, { 872 "opposim", "opossum" }, { 873 "organise", "organize" }, { 874 "organiz", "organize" }, { 875 "orientate", "orient" }, { 876 "oscilascope", "oscilloscope" }, { 877 "parametic", "parameter" }, { 878 "permissable", "permissible" }, { 879 "permmasivie", "permissive" }, { 880 "persue", "pursue" }, { 881 "phantasia", "fantasia" }, { 882 "phenominal", "phenomenal" }, { 883 "playwrite", "playwright" }, { 884 "poeses", "poesies" }, { 885 "poligamy", "polygamy" }, { 886 "politict", "politic" }, { 887 "pollice", "police" }, { 888 "polypropalene", "polypropylene" }, { 889 "possable", "possible" }, { 890 "practicle", "practical" }, { 891 "pragmaticism", "pragmatism" }, { 892 "preceeding", "preceding" }, { 893 "precios", "precision" }, { 894 "preemptory", "peremptory" }, { 895 "prefixt", "prefixed" }, { 896 "presbyterian", "Presbyterian" }, { 897 "presue", "pursue" }, { 898 "presued", "pursued" }, { 899 "privielage", "privilege" }, { 900 "priviledge", "privilege" }, { 901 "proceedures", "procedures" }, { 902 "pronensiation", "pronunciation" }, { 903 "pronounciation", "pronunciation" }, { 904 "properally", "properly" }, { 905 "proplematic", "problematic" }, { 906 "protray", "portray" }, { 907 "pscolgst", "psychologist" }, { 908 "psicolagest", "psychologist" }, { 909 "psycolagest", "psychologist" }, { 910 "quoz", "quiz" }, { 911 "radious", "radius" }, { 912 "reccomend", "recommend" }, { 913 "reccona", "raccoon" }, { 914 "recieve", "receive" }, { 915 "reconise", "recognize" }, { 916 "rectangeles", "rectangle" }, { 917 "reoccurring", "recurring" }, { 918 "repitition", "repetition" }, { 919 "replasments", "replacement" }, { 920 "respct", "respect" }, { 921 "respecally", "respectfully" }, { 922 "rsx", "RSX" }, { 923 "runnung", "running" }, { 924 "sacreligious", "sacrilegious" }, { 925 "salut", "salute" }, { 926 "searcheable", "searchable" }, { 927 "seferal", "several" }, { 928 "segements", "segments" }, { 929 "sence", "sense" }, { 930 "seperate", "separate" }, { 931 "sicolagest", "psychologist" }, { 932 "sieze", "seize" }, { 933 "simplye", "simply" }, { 934 "sitte", "site" }, { 935 "slyph", "sylph" }, { 936 "smil", "smile" }, { 937 "sometmes", "sometimes" }, { 938 "soonec", "sonic" }, { 939 "specificialy", "specifically" }, { 940 "spel", "spell" }, { 941 "spoak", "spoke" }, { 942 "sponsered", "sponsored" }, { 943 "stering", "steering" }, { 944 "straightjacket", "straitjacket" }, { 945 "stumach", "stomach" }, { 946 "stutent", "student" }, { 947 "styleguide", "style guide" }, { 948 "subpena", "subpoena" }, { 949 "substations", "substitutions" }, { 950 "supercede", "supersede" }, { 951 "superfulous", "superfluous" }, { 952 "susan", "Susan" }, { 953 "swimwear", "swim wear" }, { 954 "syncorization", "synchronization" }, { 955 "taff", "tough" }, { 956 "taht", "that" }, { 957 "tattos", "tattoos" }, { 958 "techniquely", "technically" }, { 959 "teh", "the" }, { 960 "tem", "team" }, { 961 "teo", "two" }, { 962 "teridical", "theoretical" }, { 963 "tesst", "test" }, { 964 "theridically", "theoretical" }, { 965 "thredically", "theoretically" }, { 966 "thruout", "throughout" }, { 967 "ths", "this" }, { 968 "titalate", "titillate" }, { 969 "tobagan", "tobaggon" }, { 970 "tommorrow", "tomorrow" }, { 971 "tomorow", "tomorrow" }, { 972 "trubbel", "trouble" }, { 973 "ttest", "test" }, { 974 "tyrrany", "tyranny" }, { 975 "unatourral", "unnatural" }, { 976 "unaturral", "unnatural" }, { 977 "unconisitional", "unconstitutional" }, { 978 "unconscience", "unconscious" }, { 979 "underladder", "under ladder" }, { 980 "unentelegible", "unintelligible" }, { 981 "unfortunently", "unfortunately" }, { 982 "unnaturral", "unnatural" }, { 983 "upcast", "up cast" }, { 984 "verison", "version" }, { 985 "vinagarette", "vinaigrette" }, { 986 "volunteerism", "voluntarism" }, { 987 "volye", "volley" }, { 988 "waite", "wait" }, { 989 "wan't", "won't" }, { 990 "warloord", "warlord" }, { 991 "whaaat", "what" }, { 992 "whard", "ward" }, { 993 "whimp", "wimp" }, { 994 "wicken", "weaken" }, { 995 "wierd", "weird" }, { 996 "wrank", "rank" }, { 997 "writeen", "righten" }, { 998 "writting", "writing" }, { 999 "wundeews", "windows" }, { 1000 "yeild", "yield" }, }; 1001 1002 public static Test suite() { 1003 return (new TestSuite(DoubleMetaphoneTest.class)); 1004 } 1005 1006 private DoubleMetaphone doubleMetaphone = null; 1007 1008 public DoubleMetaphoneTest(String name) { 1009 super(name); 1010 } 1011 1012 1015 private void assertDoubleMetaphone(String expected, String source) { 1016 assertEquals(expected, this.getDoubleMetaphone().encode(source)); 1017 try { 1018 assertEquals(expected, this.getDoubleMetaphone().encode((Object ) source)); 1019 } catch (EncoderException e) { 1020 fail("Unexpected expection: " + e); 1021 } 1022 assertEquals(expected, this.getDoubleMetaphone().doubleMetaphone(source)); 1023 assertEquals(expected, this.getDoubleMetaphone().doubleMetaphone(source, false)); 1024 } 1025 1026 1029 public void assertDoubleMetaphoneAlt(String expected, String source) { 1030 assertEquals(expected, this.getDoubleMetaphone().doubleMetaphone(source, true)); 1031 } 1032 1033 public void doubleMetaphoneEqualTest(String [][] pairs, boolean useAlternate) { 1034 this.validateFixture(pairs); 1035 for (int i = 0; i < pairs.length; i++) { 1036 String name0 = pairs[i][0]; 1037 String name1 = pairs[i][1]; 1038 String failMsg = "Expected match between " + name0 + " and " + name1 + " (use alternate: " + useAlternate + ")"; 1039 assertTrue(failMsg, this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1, useAlternate)); 1040 assertTrue(failMsg, this.getDoubleMetaphone().isDoubleMetaphoneEqual(name1, name0, useAlternate)); 1041 if (!useAlternate) { 1042 assertTrue(failMsg, this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1)); 1043 assertTrue(failMsg, this.getDoubleMetaphone().isDoubleMetaphoneEqual(name1, name0)); 1044 } 1045 } 1046 } 1047 1048 public void doubleMetaphoneNotEqualTest(boolean alternate) { 1049 assertFalse(this.getDoubleMetaphone().isDoubleMetaphoneEqual("Brain", "Band", alternate)); 1050 assertFalse(this.getDoubleMetaphone().isDoubleMetaphoneEqual("Band", "Brain", alternate)); 1051 1052 if (!alternate) { 1053 assertFalse(this.getDoubleMetaphone().isDoubleMetaphoneEqual("Brain", "Band")); 1054 assertFalse(this.getDoubleMetaphone().isDoubleMetaphoneEqual("Band", "Brain")); 1055 } 1056 } 1057 1058 private DoubleMetaphone getDoubleMetaphone() { 1059 return this.doubleMetaphone; 1060 } 1061 1062 protected StringEncoder makeEncoder() { 1063 return new Metaphone(); 1064 } 1065 1066 private void setDoubleMetaphone(DoubleMetaphone doubleMetaphone) { 1067 this.doubleMetaphone = doubleMetaphone; 1068 } 1069 1070 public void setUp() throws Exception { 1071 super.setUp(); 1072 this.setDoubleMetaphone(new DoubleMetaphone()); 1073 } 1074 1075 public void tearDown() throws Exception { 1076 super.tearDown(); 1077 this.setDoubleMetaphone(null); 1078 } 1079 1080 public void testDoubleMetaphone() { 1081 assertDoubleMetaphone("TSTN", "testing"); 1082 assertDoubleMetaphone("0", "The"); 1083 assertDoubleMetaphone("KK", "quick"); 1084 assertDoubleMetaphone("PRN", "brown"); 1085 assertDoubleMetaphone("FKS", "fox"); 1086 assertDoubleMetaphone("JMPT", "jumped"); 1087 assertDoubleMetaphone("AFR", "over"); 1088 assertDoubleMetaphone("0", "the"); 1089 assertDoubleMetaphone("LS", "lazy"); 1090 assertDoubleMetaphone("TKS", "dogs"); 1091 assertDoubleMetaphone("MKFR", "MacCafferey"); 1092 assertDoubleMetaphone("STFN", "Stephan"); 1093 assertDoubleMetaphone("KSSK", "Kuczewski"); 1094 1095 assertDoubleMetaphoneAlt("TSTN", "testing"); 1096 assertDoubleMetaphoneAlt("T", "The"); 1097 assertDoubleMetaphoneAlt("KK", "quick"); 1098 assertDoubleMetaphoneAlt("PRN", "brown"); 1099 assertDoubleMetaphoneAlt("FKS", "fox"); 1100 assertDoubleMetaphoneAlt("AMPT", "jumped"); 1101 assertDoubleMetaphoneAlt("AFR", "over"); 1102 assertDoubleMetaphoneAlt("T", "the"); 1103 assertDoubleMetaphoneAlt("LS", "lazy"); 1104 assertDoubleMetaphoneAlt("TKS", "dogs"); 1105 assertDoubleMetaphoneAlt("MKFR", "MacCafferey"); 1106 assertDoubleMetaphoneAlt("STFN", "Stephan"); 1107 assertDoubleMetaphoneAlt("KXFS", "Kutchefski"); 1108 } 1109 1110 public void testEmpty() { 1111 assertEquals(null, this.getDoubleMetaphone().doubleMetaphone(null)); 1112 assertEquals(null, this.getDoubleMetaphone().doubleMetaphone("")); 1113 assertEquals(null, this.getDoubleMetaphone().doubleMetaphone(" ")); 1114 assertEquals(null, this.getDoubleMetaphone().doubleMetaphone("\t\n\r ")); 1115 } 1116 1117 public void testIsDoubleMetaphoneEqualBasic() { 1118 String [][] testFixture = new String [][] { { "Case", "case" }, { 1119 "CASE", "Case" }, { 1120 "caSe", "cAsE" }, { 1121 "cookie", "quick" }, { 1122 "quick", "cookie" }, { 1123 "Brian", "Bryan" }, { 1124 "Auto", "Otto" }, { 1125 "Steven", "Stefan" }, { 1126 "Philipowitz", "Filipowicz" } 1127 }; 1128 doubleMetaphoneEqualTest(testFixture, false); 1129 doubleMetaphoneEqualTest(testFixture, true); 1130 } 1131 1132 1135 public void testIsDoubleMetaphoneEqualExtended1() { 1136 } 1141 1142 public void testIsDoubleMetaphoneEqualExtended2() { 1143 String [][] testFixture = new String [][] { { "Jablonski", "Yablonsky" } 1144 }; 1145 doubleMetaphoneEqualTest(testFixture, true); 1147 } 1148 1149 1153 public void testIsDoubleMetaphoneEqualExtended3() { 1154 this.validateFixture(FIXTURE); 1155 StringBuffer failures = new StringBuffer (); 1156 StringBuffer matches = new StringBuffer (); 1157 String cr = System.getProperty("line.separator"); 1158 matches.append("private static final String[][] MATCHES = {" + cr); 1159 int failCount = 0; 1160 for (int i = 0; i < FIXTURE.length; i++) { 1161 String name0 = FIXTURE[i][0]; 1162 String name1 = FIXTURE[i][1]; 1163 boolean match1 = this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1, false); 1164 boolean match2 = this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1, true); 1165 if (match1 == false && match2 == false) { 1166 String failMsg = "[" + i + "] " + name0 + " and " + name1 + cr; 1167 failures.append(failMsg); 1168 failCount++; 1169 } else { 1170 matches.append("{\"" + name0 + "\", \"" + name1 + "\"}," + cr); 1171 } 1172 } 1173 String msg = failures.toString(); 1174 matches.append("};"); 1175 if (msg.length() > 0) { 1178 } 1182 } 1183 1184 public void testIsDoubleMetaphoneEqualWithMATCHES() { 1185 this.validateFixture(MATCHES); 1186 for (int i = 0; i < MATCHES.length; i++) { 1187 String name0 = MATCHES[i][0]; 1188 String name1 = MATCHES[i][1]; 1189 boolean match1 = this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1, false); 1190 boolean match2 = this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1, true); 1191 if (match1 == false && match2 == false) { 1192 fail("Expected match [" + i + "] " + name0 + " and " + name1); 1193 } 1194 } 1195 } 1196 1197 public void testIsDoubleMetaphoneNotEqual() { 1198 doubleMetaphoneNotEqualTest(false); 1199 doubleMetaphoneNotEqualTest(true); 1200 } 1201 1202 public void testCCedilla() { 1203 this.getDoubleMetaphone().isDoubleMetaphoneEqual("ç", "S"); 1204 } 1205 1206 public void testNTilde() { 1207 this.getDoubleMetaphone().isDoubleMetaphoneEqual("ñ", "N"); 1208 } 1209 1210 public void validateFixture(String [][] pairs) { 1211 if (pairs.length == 0) { 1212 fail("Test fixture is empty"); 1213 } 1214 for (int i = 0; i < pairs.length; i++) { 1215 if (pairs[i].length != 2) { 1216 fail("Error in test fixture in the data array at index " + i); 1217 } 1218 } 1219 } 1220} 1221 | Popular Tags |