1 package org.contineo.core.text.analyze; 2 3 6 public class Stopwords { 7 8 private final static String [] GERMAN_STOP_WORDS = { 9 "der","die","das","dass","daß", 10 "ein","dies","dem","den","des", 11 "zu","zum","zur","eine","einer","einem", 12 "einen","eines", 13 "auf","aus","am","im","in","um","an", 14 "und","oder","ohne","mit", 15 "ich","du","er","sie","es","wir","ihr", 16 "mein","sein","dein","euer","eure", 17 "mich","dich","sich", 18 "durch","wegen","bei","neben", 19 "vor","nach","von","vom","als","für", 20 "wird","werde","werden","werdet","werd,", 21 "wurde","wurden","wurdet","wurdest","wurd", 22 "würde","würden","würdet","würdest","würd", 23 "kann","können","konn", 24 "muss","muß","müss","müßte","müssen","musste", 25 "mußtet","mußte","müsste", 26 "wer","wie","was","wem","wen","wessen", 27 "wo","womit","wofür","wodurch", 28 "wobei","wonach","welch", 29 "welcher","welchen","welches","welchem", 30 "nicht","nur","damit","so","auch", 31 "ist","sind","war","waren", 32 "man","also","aber","über", 33 "soll","will","woll", 34 "wollen","wollt","wolltet","willst", 35 "wollte","wolltest","wollten", 36 "weil","noch","dabei","dann","danach", 37 "ja","nein","immer","nie","jetzt","heute", 38 "mehr","weniger","all", 39 "solch","solcher","solche","solches","solchen","solchem", 40 "bereits","zwischen", 41 "innen","aussen","außen","innerhalb","außerhalb", 42 "wieder","wider","gegen","wenn","hat","hab", 43 "je","jed","jede","jeder","jedes","jeden","jedem", 44 "gar","dar","einzeln","möglich", 45 "haben","hat","hatte","hast","hattest","habt", 46 "hattet","hatten", 47 "hätt","hätte","hätten","hättest","hättet", 48 "oben","unten","über","unter","obere","untere", 49 "weiterhin","desweiteren","gut","schlecht", 50 "allgemein","wichtig","etwas","anhand", 51 "jedoch", "dazu","dafür", 52 "ers","zwei","drit","drei","vier", 53 "erster","zweiter","dritter","vierter", 54 "beispiel","beispielsweise","bis", 55 "neu","neue","neues","neuen","neuer","neuem", 56 "andere","anderes","anderen","anderer","anders","anderem", 57 "richtig","falsch","sowie" 58 }; 59 60 private final static String [] FRENCH_STOP_WORDS = { 61 "alors","au","aucuns","aussi","autre", 62 "avant","avec","avoir","bon","car","ce", 63 "cela","ces","ceux","chaque","ci", 64 "comme","comment","dans","des","du", 65 "dedans","dehors","depuis","deux","devrait", 66 "doit","donc","dos","droite","début", 67 "elle","elles","en","encore","essai", 68 "est","et","eu","fait","faites", 69 "fois","font","force","haut","hors", 70 "ici","il","ils","je","juste", 71 "la","le","les","leur","là", 72 "ma","maintenant","mais","mes","mine", 73 "moins","mon","mot","même","ni", 74 "nommés","notre","nous","nouveaux","ou", 75 "où","par","parce","parole","pas", 76 "personnes","peut","peu","pièce","plupart", 77 "pour","pourquoi","quand","que","quel", 78 "quelle","quelles","quels","qui","sa", 79 "sans","ses","seulement","si","sien", 80 "son","sont","sous","soyez","sujet", 81 "sur","ta","tandis","tellement","tels", 82 "tes","ton","tous","tout","trop", 83 "très","tu","valeur","voie","voient", 84 "vont","votre","vous","vu","ça", 85 "étaient","état","étions","été","être" 86 }; 87 88 private final static String [] ENGLISH_STOP_WORDS = { 89 "a", "and", "are", "as", "at", "be", "but", "by", 90 "for", "if", "in", "into", "is", "it", 91 "no", "not", "of", "on", "or", "s", "such", 92 "too", "that", "the", "their", "then", "there", "these", 93 "they", "this", "to", "was", "will", "with", 94 "thus", "have", "has", "had", "do", "did", "yes", 95 "than", "those", "just", "like", "about", 96 "which", "who", "what", "whom", "when", "where", 97 "within", "without", "whose", "although", "all", 98 "because", "while", "how", "here", "any", "some", 99 "during", "next", "previous", "does", 100 "between", "been", "one", "two", "three", "four", 101 "five", "six", "seven", "eight", "nine", "ten", 102 "bottom", "top", "down", "up", "left", "right", 103 "whether", "whole", "also", "now", "onto", "still", 104 "often", "more", "most", "good", "best", "go", "better", 105 "gone", "went", "many", "much", "lot", "sever" 106 }; 107 108 public static String [] getStopwords(String language) { 109 if (language.equalsIgnoreCase("de")) 110 return GERMAN_STOP_WORDS; 111 else 112 if (language.equalsIgnoreCase("fr")) 113 return FRENCH_STOP_WORDS; 114 else 115 return ENGLISH_STOP_WORDS; 116 } 117 } 118 | Popular Tags |