KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > contineo > core > text > analyze > Stopwords


1 package org.contineo.core.text.analyze;
2
/**
 * Static stop-word lists for German, French and English text analysis.
 *
 * <p>Some entries are truncated forms (e.g. {@code "werd"}, {@code "wurd"},
 * {@code "konn"}); presumably they are meant to match stemmed tokens --
 * confirm against the analyzer that consumes these lists.
 *
 * <p>Created on 13.01.2005
 */
public class Stopwords {

    /** German stop words, returned for the language code {@code "de"}. */
    private static final String[] GERMAN_STOP_WORDS = {
            "der","die","das","dass","daß",
            "ein","dies","dem","den","des",
            "zu","zum","zur","eine","einer","einem",
            "einen","eines",
            "auf","aus","am","im","in","um","an",
            "und","oder","ohne","mit",
            "ich","du","er","sie","es","wir","ihr",
            "mein","sein","dein","euer","eure",
            "mich","dich","sich",
            "durch","wegen","bei","neben",
            "vor","nach","von","vom","als","für",
            // "werd" used to be spelled "werd," (comma inside the literal),
            // which could never match a real token.
            "wird","werde","werden","werdet","werd",
            "wurde","wurden","wurdet","wurdest","wurd",
            "würde","würden","würdet","würdest","würd",
            "kann","können","konn",
            "muss","muß","müss","müßte","müssen","musste",
            "mußtet","mußte","müsste",
            "wer","wie","was","wem","wen","wessen",
            "wo","womit","wofür","wodurch",
            "wobei","wonach","welch",
            "welcher","welchen","welches","welchem",
            "nicht","nur","damit","so","auch",
            "ist","sind","war","waren",
            "man","also","aber","über",
            "soll","will","woll",
            "wollen","wollt","wolltet","willst",
            "wollte","wolltest","wollten",
            "weil","noch","dabei","dann","danach",
            "ja","nein","immer","nie","jetzt","heute",
            "mehr","weniger","all",
            "solch","solcher","solche","solches","solchen","solchem",
            "bereits","zwischen",
            "innen","aussen","außen","innerhalb","außerhalb",
            "wieder","wider","gegen","wenn","hat","hab",
            "je","jed","jede","jeder","jedes","jeden","jedem",
            "gar","dar","einzeln","möglich",
            "haben","hat","hatte","hast","hattest","habt",
            "hattet","hatten",
            "hätt","hätte","hätten","hättest","hättet",
            "oben","unten","über","unter","obere","untere",
            "weiterhin","desweiteren","gut","schlecht",
            "allgemein","wichtig","etwas","anhand",
            "jedoch", "dazu","dafür",
            "ers","zwei","drit","drei","vier",
            "erster","zweiter","dritter","vierter",
            "beispiel","beispielsweise","bis",
            "neu","neue","neues","neuen","neuer","neuem",
            "andere","anderes","anderen","anderer","anders","anderem",
            "richtig","falsch","sowie"
        };

    /** French stop words, returned for the language code {@code "fr"}. */
    private static final String[] FRENCH_STOP_WORDS = {
            "alors","au","aucuns","aussi","autre",
            "avant","avec","avoir","bon","car","ce",
            "cela","ces","ceux","chaque","ci",
            "comme","comment","dans","des","du",
            "dedans","dehors","depuis","deux","devrait",
            "doit","donc","dos","droite","début",
            "elle","elles","en","encore","essai",
            "est","et","eu","fait","faites",
            "fois","font","force","haut","hors",
            "ici","il","ils","je","juste",
            "la","le","les","leur","là",
            "ma","maintenant","mais","mes","mine",
            "moins","mon","mot","même","ni",
            "nommés","notre","nous","nouveaux","ou",
            "où","par","parce","parole","pas",
            "personnes","peut","peu","pièce","plupart",
            "pour","pourquoi","quand","que","quel",
            "quelle","quelles","quels","qui","sa",
            "sans","ses","seulement","si","sien",
            "son","sont","sous","soyez","sujet",
            "sur","ta","tandis","tellement","tels",
            "tes","ton","tous","tout","trop",
            "très","tu","valeur","voie","voient",
            "vont","votre","vous","vu","ça",
            "étaient","état","étions","été","être"
    };

    /** English stop words; also the fallback for every other language code. */
    private static final String[] ENGLISH_STOP_WORDS = {
            "a", "and", "are", "as", "at", "be", "but", "by",
            "for", "if", "in", "into", "is", "it",
            "no", "not", "of", "on", "or", "s", "such",
            "too", "that", "the", "their", "then", "there", "these",
            "they", "this", "to", "was", "will", "with",
            "thus", "have", "has", "had", "do", "did", "yes",
            "than", "those", "just", "like", "about",
            "which", "who", "what", "whom", "when", "where",
            "within", "without", "whose", "although", "all",
            "because", "while", "how", "here", "any", "some",
            "during", "next", "previous", "does",
            "between", "been", "one", "two", "three", "four",
            "five", "six", "seven", "eight", "nine", "ten",
            "bottom", "top", "down", "up", "left", "right",
            "whether", "whole", "also", "now", "onto", "still",
            "often", "more", "most", "good", "best", "go", "better",
            "gone", "went", "many", "much", "lot", "sever"
        };

    /**
     * Returns the stop-word list for the given language code.
     *
     * @param language language code, compared case-insensitively:
     *                 {@code "de"} selects German, {@code "fr"} selects French;
     *                 any other value, including {@code null}, selects English
     * @return a fresh copy of the selected list; modifying the returned array
     *         does not affect later calls
     */
    public static String[] getStopwords(String language) {
        // Constant-first comparison keeps a null language on the English
        // fallback path instead of throwing a NullPointerException.
        final String[] selected;
        if ("de".equalsIgnoreCase(language)) {
            selected = GERMAN_STOP_WORDS;
        } else if ("fr".equalsIgnoreCase(language)) {
            selected = FRENCH_STOP_WORDS;
        } else {
            selected = ENGLISH_STOP_WORDS;
        }
        // Hand out a copy so callers cannot mutate the shared static lists.
        return (String[]) selected.clone();
    }
}
118
Popular Tags