package org.apache.lucene.analysis.el;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardTokenizer;

import java.io.Reader;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Set;

/**
 * Analyzer that tokenizes text with a {@link StandardTokenizer}, passes the
 * tokens through a {@link GreekLowerCaseFilter} configured with a charset
 * table, and finally removes stop words with a {@link StopFilter}.
 *
 * <p>The built-in stop words are stored as sequences of indices into a
 * charset table, so the same list can be rendered for whichever table is
 * supplied to the constructor.
 */
public final class GreekAnalyzer extends Analyzer
{
    // Indices into a charset table (see makeStopWords, which resolves each
    // one via charset[index]). The names presumably transliterate the Greek
    // letters they select; confirm against GreekCharsets.
    private static final char A = 6;
    private static final char B = 7;
    private static final char G = 8;
    private static final char D = 9;
    private static final char E = 10;
    private static final char Z = 11;
    private static final char H = 12;
    private static final char TH = 13;
    private static final char I = 14;
    private static final char K = 15;
    private static final char L = 16;
    private static final char M = 17;
    private static final char N = 18;
    private static final char KS = 19;
    private static final char O = 20;
    private static final char P = 21;
    private static final char R = 22;
    // NOTE(review): index 23 is intentionally not used here; presumably it is
    // a letter variant that never appears in the list below. Verify against
    // GreekCharsets.
    private static final char S = 24;
    private static final char T = 25;
    private static final char Y = 26;
    private static final char F = 27;
    private static final char X = 28;
    private static final char PS = 29;
    private static final char W = 30;

    /**
     * Default stop words, each expressed as a sequence of charset-table
     * indices rather than as concrete characters.
     */
    private static final char[][] GREEK_STOP_WORDS = {
        {O},
        {H},
        {T, O},
        {O, I},
        {T, A},
        {T, O, Y},
        {T, H, S},
        {T, W, N},
        {T, O, N},
        {T, H, N},
        {K, A, I},
        {K, I},
        {K},
        {E, I, M, A, I},
        {E, I, S, A, I},
        {E, I, N, A, I},
        {E, I, M, A, S, T, E},
        {E, I, S, T, E},
        {S, T, O},
        {S, T, O, N},
        {S, T, H},
        {S, T, H, N},
        {M, A},
        {A, L, L, A},
        {A, P, O},
        {G, I, A},
        {P, R, O, S},
        {M, E},
        {S, E},
        {W, S},
        {P, A, R, A},
        {A, N, T, I},
        {K, A, T, A},
        {M, E, T, A},
        {TH, A},
        {N, A},
        {D, E},
        {D, E, N},
        {M, H},
        {M, H, N},
        {E, P, I},
        {E, N, W},
        {E, A, N},
        {A, N},
        {T, O, T, E},
        {P, O, Y},
        {P, W, S},
        {P, O, I, O, S},
        {P, O, I, A},
        {P, O, I, O},
        {P, O, I, O, I},
        {P, O, I, E, S},
        {P, O, I, W, N},
        {P, O, I, O, Y, S},
        {A, Y, T, O, S},
        {A, Y, T, H},
        {A, Y, T, O},
        {A, Y, T, O, I},
        {A, Y, T, W, N},
        {A, Y, T, O, Y, S},
        {A, Y, T, E, S},
        {A, Y, T, A},
        {E, K, E, I, N, O, S},
        {E, K, E, I, N, H},
        {E, K, E, I, N, O},
        {E, K, E, I, N, O, I},
        {E, K, E, I, N, E, S},
        {E, K, E, I, N, A},
        {E, K, E, I, N, W, N},
        {E, K, E, I, N, O, Y, S},
        {O, P, W, S},
        {O, M, W, S},
        {I, S, W, S},
        {O, S, O},
        {O, T, I}
    };

    /**
     * Stop words handed to the {@link StopFilter} in
     * {@link #tokenStream(String, Reader)}. Assigned exactly once by every
     * constructor.
     */
    private final Set stopSet;

    /**
     * Charset table handed to the {@link GreekLowerCaseFilter}. The array is
     * stored by reference, not copied.
     */
    private final char[] charset;

    /**
     * Builds an analyzer that uses {@code GreekCharsets.UnicodeGreek} and the
     * built-in stop words rendered in that charset.
     */
    public GreekAnalyzer()
    {
        this(GreekCharsets.UnicodeGreek);
    }

    /**
     * Builds an analyzer that uses the built-in stop words rendered in the
     * given charset.
     *
     * @param charset charset table used both to render the stop words and to
     *                configure the lower-case filter
     */
    public GreekAnalyzer(char[] charset)
    {
        this.charset = charset;
        this.stopSet = StopFilter.makeStopSet(makeStopWords(charset));
    }

    /**
     * Builds an analyzer with caller-supplied stop words.
     *
     * @param charset   charset table passed to the lower-case filter
     * @param stopwords stop words, converted to a set with
     *                  {@link StopFilter#makeStopSet(String[])}
     */
    public GreekAnalyzer(char[] charset, String[] stopwords)
    {
        this.charset = charset;
        this.stopSet = StopFilter.makeStopSet(stopwords);
    }

    /**
     * Builds an analyzer whose stop words are the keys of the given table.
     *
     * @param charset   charset table passed to the lower-case filter
     * @param stopwords table whose key set is copied into the stop set; its
     *                  values are not read
     */
    public GreekAnalyzer(char[] charset, Hashtable stopwords)
    {
        this.charset = charset;
        this.stopSet = new HashSet(stopwords.keySet());
    }

    /**
     * Renders {@link #GREEK_STOP_WORDS} as strings by looking every stored
     * index up in the given charset table.
     *
     * @param charset table mapping letter indices to characters
     * @return one string per built-in stop word, in declaration order
     */
    private static String[] makeStopWords(char[] charset)
    {
        String[] res = new String[GREEK_STOP_WORDS.length];
        for (int i = 0; i < res.length; i++)
        {
            char[] theStopWord = GREEK_STOP_WORDS[i];
            // The final length is known up front, so size the buffer once.
            StringBuffer theWord = new StringBuffer(theStopWord.length);
            for (int j = 0; j < theStopWord.length; j++)
            {
                theWord.append(charset[theStopWord[j]]);
            }
            res[i] = theWord.toString();
        }
        return res;
    }

    /**
     * Creates the token stream for the text supplied by {@code reader}.
     *
     * @param fieldName name of the field being analyzed; not consulted here
     * @param reader    source of the text to tokenize
     * @return a {@link StandardTokenizer} wrapped in a
     *         {@link GreekLowerCaseFilter} and then a {@link StopFilter}
     */
    public TokenStream tokenStream(String fieldName, Reader reader)
    {
        TokenStream result = new StandardTokenizer(reader);
        result = new GreekLowerCaseFilter(result, charset);
        result = new StopFilter(result, stopSet);
        return result;
    }
}