1 2 package net.nutch.analysis; 3 import net.nutch.searcher.Query; 4 import net.nutch.searcher.QueryFilters; 5 import net.nutch.searcher.Query.Clause; 6 import org.apache.lucene.analysis.StopFilter; 7 import java.io.*; 8 import java.util.*; 9 10 public class NutchAnalysisTokenManager implements NutchAnalysisConstants 11 { 12 13 public NutchAnalysisTokenManager(Reader reader) { 14 this(new FastCharStream(reader)); 15 } 16 public java.io.PrintStream debugStream = System.out; 17 public void setDebugStream(java.io.PrintStream ds) { debugStream = ds; } 18 private final int jjStopStringLiteralDfa_0(int pos, long active0) 19 { 20 switch (pos) 21 { 22 default : 23 return -1; 24 } 25 } 26 private final int jjStartNfa_0(int pos, long active0) 27 { 28 return jjMoveNfa_0(jjStopStringLiteralDfa_0(pos, active0), pos + 1); 29 } 30 private final int jjStopAtPos(int pos, int kind) 31 { 32 jjmatchedKind = kind; 33 jjmatchedPos = pos; 34 return pos + 1; 35 } 36 private final int jjStartNfaWithStates_0(int pos, int kind, int state) 37 { 38 jjmatchedKind = kind; 39 jjmatchedPos = pos; 40 try { curChar = input_stream.readChar(); } 41 catch(java.io.IOException e) { return pos + 1; } 42 return jjMoveNfa_0(state, pos + 1); 43 } 44 private final int jjMoveStringLiteralDfa0_0() 45 { 46 switch(curChar) 47 { 48 case 34: 49 return jjStopAtPos(0, 9); 50 case 39: 51 return jjStopAtPos(0, 14); 52 case 43: 53 return jjStopAtPos(0, 7); 54 case 45: 55 return jjStopAtPos(0, 8); 56 case 46: 57 return jjStopAtPos(0, 12); 58 case 47: 59 return jjStopAtPos(0, 11); 60 case 58: 61 return jjStopAtPos(0, 10); 62 case 64: 63 return jjStopAtPos(0, 13); 64 default : 65 return jjMoveNfa_0(1, 0); 66 } 67 } 68 private final void jjCheckNAdd(int state) 69 { 70 if (jjrounds[state] != jjround) 71 { 72 jjstateSet[jjnewStateCnt++] = state; 73 jjrounds[state] = jjround; 74 } 75 } 76 private final void jjAddStates(int start, int end) 77 { 78 do { 79 jjstateSet[jjnewStateCnt++] = jjnextStates[start]; 80 } while (start++ != end); 81 } 82 private final void jjCheckNAddTwoStates(int state1, int state2) 83 { 84 jjCheckNAdd(state1); 85 jjCheckNAdd(state2); 86 } 87 private final void jjCheckNAddStates(int start, int end) 88 { 89 do { 90 jjCheckNAdd(jjnextStates[start]); 91 } while (start++ != end); 92 } 93 private final void jjCheckNAddStates(int start) 94 { 95 jjCheckNAdd(jjnextStates[start]); 96 jjCheckNAdd(jjnextStates[start + 1]); 97 } 98 static final long[] jjbitVec0 = { 99 0xfffffffeL, 0x0L, 0x0L, 0x0L 100 }; 101 static final long[] jjbitVec2 = { 102 0x0L, 0x0L, 0x0L, 0xff7fffffff7fffffL 103 }; 104 static final long[] jjbitVec3 = { 105 0x1ff0000000000000L, 0xffffffffffffc000L, 0xffffffffL, 0x600000000000000L 106 }; 107 static final long[] jjbitVec4 = { 108 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL 109 }; 110 static final long[] jjbitVec5 = { 111 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffL, 0x0L 112 }; 113 static final long[] jjbitVec6 = { 114 0xffffffffffffffffL, 0xffffffffffffffffL, 0x0L, 0x0L 115 }; 116 static final long[] jjbitVec7 = { 117 0x3fffffffffffL, 0x0L, 0x0L, 0x0L 118 }; 119 private final int jjMoveNfa_0(int startState, int curPos) 120 { 121 int[] nextStates; 122 int startsAt = 0; 123 jjnewStateCnt = 10; 124 int i = 1; 125 jjstateSet[0] = startState; 126 int j, kind = 0x7fffffff; 127 for (;;) 128 { 129 if (++jjround == 0x7fffffff) 130 ReInitRounds(); 131 if (curChar < 64) 132 { 133 long l = 1L << curChar; 134 MatchLoop: do 135 { 136 switch(jjstateSet[--i]) 137 { 138 case 1: 139 case 0: 140 if ((0x3ff004000000000L & l) == 0L) 141 break; 142 kind = 1; 143 jjCheckNAdd(0); 144 break; 145 case 2: 146 if (curChar == 46) 147 jjCheckNAdd(3); 148 break; 149 case 4: 150 if (curChar != 46) 151 break; 152 if (kind > 2) 153 kind = 2; 154 jjCheckNAdd(3); 155 break; 156 case 7: 157 if (curChar == 35) 158 kind = 1; 159 break; 160 case 8: 161 if (curChar == 43 && kind > 1) 162 kind = 1; 163 break; 164 case 9: 165 if (curChar == 43) 166 jjstateSet[jjnewStateCnt++] = 8; 167 break; 168 default : break; 169 } 170 } while(i != startsAt); 171 } 172 else if (curChar < 128) 173 { 174 long l = 1L << (curChar & 077); 175 MatchLoop: do 176 { 177 switch(jjstateSet[--i]) 178 { 179 case 1: 180 if ((0x7fffffe87fffffeL & l) != 0L) 181 { 182 if (kind > 1) 183 kind = 1; 184 jjCheckNAdd(0); 185 } 186 if ((0x7fffffe07fffffeL & l) != 0L) 187 jjstateSet[jjnewStateCnt++] = 2; 188 if ((0x800000008L & l) != 0L) 189 jjAddStates(0, 1); 190 break; 191 case 0: 192 if ((0x7fffffe87fffffeL & l) == 0L) 193 break; 194 if (kind > 1) 195 kind = 1; 196 jjCheckNAdd(0); 197 break; 198 case 3: 199 if ((0x7fffffe07fffffeL & l) != 0L) 200 jjstateSet[jjnewStateCnt++] = 4; 201 break; 202 case 6: 203 if ((0x800000008L & l) != 0L) 204 jjAddStates(0, 1); 205 break; 206 default : break; 207 } 208 } while(i != startsAt); 209 } 210 else 211 { 212 int hiByte = (int)(curChar >> 8); 213 int i1 = hiByte >> 6; 214 long l1 = 1L << (hiByte & 077); 215 int i2 = (curChar & 0xff) >> 6; 216 long l2 = 1L << (curChar & 077); 217 MatchLoop: do 218 { 219 switch(jjstateSet[--i]) 220 { 221 case 1: 222 if (jjCanMove_0(hiByte, i1, i2, l1, l2)) 223 { 224 if (kind > 1) 225 kind = 1; 226 jjCheckNAdd(0); 227 } 228 if (jjCanMove_0(hiByte, i1, i2, l1, l2)) 229 jjstateSet[jjnewStateCnt++] = 2; 230 if (jjCanMove_1(hiByte, i1, i2, l1, l2)) 231 { 232 if (kind > 3) 233 kind = 3; 234 } 235 break; 236 case 0: 237 if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) 238 break; 239 if (kind > 1) 240 kind = 1; 241 jjCheckNAdd(0); 242 break; 243 case 3: 244 if (jjCanMove_0(hiByte, i1, i2, l1, l2)) 245 jjstateSet[jjnewStateCnt++] = 4; 246 break; 247 case 5: 248 if (jjCanMove_1(hiByte, i1, i2, l1, l2) && kind > 3) 249 kind = 3; 250 break; 251 default : break; 252 } 253 } while(i != startsAt); 254 } 255 if (kind != 0x7fffffff) 256 { 257 jjmatchedKind = kind; 258 jjmatchedPos = curPos; 259 kind = 0x7fffffff; 260 } 261 ++curPos; 262 if ((i = jjnewStateCnt) == (startsAt = 10 - (jjnewStateCnt = startsAt))) 263 return curPos; 264 try { curChar = input_stream.readChar(); } 265 catch(java.io.IOException e) { return curPos; } 266 } 267 } 268 static final int[] jjnextStates = { 269 7, 9, 270 }; 271 private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2) 272 { 273 switch(hiByte) 274 { 275 case 0: 276 return ((jjbitVec2[i2] & l2) != 0L); 277 default : 278 if ((jjbitVec0[i1] & l1) != 0L) 279 return true; 280 return false; 281 } 282 } 283 private static final boolean jjCanMove_1(int hiByte, int i1, int i2, long l1, long l2) 284 { 285 switch(hiByte) 286 { 287 case 48: 288 return ((jjbitVec4[i2] & l2) != 0L); 289 case 49: 290 return ((jjbitVec5[i2] & l2) != 0L); 291 case 51: 292 return ((jjbitVec6[i2] & l2) != 0L); 293 case 61: 294 return ((jjbitVec7[i2] & l2) != 0L); 295 default : 296 if ((jjbitVec3[i1] & l1) != 0L) 297 return true; 298 return false; 299 } 300 } 301 public static final String [] jjstrLiteralImages = { 302 "", null, null, null, null, null, null, "\53", "\55", "\42", "\72", "\57", 303 "\56", "\100", "\47", null, null, null, null, null, }; 304 public static final String [] lexStateNames = { 305 "DEFAULT", 306 }; 307 protected CharStream input_stream; 308 private final int[] jjrounds = new int[10]; 309 private final int[] jjstateSet = new int[20]; 310 StringBuffer image; 311 int jjimageLen; 312 int lengthOfMatch; 313 protected char curChar; 314 public NutchAnalysisTokenManager(CharStream stream) 315 { 316 input_stream = stream; 317 } 318 public NutchAnalysisTokenManager(CharStream stream, int lexState) 319 { 320 this(stream); 321 SwitchTo(lexState); 322 } 323 public void ReInit(CharStream stream) 324 { 325 jjmatchedPos = jjnewStateCnt = 0; 326 curLexState = defaultLexState; 327 input_stream = stream; 328 ReInitRounds(); 329 } 330 private final void ReInitRounds() 331 { 332 int i; 333 jjround = 0x80000001; 334 for (i = 10; i-- > 0;) 335 jjrounds[i] = 0x80000000; 336 } 337 public void ReInit(CharStream stream, int lexState) 338 { 339 ReInit(stream); 340 SwitchTo(lexState); 341 } 342 public void SwitchTo(int lexState) 343 { 344 if (lexState >= 1 || lexState < 0) 345 throw new TokenMgrError("Error: Ignoring invalid lexical state : " + lexState + ". State unchanged.", TokenMgrError.INVALID_LEXICAL_STATE); 346 else 347 curLexState = lexState; 348 } 349 350 protected Token jjFillToken() 351 { 352 Token t = Token.newToken(jjmatchedKind); 353 t.kind = jjmatchedKind; 354 String im = jjstrLiteralImages[jjmatchedKind]; 355 t.image = (im == null) ? input_stream.GetImage() : im; 356 t.beginLine = input_stream.getBeginLine(); 357 t.beginColumn = input_stream.getBeginColumn(); 358 t.endLine = input_stream.getEndLine(); 359 t.endColumn = input_stream.getEndColumn(); 360 return t; 361 } 362 363 int curLexState = 0; 364 int defaultLexState = 0; 365 int jjnewStateCnt; 366 int jjround; 367 int jjmatchedPos; 368 int jjmatchedKind; 369 370 public Token getNextToken() 371 { 372 int kind; 373 Token specialToken = null; 374 Token matchedToken; 375 int curPos = 0; 376 377 EOFLoop : 378 for (;;) 379 { 380 try 381 { 382 curChar = input_stream.BeginToken(); 383 } 384 catch(java.io.IOException e) 385 { 386 jjmatchedKind = 0; 387 matchedToken = jjFillToken(); 388 return matchedToken; 389 } 390 image = null; 391 jjimageLen = 0; 392 393 jjmatchedKind = 0x7fffffff; 394 jjmatchedPos = 0; 395 curPos = jjMoveStringLiteralDfa0_0(); 396 if (jjmatchedPos == 0 && jjmatchedKind > 15) 397 { 398 jjmatchedKind = 15; 399 } 400 if (jjmatchedKind != 0x7fffffff) 401 { 402 if (jjmatchedPos + 1 < curPos) 403 input_stream.backup(curPos - jjmatchedPos - 1); 404 matchedToken = jjFillToken(); 405 TokenLexicalActions(matchedToken); 406 return matchedToken; 407 } 408 int error_line = input_stream.getEndLine(); 409 int error_column = input_stream.getEndColumn(); 410 String error_after = null; 411 boolean EOFSeen = false; 412 try { input_stream.readChar(); input_stream.backup(1); } 413 catch (java.io.IOException e1) { 414 EOFSeen = true; 415 error_after = curPos <= 1 ? "" : input_stream.GetImage(); 416 if (curChar == '\n' || curChar == '\r') { 417 error_line++; 418 error_column = 0; 419 } 420 else 421 error_column++; 422 } 423 if (!EOFSeen) { 424 input_stream.backup(1); 425 error_after = curPos <= 1 ? "" : input_stream.GetImage(); 426 } 427 throw new TokenMgrError(EOFSeen, curLexState, error_line, error_column, error_after, curChar, TokenMgrError.LEXICAL_ERROR); 428 } 429 } 430 431 void TokenLexicalActions(Token matchedToken) 432 { 433 switch(jjmatchedKind) 434 { 435 case 1 : 436 if (image == null) 437 image = new StringBuffer (new String (input_stream.GetSuffix(jjimageLen + (lengthOfMatch = jjmatchedPos + 1)))); 438 else 439 image.append(input_stream.GetSuffix(jjimageLen + (lengthOfMatch = jjmatchedPos + 1))); 440 matchedToken.image = matchedToken.image.toLowerCase(); 441 break; 442 case 2 : 443 if (image == null) 444 image = new StringBuffer (new String (input_stream.GetSuffix(jjimageLen + (lengthOfMatch = jjmatchedPos + 1)))); 445 else 446 image.append(input_stream.GetSuffix(jjimageLen + (lengthOfMatch = jjmatchedPos + 1))); 447 for (int i = 0; i < image.length(); i++) { 449 if (image.charAt(i) == '.') 450 image.deleteCharAt(i--); 451 } 452 matchedToken.image = image.toString().toLowerCase(); 453 break; 454 default : 455 break; 456 } 457 } 458 } 459 | Popular Tags |