KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > analysis > NutchAnalysisTokenManager


1 /* Generated By:JavaCC: Do not edit this line. NutchAnalysisTokenManager.java */
2 package net.nutch.analysis;
3 import net.nutch.searcher.Query;
4 import net.nutch.searcher.QueryFilters;
5 import net.nutch.searcher.Query.Clause;
6 import org.apache.lucene.analysis.StopFilter;
7 import java.io.*;
8 import java.util.*;
9
10 public class NutchAnalysisTokenManager implements NutchAnalysisConstants
11 {
/**
 * Creates a token manager that reads its characters from {@code reader}.
 * The reader is wrapped in a {@code FastCharStream} and handed to the
 * {@code CharStream} constructor.
 *
 * @param reader source of the text to tokenize
 */
public NutchAnalysisTokenManager(Reader reader)
{
   this(new FastCharStream(reader));
}
16   public java.io.PrintStream JavaDoc debugStream = System.out;
17   public void setDebugStream(java.io.PrintStream JavaDoc ds) { debugStream = ds; }
18 private final int jjStopStringLiteralDfa_0(int pos, long active0)
19 {
20    switch (pos)
21    {
22       default :
23          return -1;
24    }
25 }
26 private final int jjStartNfa_0(int pos, long active0)
27 {
28    return jjMoveNfa_0(jjStopStringLiteralDfa_0(pos, active0), pos + 1);
29 }
30 private final int jjStopAtPos(int pos, int kind)
31 {
32    jjmatchedKind = kind;
33    jjmatchedPos = pos;
34    return pos + 1;
35 }
36 private final int jjStartNfaWithStates_0(int pos, int kind, int state)
37 {
38    jjmatchedKind = kind;
39    jjmatchedPos = pos;
40    try { curChar = input_stream.readChar(); }
41    catch(java.io.IOException JavaDoc e) { return pos + 1; }
42    return jjMoveNfa_0(state, pos + 1);
43 }
44 private final int jjMoveStringLiteralDfa0_0()
45 {
46    switch(curChar)
47    {
48       case 34:
49          return jjStopAtPos(0, 9);
50       case 39:
51          return jjStopAtPos(0, 14);
52       case 43:
53          return jjStopAtPos(0, 7);
54       case 45:
55          return jjStopAtPos(0, 8);
56       case 46:
57          return jjStopAtPos(0, 12);
58       case 47:
59          return jjStopAtPos(0, 11);
60       case 58:
61          return jjStopAtPos(0, 10);
62       case 64:
63          return jjStopAtPos(0, 13);
64       default :
65          return jjMoveNfa_0(1, 0);
66    }
67 }
68 private final void jjCheckNAdd(int state)
69 {
70    if (jjrounds[state] != jjround)
71    {
72       jjstateSet[jjnewStateCnt++] = state;
73       jjrounds[state] = jjround;
74    }
75 }
76 private final void jjAddStates(int start, int end)
77 {
78    do {
79       jjstateSet[jjnewStateCnt++] = jjnextStates[start];
80    } while (start++ != end);
81 }
82 private final void jjCheckNAddTwoStates(int state1, int state2)
83 {
84    jjCheckNAdd(state1);
85    jjCheckNAdd(state2);
86 }
87 private final void jjCheckNAddStates(int start, int end)
88 {
89    do {
90       jjCheckNAdd(jjnextStates[start]);
91    } while (start++ != end);
92 }
93 private final void jjCheckNAddStates(int start)
94 {
95    jjCheckNAdd(jjnextStates[start]);
96    jjCheckNAdd(jjnextStates[start + 1]);
97 }
98 static final long[] jjbitVec0 = {
99    0xfffffffeL, 0x0L, 0x0L, 0x0L
100 };
101 static final long[] jjbitVec2 = {
102    0x0L, 0x0L, 0x0L, 0xff7fffffff7fffffL
103 };
104 static final long[] jjbitVec3 = {
105    0x1ff0000000000000L, 0xffffffffffffc000L, 0xffffffffL, 0x600000000000000L
106 };
107 static final long[] jjbitVec4 = {
108    0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL
109 };
110 static final long[] jjbitVec5 = {
111    0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffL, 0x0L
112 };
113 static final long[] jjbitVec6 = {
114    0xffffffffffffffffL, 0xffffffffffffffffL, 0x0L, 0x0L
115 };
116 static final long[] jjbitVec7 = {
117    0x3fffffffffffL, 0x0L, 0x0L, 0x0L
118 };
119 private final int jjMoveNfa_0(int startState, int curPos)
120 {
121    int[] nextStates;
122    int startsAt = 0;
123    jjnewStateCnt = 10;
124    int i = 1;
125    jjstateSet[0] = startState;
126    int j, kind = 0x7fffffff;
127    for (;;)
128    {
129       if (++jjround == 0x7fffffff)
130          ReInitRounds();
131       if (curChar < 64)
132       {
133          long l = 1L << curChar;
134          MatchLoop: do
135          {
136             switch(jjstateSet[--i])
137             {
138                case 1:
139                case 0:
140                   if ((0x3ff004000000000L & l) == 0L)
141                      break;
142                   kind = 1;
143                   jjCheckNAdd(0);
144                   break;
145                case 2:
146                   if (curChar == 46)
147                      jjCheckNAdd(3);
148                   break;
149                case 4:
150                   if (curChar != 46)
151                      break;
152                   if (kind > 2)
153                      kind = 2;
154                   jjCheckNAdd(3);
155                   break;
156                case 7:
157                   if (curChar == 35)
158                      kind = 1;
159                   break;
160                case 8:
161                   if (curChar == 43 && kind > 1)
162                      kind = 1;
163                   break;
164                case 9:
165                   if (curChar == 43)
166                      jjstateSet[jjnewStateCnt++] = 8;
167                   break;
168                default : break;
169             }
170          } while(i != startsAt);
171       }
172       else if (curChar < 128)
173       {
174          long l = 1L << (curChar & 077);
175          MatchLoop: do
176          {
177             switch(jjstateSet[--i])
178             {
179                case 1:
180                   if ((0x7fffffe87fffffeL & l) != 0L)
181                   {
182                      if (kind > 1)
183                         kind = 1;
184                      jjCheckNAdd(0);
185                   }
186                   if ((0x7fffffe07fffffeL & l) != 0L)
187                      jjstateSet[jjnewStateCnt++] = 2;
188                   if ((0x800000008L & l) != 0L)
189                      jjAddStates(0, 1);
190                   break;
191                case 0:
192                   if ((0x7fffffe87fffffeL & l) == 0L)
193                      break;
194                   if (kind > 1)
195                      kind = 1;
196                   jjCheckNAdd(0);
197                   break;
198                case 3:
199                   if ((0x7fffffe07fffffeL & l) != 0L)
200                      jjstateSet[jjnewStateCnt++] = 4;
201                   break;
202                case 6:
203                   if ((0x800000008L & l) != 0L)
204                      jjAddStates(0, 1);
205                   break;
206                default : break;
207             }
208          } while(i != startsAt);
209       }
210       else
211       {
212          int hiByte = (int)(curChar >> 8);
213          int i1 = hiByte >> 6;
214          long l1 = 1L << (hiByte & 077);
215          int i2 = (curChar & 0xff) >> 6;
216          long l2 = 1L << (curChar & 077);
217          MatchLoop: do
218          {
219             switch(jjstateSet[--i])
220             {
221                case 1:
222                   if (jjCanMove_0(hiByte, i1, i2, l1, l2))
223                   {
224                      if (kind > 1)
225                         kind = 1;
226                      jjCheckNAdd(0);
227                   }
228                   if (jjCanMove_0(hiByte, i1, i2, l1, l2))
229                      jjstateSet[jjnewStateCnt++] = 2;
230                   if (jjCanMove_1(hiByte, i1, i2, l1, l2))
231                   {
232                      if (kind > 3)
233                         kind = 3;
234                   }
235                   break;
236                case 0:
237                   if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
238                      break;
239                   if (kind > 1)
240                      kind = 1;
241                   jjCheckNAdd(0);
242                   break;
243                case 3:
244                   if (jjCanMove_0(hiByte, i1, i2, l1, l2))
245                      jjstateSet[jjnewStateCnt++] = 4;
246                   break;
247                case 5:
248                   if (jjCanMove_1(hiByte, i1, i2, l1, l2) && kind > 3)
249                      kind = 3;
250                   break;
251                default : break;
252             }
253          } while(i != startsAt);
254       }
255       if (kind != 0x7fffffff)
256       {
257          jjmatchedKind = kind;
258          jjmatchedPos = curPos;
259          kind = 0x7fffffff;
260       }
261       ++curPos;
262       if ((i = jjnewStateCnt) == (startsAt = 10 - (jjnewStateCnt = startsAt)))
263          return curPos;
264       try { curChar = input_stream.readChar(); }
265       catch(java.io.IOException JavaDoc e) { return curPos; }
266    }
267 }
268 static final int[] jjnextStates = {
269    7, 9,
270 };
271 private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2)
272 {
273    switch(hiByte)
274    {
275       case 0:
276          return ((jjbitVec2[i2] & l2) != 0L);
277       default :
278          if ((jjbitVec0[i1] & l1) != 0L)
279             return true;
280          return false;
281    }
282 }
283 private static final boolean jjCanMove_1(int hiByte, int i1, int i2, long l1, long l2)
284 {
285    switch(hiByte)
286    {
287       case 48:
288          return ((jjbitVec4[i2] & l2) != 0L);
289       case 49:
290          return ((jjbitVec5[i2] & l2) != 0L);
291       case 51:
292          return ((jjbitVec6[i2] & l2) != 0L);
293       case 61:
294          return ((jjbitVec7[i2] & l2) != 0L);
295       default :
296          if ((jjbitVec3[i1] & l1) != 0L)
297             return true;
298          return false;
299    }
300 }
301 public static final String JavaDoc[] jjstrLiteralImages = {
302 "", null, null, null, null, null, null, "\53", "\55", "\42", "\72", "\57",
303 "\56", "\100", "\47", null, null, null, null, null, };
304 public static final String JavaDoc[] lexStateNames = {
305    "DEFAULT",
306 };
307 protected CharStream input_stream;
308 private final int[] jjrounds = new int[10];
309 private final int[] jjstateSet = new int[20];
310 StringBuffer JavaDoc image;
311 int jjimageLen;
312 int lengthOfMatch;
313 protected char curChar;
/**
 * Creates a token manager reading from the given character stream.
 *
 * @param stream character source, stored in {@code input_stream}
 */
public NutchAnalysisTokenManager(CharStream stream)
{
   this.input_stream = stream;
}
/**
 * Creates a token manager reading from {@code stream} and switches it to the
 * given lexical state.
 *
 * @param stream   character source
 * @param lexState lexical state to start in; validated by {@link #SwitchTo(int)}
 */
public NutchAnalysisTokenManager(CharStream stream, int lexState)
{
   this(stream);
   this.SwitchTo(lexState);
}
323 public void ReInit(CharStream stream)
324 {
325    jjmatchedPos = jjnewStateCnt = 0;
326    curLexState = defaultLexState;
327    input_stream = stream;
328    ReInitRounds();
329 }
330 private final void ReInitRounds()
331 {
332    int i;
333    jjround = 0x80000001;
334    for (i = 10; i-- > 0;)
335       jjrounds[i] = 0x80000000;
336 }
337 public void ReInit(CharStream stream, int lexState)
338 {
339    ReInit(stream);
340    SwitchTo(lexState);
341 }
342 public void SwitchTo(int lexState)
343 {
344    if (lexState >= 1 || lexState < 0)
345       throw new TokenMgrError("Error: Ignoring invalid lexical state : " + lexState + ". State unchanged.", TokenMgrError.INVALID_LEXICAL_STATE);
346    else
347       curLexState = lexState;
348 }
349
350 protected Token jjFillToken()
351 {
352    Token t = Token.newToken(jjmatchedKind);
353    t.kind = jjmatchedKind;
354    String JavaDoc im = jjstrLiteralImages[jjmatchedKind];
355    t.image = (im == null) ? input_stream.GetImage() : im;
356    t.beginLine = input_stream.getBeginLine();
357    t.beginColumn = input_stream.getBeginColumn();
358    t.endLine = input_stream.getEndLine();
359    t.endColumn = input_stream.getEndColumn();
360    return t;
361 }
362
363 int curLexState = 0;
364 int defaultLexState = 0;
365 int jjnewStateCnt;
366 int jjround;
367 int jjmatchedPos;
368 int jjmatchedKind;
369
370 public Token getNextToken()
371 {
372   int kind;
373   Token specialToken = null;
374   Token matchedToken;
375   int curPos = 0;
376
377   EOFLoop :
378   for (;;)
379   {
380    try
381    {
382       curChar = input_stream.BeginToken();
383    }
384    catch(java.io.IOException JavaDoc e)
385    {
386       jjmatchedKind = 0;
387       matchedToken = jjFillToken();
388       return matchedToken;
389    }
390    image = null;
391    jjimageLen = 0;
392
393    jjmatchedKind = 0x7fffffff;
394    jjmatchedPos = 0;
395    curPos = jjMoveStringLiteralDfa0_0();
396    if (jjmatchedPos == 0 && jjmatchedKind > 15)
397    {
398       jjmatchedKind = 15;
399    }
400    if (jjmatchedKind != 0x7fffffff)
401    {
402       if (jjmatchedPos + 1 < curPos)
403          input_stream.backup(curPos - jjmatchedPos - 1);
404          matchedToken = jjFillToken();
405          TokenLexicalActions(matchedToken);
406          return matchedToken;
407    }
408    int error_line = input_stream.getEndLine();
409    int error_column = input_stream.getEndColumn();
410    String JavaDoc error_after = null;
411    boolean EOFSeen = false;
412    try { input_stream.readChar(); input_stream.backup(1); }
413    catch (java.io.IOException JavaDoc e1) {
414       EOFSeen = true;
415       error_after = curPos <= 1 ? "" : input_stream.GetImage();
416       if (curChar == '\n' || curChar == '\r') {
417          error_line++;
418          error_column = 0;
419       }
420       else
421          error_column++;
422    }
423    if (!EOFSeen) {
424       input_stream.backup(1);
425       error_after = curPos <= 1 ? "" : input_stream.GetImage();
426    }
427    throw new TokenMgrError(EOFSeen, curLexState, error_line, error_column, error_after, curChar, TokenMgrError.LEXICAL_ERROR);
428   }
429 }
430
431 void TokenLexicalActions(Token matchedToken)
432 {
433    switch(jjmatchedKind)
434    {
435       case 1 :
436         if (image == null)
437             image = new StringBuffer JavaDoc(new String JavaDoc(input_stream.GetSuffix(jjimageLen + (lengthOfMatch = jjmatchedPos + 1))));
438          else
439             image.append(input_stream.GetSuffix(jjimageLen + (lengthOfMatch = jjmatchedPos + 1)));
440     matchedToken.image = matchedToken.image.toLowerCase();
441          break;
442       case 2 :
443         if (image == null)
444             image = new StringBuffer JavaDoc(new String JavaDoc(input_stream.GetSuffix(jjimageLen + (lengthOfMatch = jjmatchedPos + 1))));
445          else
446             image.append(input_stream.GetSuffix(jjimageLen + (lengthOfMatch = jjmatchedPos + 1)));
447                                                   // remove dots
448
for (int i = 0; i < image.length(); i++) {
449         if (image.charAt(i) == '.')
450           image.deleteCharAt(i--);
451       }
452       matchedToken.image = image.toString().toLowerCase();
453          break;
454       default :
455          break;
456    }
457 }
458 }
459
Popular Tags