KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > analysis > standard > StandardTokenizer


1 /* Generated By:JavaCC: Do not edit this line. StandardTokenizer.java */
2 package org.apache.lucene.analysis.standard;
3
4 import java.io.*;
5
6 /** A grammar-based tokenizer constructed with JavaCC.
7  *
8  * <p> This should be a good tokenizer for most European-language documents:
9  *
10  * <ul>
11  * <li>Splits words at punctuation characters, removing punctuation. However, a
12  * dot that's not followed by whitespace is considered part of a token.
13  * <li>Splits words at hyphens, unless there's a number in the token, in which case
14  * the whole token is interpreted as a product number and is not split.
15  * <li>Recognizes email addresses and internet hostnames as one token.
16  * </ul>
17  *
18  * <p>Many applications have specific tokenizer needs. If this tokenizer does
19  * not suit your application, please consider copying this source code
20  * directory to your project and maintaining your own grammar-based tokenizer.
21  */

22 public class StandardTokenizer extends org.apache.lucene.analysis.Tokenizer implements StandardTokenizerConstants {
23
24   /** Constructs a tokenizer for this Reader. */
25   public StandardTokenizer(Reader reader) {
26     this(new FastCharStream(reader));
27     this.input = reader;
28   }
29
30 /** Returns the next token in the stream, or null at EOS.
31  * <p>The returned token's type is set to an element of {@link
32  * StandardTokenizerConstants#tokenImage}.
33  */

34   final public org.apache.lucene.analysis.Token next() throws ParseException, IOException {
35   Token token = null;
36     switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
37     case ALPHANUM:
38       token = jj_consume_token(ALPHANUM);
39       break;
40     case APOSTROPHE:
41       token = jj_consume_token(APOSTROPHE);
42       break;
43     case ACRONYM:
44       token = jj_consume_token(ACRONYM);
45       break;
46     case COMPANY:
47       token = jj_consume_token(COMPANY);
48       break;
49     case EMAIL:
50       token = jj_consume_token(EMAIL);
51       break;
52     case HOST:
53       token = jj_consume_token(HOST);
54       break;
55     case NUM:
56       token = jj_consume_token(NUM);
57       break;
58     case CJ:
59       token = jj_consume_token(CJ);
60       break;
61     case 0:
62       token = jj_consume_token(0);
63       break;
64     default:
65       jj_la1[0] = jj_gen;
66       jj_consume_token(-1);
67       throw new ParseException();
68     }
69       if (token.kind == EOF) {
70         {if (true) return null;}
71       } else {
72         {if (true) return
73           new org.apache.lucene.analysis.Token(token.image,
74                                         token.beginColumn,token.endColumn,
75                                         tokenImage[token.kind]);}
76       }
77     throw new Error JavaDoc("Missing return statement in function");
78   }
79
80   public StandardTokenizerTokenManager token_source;
81   public Token token, jj_nt;
82   private int jj_ntk;
83   private int jj_gen;
84   final private int[] jj_la1 = new int[1];
85   static private int[] jj_la1_0;
86   static {
87       jj_la1_0();
88    }
89    private static void jj_la1_0() {
90       jj_la1_0 = new int[] {0x10ff,};
91    }
92
93   public StandardTokenizer(CharStream stream) {
94     token_source = new StandardTokenizerTokenManager(stream);
95     token = new Token();
96     jj_ntk = -1;
97     jj_gen = 0;
98     for (int i = 0; i < 1; i++) jj_la1[i] = -1;
99   }
100
101   public void ReInit(CharStream stream) {
102     token_source.ReInit(stream);
103     token = new Token();
104     jj_ntk = -1;
105     jj_gen = 0;
106     for (int i = 0; i < 1; i++) jj_la1[i] = -1;
107   }
108
109   public StandardTokenizer(StandardTokenizerTokenManager tm) {
110     token_source = tm;
111     token = new Token();
112     jj_ntk = -1;
113     jj_gen = 0;
114     for (int i = 0; i < 1; i++) jj_la1[i] = -1;
115   }
116
117   public void ReInit(StandardTokenizerTokenManager tm) {
118     token_source = tm;
119     token = new Token();
120     jj_ntk = -1;
121     jj_gen = 0;
122     for (int i = 0; i < 1; i++) jj_la1[i] = -1;
123   }
124
125   final private Token jj_consume_token(int kind) throws ParseException {
126     Token oldToken;
127     if ((oldToken = token).next != null) token = token.next;
128     else token = token.next = token_source.getNextToken();
129     jj_ntk = -1;
130     if (token.kind == kind) {
131       jj_gen++;
132       return token;
133     }
134     token = oldToken;
135     jj_kind = kind;
136     throw generateParseException();
137   }
138
139   final public Token getNextToken() {
140     if (token.next != null) token = token.next;
141     else token = token.next = token_source.getNextToken();
142     jj_ntk = -1;
143     jj_gen++;
144     return token;
145   }
146
147   final public Token getToken(int index) {
148     Token t = token;
149     for (int i = 0; i < index; i++) {
150       if (t.next != null) t = t.next;
151       else t = t.next = token_source.getNextToken();
152     }
153     return t;
154   }
155
156   final private int jj_ntk() {
157     if ((jj_nt=token.next) == null)
158       return (jj_ntk = (token.next=token_source.getNextToken()).kind);
159     else
160       return (jj_ntk = jj_nt.kind);
161   }
162
163   private java.util.Vector JavaDoc jj_expentries = new java.util.Vector JavaDoc();
164   private int[] jj_expentry;
165   private int jj_kind = -1;
166
167   public ParseException generateParseException() {
168     jj_expentries.removeAllElements();
169     boolean[] la1tokens = new boolean[16];
170     for (int i = 0; i < 16; i++) {
171       la1tokens[i] = false;
172     }
173     if (jj_kind >= 0) {
174       la1tokens[jj_kind] = true;
175       jj_kind = -1;
176     }
177     for (int i = 0; i < 1; i++) {
178       if (jj_la1[i] == jj_gen) {
179         for (int j = 0; j < 32; j++) {
180           if ((jj_la1_0[i] & (1<<j)) != 0) {
181             la1tokens[j] = true;
182           }
183         }
184       }
185     }
186     for (int i = 0; i < 16; i++) {
187       if (la1tokens[i]) {
188         jj_expentry = new int[1];
189         jj_expentry[0] = i;
190         jj_expentries.addElement(jj_expentry);
191       }
192     }
193     int[][] exptokseq = new int[jj_expentries.size()][];
194     for (int i = 0; i < jj_expentries.size(); i++) {
195       exptokseq[i] = (int[])jj_expentries.elementAt(i);
196     }
197     return new ParseException(token, exptokseq, tokenImage);
198   }
199
200   final public void enable_tracing() {
201   }
202
203   final public void disable_tracing() {
204   }
205
206 }
207
Popular Tags