// Source: KickJava (Java API By Example) — net.percederberg.grammatica.parser.TestTokenizer


/*
 * TestTokenizer.java
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free
 * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 * MA 02111-1307, USA.
 *
 * Copyright (c) 2003-2005 Per Cederberg. All rights reserved.
 */
package net.percederberg.grammatica.parser;

import java.io.StringReader;

import junit.framework.TestCase;
28 /**
29  * A test case for the Tokenizer class.
30  *
31  * @author Per Cederberg, <per at percederberg dot net>
32  * @version 1.5
33  */

34 public class TestTokenizer extends TestCase {
35
36     /**
37      * The end of file token identifier.
38      */

39     private static final int EOF = 0;
40
41     /**
42      * The keyword token identifier.
43      */

44     private static final int KEYWORD = 1;
45
46     /**
47      * The identifier token identifier.
48      */

49     private static final int IDENTIFIER = 2;
50
51     /**
52      * The number token identifier.
53      */

54     private static final int NUMBER = 3;
55
56     /**
57      * The whitespace token identifier.
58      */

59     private static final int WHITESPACE = 4;
60
61     /**
62      * The error token identifier.
63      */

64     private static final int ERROR = 5;
65
66     /**
67      * Test various invalid patterns.
68      */

69     public void testInvalidPattern() {
70         Tokenizer tokenizer = createTokenizer("", false);
71         TokenPattern pattern;
72
73         pattern = new TokenPattern(NUMBER,
74                                    "NUMBER",
75                                    TokenPattern.REGEXP_TYPE + 13,
76                                    "13");
77         failAddPattern(tokenizer, pattern);
78         pattern = new TokenPattern(NUMBER,
79                                    "NUMBER",
80                                    TokenPattern.REGEXP_TYPE,
81                                    "1(3");
82         failAddPattern(tokenizer, pattern);
83     }
84
85     /**
86      * Tests the tokenizer with empty input.
87      */

88     public void testEmptyInput() {
89         Tokenizer tokenizer = createDefaultTokenizer("", false);
90
91         readToken(tokenizer, EOF);
92     }
93
94     /**
95      * Tests the ignored tokens.
96      */

97     public void testIgnoreTokens() {
98         Tokenizer tokenizer = createDefaultTokenizer(" 12 keyword 0 ", false);
99
100         readToken(tokenizer, NUMBER);
101         readToken(tokenizer, KEYWORD);
102         readToken(tokenizer, NUMBER);
103         readToken(tokenizer, EOF);
104     }
105
106     /**
107      * Tests the ignored tokens.
108      */

109     public void testErrorTokens() {
110         Tokenizer tokenizer = createDefaultTokenizer("12 error1 ", false);
111
112         readToken(tokenizer, NUMBER);
113         failReadToken(tokenizer);
114         readToken(tokenizer, NUMBER);
115         readToken(tokenizer, EOF);
116     }
117
118     /**
119      * Test the parse error recovery.
120      */

121     public void testParseError() {
122         Tokenizer tokenizer = createDefaultTokenizer("12 (keyword)", false);
123
124         readToken(tokenizer, NUMBER);
125         failReadToken(tokenizer);
126         readToken(tokenizer, KEYWORD);
127         failReadToken(tokenizer);
128         readToken(tokenizer, EOF);
129     }
130
131     /**
132      * Tests the token list functions.
133      */

134     public void testTokenList() {
135         Tokenizer tokenizer = createDefaultTokenizer("12 keyword 0", false);
136         Token token;
137
138         assertEquals("default token list setting",
139                      false,
140                      tokenizer.getUseTokenList());
141         tokenizer.setUseTokenList(true);
142         token = readToken(tokenizer, NUMBER);
143         readToken(tokenizer, KEYWORD);
144         readToken(tokenizer, NUMBER);
145         readToken(tokenizer, EOF);
146         assertEquals("previous token", null, token.getPreviousToken());
147         token = token.getNextToken();
148         assertEquals("token id", WHITESPACE, token.getId());
149         token = token.getNextToken();
150         assertEquals("token id", KEYWORD, token.getId());
151         token = token.getNextToken();
152         assertEquals("token id", WHITESPACE, token.getId());
153         token = token.getNextToken();
154         assertEquals("token id", NUMBER, token.getId());
155         assertEquals("next token", null, token.getNextToken());
156         token = token.getPreviousToken();
157         assertEquals("token id", WHITESPACE, token.getId());
158         token = token.getPreviousToken();
159         assertEquals("token id", KEYWORD, token.getId());
160         token = token.getPreviousToken();
161         assertEquals("token id", WHITESPACE, token.getId());
162         token = token.getPreviousToken();
163         assertEquals("token id", NUMBER, token.getId());
164     }
165
166     /**
167      * Tests the case-insensitive mode.
168      */

169     public void testCaseInsensitive() {
170         Tokenizer tokenizer = createDefaultTokenizer("kEyWOrd aBc ", true);
171
172         readToken(tokenizer, KEYWORD);
173         readToken(tokenizer, IDENTIFIER);
174         readToken(tokenizer, EOF);
175     }
176
177     /**
178      * Tests resetting the tokenizer with different input streams.
179      */

180     public void testReset() {
181         Tokenizer tokenizer = createDefaultTokenizer(" 12 keyword 0 ", false);
182
183         readToken(tokenizer, NUMBER);
184         readToken(tokenizer, KEYWORD);
185         readToken(tokenizer, NUMBER);
186         readToken(tokenizer, EOF);
187
188         tokenizer.reset(new StringReader JavaDoc("12 (keyword)"));
189         readToken(tokenizer, NUMBER);
190         failReadToken(tokenizer);
191         readToken(tokenizer, KEYWORD);
192
193         tokenizer.reset(new StringReader JavaDoc(""));
194         readToken(tokenizer, EOF);
195
196         tokenizer.reset(new StringReader JavaDoc(" 12 keyword 0 "));
197         readToken(tokenizer, NUMBER);
198         readToken(tokenizer, KEYWORD);
199         readToken(tokenizer, NUMBER);
200         readToken(tokenizer, EOF);
201     }
202
203     /**
204      * Creates a new tokenizer.
205      *
206      * @param input the input string
207      * @param ignoreCase the character case ignore flag
208      *
209      * @return a new tokenizer
210      */

211     private Tokenizer createTokenizer(String JavaDoc input, boolean ignoreCase) {
212         return new Tokenizer(new StringReader JavaDoc(input), ignoreCase);
213     }
214
215     /**
216      * Creates a new default tokenizer that recognizes a trivial
217      * language.
218      *
219      * @param input the input string
220      * @param ignoreCase the character case ignore flag
221      *
222      * @return a new tokenizer
223      */

224     private Tokenizer createDefaultTokenizer(String JavaDoc input,
225                                              boolean ignoreCase) {
226
227         Tokenizer tokenizer = createTokenizer(input, ignoreCase);
228         TokenPattern pattern;
229
230         pattern = new TokenPattern(KEYWORD,
231                                    "KEYWORD",
232                                    TokenPattern.STRING_TYPE,
233                                    "keyword");
234         addPattern(tokenizer, pattern);
235         pattern = new TokenPattern(IDENTIFIER,
236                                    "IDENTIFIER",
237                                    TokenPattern.REGEXP_TYPE,
238                                    "[A-Z]+");
239         addPattern(tokenizer, pattern);
240         pattern = new TokenPattern(NUMBER,
241                                    "NUMBER",
242                                    TokenPattern.REGEXP_TYPE,
243                                    "[0-9]+");
244         addPattern(tokenizer, pattern);
245         pattern = new TokenPattern(WHITESPACE,
246                                    "WHITESPACE",
247                                    TokenPattern.REGEXP_TYPE,
248                                    "[ \t\n]+");
249         pattern.setIgnore();
250         addPattern(tokenizer, pattern);
251         pattern = new TokenPattern(ERROR,
252                                    "ERROR",
253                                    TokenPattern.STRING_TYPE,
254                                    "error");
255         pattern.setError();
256         addPattern(tokenizer, pattern);
257
258         return tokenizer;
259     }
260
261     /**
262      * Adds a pattern to the tokenizer and reports a test failure if
263      * it failed.
264      *
265      * @param tokenizer the tokenizer
266      * @param pattern the pattern to add
267      */

268     private void addPattern(Tokenizer tokenizer, TokenPattern pattern) {
269         try {
270             tokenizer.addPattern(pattern);
271         } catch (ParserCreationException e) {
272             fail("couldn't add pattern " + pattern.getName() + ": " +
273                  e.getMessage());
274         }
275     }
276
277     /**
278      * Adds a pattern to the tokenizer and reports a test failure if
279      * it failed.
280      *
281      * @param tokenizer the tokenizer
282      * @param pattern the pattern to add
283      */

284     private void failAddPattern(Tokenizer tokenizer, TokenPattern pattern) {
285         try {
286             tokenizer.addPattern(pattern);
287             fail("could add pattern " + pattern.getName());
288         } catch (ParserCreationException e) {
289             // Failure was expected
290
}
291     }
292
293     /**
294      * Reads the next token. This method reports a test failure if a
295      * token couldn't be read.
296      *
297      * @param tokenizer the tokenizer to use
298      *
299      * @return the token read
300      */

301     private Token readToken(Tokenizer tokenizer) {
302         try {
303             return tokenizer.next();
304         } catch (ParseException e) {
305             fail("couldn't read next token: " + e.getMessage());
306             return null; // Unreachable
307
}
308     }
309
310     /**
311      * Reads the next token and checks it's id. This method reports a
312      * test failure if the right token couldn't be read.
313      *
314      * @param tokenizer the tokenizer to use
315      * @param id the expected token id
316      *
317      * @return the token read
318      */

319     private Token readToken(Tokenizer tokenizer, int id) {
320         Token token = readToken(tokenizer);
321
322         if (id == EOF) {
323             if (token != null) {
324                 fail("expected end of file, found " + token);
325             }
326         } else {
327             if (token != null) {
328                 assertEquals("token id", id, token.getId());
329             } else {
330                 fail("expected " + id + ", found EOF");
331             }
332         }
333         return token;
334     }
335
336     /**
337      * Fails to read the next token. This method reports a test
338      * failure if a token could be read.
339      *
340      * @param tokenizer the tokenizer to use
341      */

342     private void failReadToken(Tokenizer tokenizer) {
343         Token token;
344
345         try {
346             token = tokenizer.next();
347             fail("could read token " + token.toString());
348         } catch (ParseException e) {
349             // Failure was expected
350
}
351     }
352 }
// Popular Tags