1 21 22 package net.percederberg.grammatica.parser; 23 24 import java.io.StringReader ; 25 26 import junit.framework.TestCase; 27 28 34 public class TestTokenizer extends TestCase { 35 36 39 private static final int EOF = 0; 40 41 44 private static final int KEYWORD = 1; 45 46 49 private static final int IDENTIFIER = 2; 50 51 54 private static final int NUMBER = 3; 55 56 59 private static final int WHITESPACE = 4; 60 61 64 private static final int ERROR = 5; 65 66 69 public void testInvalidPattern() { 70 Tokenizer tokenizer = createTokenizer("", false); 71 TokenPattern pattern; 72 73 pattern = new TokenPattern(NUMBER, 74 "NUMBER", 75 TokenPattern.REGEXP_TYPE + 13, 76 "13"); 77 failAddPattern(tokenizer, pattern); 78 pattern = new TokenPattern(NUMBER, 79 "NUMBER", 80 TokenPattern.REGEXP_TYPE, 81 "1(3"); 82 failAddPattern(tokenizer, pattern); 83 } 84 85 88 public void testEmptyInput() { 89 Tokenizer tokenizer = createDefaultTokenizer("", false); 90 91 readToken(tokenizer, EOF); 92 } 93 94 97 public void testIgnoreTokens() { 98 Tokenizer tokenizer = createDefaultTokenizer(" 12 keyword 0 ", false); 99 100 readToken(tokenizer, NUMBER); 101 readToken(tokenizer, KEYWORD); 102 readToken(tokenizer, NUMBER); 103 readToken(tokenizer, EOF); 104 } 105 106 109 public void testErrorTokens() { 110 Tokenizer tokenizer = createDefaultTokenizer("12 error1 ", false); 111 112 readToken(tokenizer, NUMBER); 113 failReadToken(tokenizer); 114 readToken(tokenizer, NUMBER); 115 readToken(tokenizer, EOF); 116 } 117 118 121 public void testParseError() { 122 Tokenizer tokenizer = createDefaultTokenizer("12 (keyword)", false); 123 124 readToken(tokenizer, NUMBER); 125 failReadToken(tokenizer); 126 readToken(tokenizer, KEYWORD); 127 failReadToken(tokenizer); 128 readToken(tokenizer, EOF); 129 } 130 131 134 public void testTokenList() { 135 Tokenizer tokenizer = createDefaultTokenizer("12 keyword 0", false); 136 Token token; 137 138 assertEquals("default token list setting", 139 false, 140 tokenizer.getUseTokenList()); 141 tokenizer.setUseTokenList(true); 142 token = readToken(tokenizer, NUMBER); 143 readToken(tokenizer, KEYWORD); 144 readToken(tokenizer, NUMBER); 145 readToken(tokenizer, EOF); 146 assertEquals("previous token", null, token.getPreviousToken()); 147 token = token.getNextToken(); 148 assertEquals("token id", WHITESPACE, token.getId()); 149 token = token.getNextToken(); 150 assertEquals("token id", KEYWORD, token.getId()); 151 token = token.getNextToken(); 152 assertEquals("token id", WHITESPACE, token.getId()); 153 token = token.getNextToken(); 154 assertEquals("token id", NUMBER, token.getId()); 155 assertEquals("next token", null, token.getNextToken()); 156 token = token.getPreviousToken(); 157 assertEquals("token id", WHITESPACE, token.getId()); 158 token = token.getPreviousToken(); 159 assertEquals("token id", KEYWORD, token.getId()); 160 token = token.getPreviousToken(); 161 assertEquals("token id", WHITESPACE, token.getId()); 162 token = token.getPreviousToken(); 163 assertEquals("token id", NUMBER, token.getId()); 164 } 165 166 169 public void testCaseInsensitive() { 170 Tokenizer tokenizer = createDefaultTokenizer("kEyWOrd aBc ", true); 171 172 readToken(tokenizer, KEYWORD); 173 readToken(tokenizer, IDENTIFIER); 174 readToken(tokenizer, EOF); 175 } 176 177 180 public void testReset() { 181 Tokenizer tokenizer = createDefaultTokenizer(" 12 keyword 0 ", false); 182 183 readToken(tokenizer, NUMBER); 184 readToken(tokenizer, KEYWORD); 185 readToken(tokenizer, NUMBER); 186 readToken(tokenizer, EOF); 187 188 tokenizer.reset(new StringReader ("12 (keyword)")); 189 readToken(tokenizer, NUMBER); 190 failReadToken(tokenizer); 191 readToken(tokenizer, KEYWORD); 192 193 tokenizer.reset(new StringReader ("")); 194 readToken(tokenizer, EOF); 195 196 tokenizer.reset(new StringReader (" 12 keyword 0 ")); 197 readToken(tokenizer, NUMBER); 198 readToken(tokenizer, KEYWORD); 199 readToken(tokenizer, NUMBER); 200 readToken(tokenizer, EOF); 201 } 202 203 211 private Tokenizer createTokenizer(String input, boolean ignoreCase) { 212 return new Tokenizer(new StringReader (input), ignoreCase); 213 } 214 215 224 private Tokenizer createDefaultTokenizer(String input, 225 boolean ignoreCase) { 226 227 Tokenizer tokenizer = createTokenizer(input, ignoreCase); 228 TokenPattern pattern; 229 230 pattern = new TokenPattern(KEYWORD, 231 "KEYWORD", 232 TokenPattern.STRING_TYPE, 233 "keyword"); 234 addPattern(tokenizer, pattern); 235 pattern = new TokenPattern(IDENTIFIER, 236 "IDENTIFIER", 237 TokenPattern.REGEXP_TYPE, 238 "[A-Z]+"); 239 addPattern(tokenizer, pattern); 240 pattern = new TokenPattern(NUMBER, 241 "NUMBER", 242 TokenPattern.REGEXP_TYPE, 243 "[0-9]+"); 244 addPattern(tokenizer, pattern); 245 pattern = new TokenPattern(WHITESPACE, 246 "WHITESPACE", 247 TokenPattern.REGEXP_TYPE, 248 "[ \t\n]+"); 249 pattern.setIgnore(); 250 addPattern(tokenizer, pattern); 251 pattern = new TokenPattern(ERROR, 252 "ERROR", 253 TokenPattern.STRING_TYPE, 254 "error"); 255 pattern.setError(); 256 addPattern(tokenizer, pattern); 257 258 return tokenizer; 259 } 260 261 268 private void addPattern(Tokenizer tokenizer, TokenPattern pattern) { 269 try { 270 tokenizer.addPattern(pattern); 271 } catch (ParserCreationException e) { 272 fail("couldn't add pattern " + pattern.getName() + ": " + 273 e.getMessage()); 274 } 275 } 276 277 284 private void failAddPattern(Tokenizer tokenizer, TokenPattern pattern) { 285 try { 286 tokenizer.addPattern(pattern); 287 fail("could add pattern " + pattern.getName()); 288 } catch (ParserCreationException e) { 289 } 291 } 292 293 301 private Token readToken(Tokenizer tokenizer) { 302 try { 303 return tokenizer.next(); 304 } catch (ParseException e) { 305 fail("couldn't read next token: " + e.getMessage()); 306 return null; } 308 } 309 310 319 private Token readToken(Tokenizer tokenizer, int id) { 320 Token token = readToken(tokenizer); 321 322 if (id == EOF) { 323 if (token != null) { 324 fail("expected end of file, found " + token); 325 } 326 } else { 327 if (token != null) { 328 assertEquals("token id", id, token.getId()); 329 } else { 330 fail("expected " + id + ", found EOF"); 331 } 332 } 333 return token; 334 } 335 336 342 private void failReadToken(Tokenizer tokenizer) { 343 Token token; 344 345 try { 346 token = tokenizer.next(); 347 fail("could read token " + token.toString()); 348 } catch (ParseException e) { 349 } 351 } 352 } 353 | Popular Tags |