1 21 22 package net.percederberg.grammatica.parser.re; 23 24 import java.io.IOException ; 25 26 import junit.framework.TestCase; 27 28 34 public class TestRegExp extends TestCase { 35 36 39 private static final String ASCII_ALPHABET = 40 "ABCDEFGHIJKLMNOPQRSTUNWXYZabcdefghijklmnopqrstuvwxyz"; 41 42 45 private static final String LATIN_1_CHARACTERS = 46 "ÁÀÄÂÅÉÈËÊÍÌÏÎÓÒÖÔÕÚÙÜÛÝáàäâãéèëêíìïîóòöôõúùüûýÿ"; 47 48 51 private static final String LATIN_1_SYMBOLS = 52 "§!#¤%&/=`'½@£~-_,:;©Þ®ªß«»µ¡¿²³¼¢"; 53 54 57 private static final String DIGITS = 58 "0123456789"; 59 60 63 private static final String WHITESPACE = 64 " \t\n\r\f\r\n\u000B"; 65 66 71 public TestRegExp(String name) { 72 super(name); 73 } 74 75 78 public void testSyntaxErrors() { 79 failCreateRegExp(""); 80 failCreateRegExp("?"); 81 failCreateRegExp("*"); 82 failCreateRegExp("+"); 83 failCreateRegExp("{0}"); 84 failCreateRegExp("("); 85 failCreateRegExp(")"); 86 failCreateRegExp("[ (])"); 87 failCreateRegExp("+|*"); 88 } 89 90 93 public void testCharacters() { 94 matchRegExp(ASCII_ALPHABET, ASCII_ALPHABET); 95 matchRegExp(LATIN_1_CHARACTERS, LATIN_1_CHARACTERS); 96 matchRegExp(LATIN_1_SYMBOLS, LATIN_1_SYMBOLS); 97 matchRegExp(DIGITS, DIGITS); 98 matchRegExp(WHITESPACE, WHITESPACE); 99 } 100 101 104 public void testSpecialCharacters() { 105 matchRegExp(".*", ASCII_ALPHABET); 106 matchRegExp(".*", LATIN_1_CHARACTERS); 107 matchRegExp(".*", LATIN_1_SYMBOLS); 108 matchRegExp(".*", DIGITS); 109 matchRegExp(".*", " \t"); 110 failMatchRegExp(".+", "\n"); 111 failMatchRegExp(".+", "\r"); 112 failMatchRegExp(".+", "\r\n"); 113 failMatchRegExp(".+", "\u0085"); 114 failMatchRegExp(".+", "\u2028"); 115 failMatchRegExp(".+", "\u2029"); 116 failCreateRegExp("^"); 117 failCreateRegExp("$"); 118 } 119 120 123 public void testCharacterEscapes() { 124 matchRegExp("\\d+", DIGITS); 125 failMatchRegExp("\\d+", ASCII_ALPHABET); 126 failMatchRegExp("\\d+", WHITESPACE); 127 matchRegExp("\\D+", ASCII_ALPHABET); 128 matchRegExp("\\D+", WHITESPACE); 129 failMatchRegExp("\\D+", DIGITS); 130 matchRegExp("\\s+", WHITESPACE); 131 failMatchRegExp("\\s+", ASCII_ALPHABET); 132 matchRegExp("\\S+", ASCII_ALPHABET); 133 failMatchRegExp("\\S+", WHITESPACE); 134 matchRegExp("\\w+", ASCII_ALPHABET); 135 matchRegExp("\\w+", DIGITS); 136 matchRegExp("\\w+", "_"); 137 failMatchRegExp("\\w+", WHITESPACE); 138 failMatchRegExp("\\w+", LATIN_1_CHARACTERS); 139 failMatchRegExp("\\W+", ASCII_ALPHABET); 140 failMatchRegExp("\\W+", DIGITS); 141 failMatchRegExp("\\W+", "_"); 142 matchRegExp("\\W+", WHITESPACE); 143 matchRegExp("\\W+", LATIN_1_CHARACTERS); 144 } 145 146 149 public void testSymbolEscapes() { 150 matchRegExp("\\\\", "\\"); 151 matchRegExp("\\\"", "\""); 152 matchRegExp("\\'", "'"); 153 matchRegExp("\\.", "."); 154 matchRegExp("\\*", "*"); 155 matchRegExp("\\+", "+"); 156 matchRegExp("\\?", "?"); 157 matchRegExp("\\(", "("); 158 matchRegExp("\\)", ")"); 159 matchRegExp("\\{", "{"); 160 matchRegExp("\\}", "}"); 161 matchRegExp("\\[", "["); 162 matchRegExp("\\]", "]"); 163 matchRegExp("\\@", "@"); 164 matchRegExp("\\<", "<"); 165 matchRegExp("\\>", ">"); 166 matchRegExp("\\$", "$"); 167 matchRegExp("\\%", "%"); 168 matchRegExp("\\&", "&"); 169 } 170 171 174 public void testControlEscapes() { 175 matchRegExp("\\t", "\t"); 176 matchRegExp("\\n", "\n"); 177 matchRegExp("\\r", "\r"); 178 matchRegExp("\\f", "\f"); 179 matchRegExp("\\a", "\u0007"); 180 matchRegExp("\\e", "\u001B"); 181 } 182 183 186 public void testOctalEscapes() { 187 failCreateRegExp("\\0"); 188 matchRegExp("\\01", "\01"); 189 matchRegExp("\\012", "\012"); 190 matchRegExp("\\0101", "A"); 191 matchRegExp("\\01174", "O4"); 192 matchRegExp("\\0117a", "Oa"); 193 matchRegExp("\\018", "\018"); 194 matchRegExp("\\0118", "\0118"); 195 failCreateRegExp("\\08"); 196 failCreateRegExp("\\043"); 197 failCreateRegExp("\\0432"); 198 } 199 200 203 public void testHexEscapes() { 204 failCreateRegExp("\\x"); 205 failCreateRegExp("\\x1"); 206 failCreateRegExp("\\x1g"); 207 matchRegExp("\\x41", "A"); 208 matchRegExp("\\x4f", "O"); 209 matchRegExp("\\xABC", "\u00ABC"); 210 } 211 212 215 public void testUnicodeEscapes() { 216 failCreateRegExp("\\u"); 217 failCreateRegExp("\\u1"); 218 failCreateRegExp("\\u11"); 219 failCreateRegExp("\\u111"); 220 failCreateRegExp("\\u111g"); 221 matchRegExp("\\u0041", "A"); 222 matchRegExp("\\u004f", "O"); 223 matchRegExp("\\u00ABC", "\u00ABC"); 224 } 225 226 229 public void testInvalidEscapes() { 230 failCreateRegExp("\\A"); 231 failCreateRegExp("\\B"); 232 failCreateRegExp("\\C"); 233 failCreateRegExp("\\E"); 234 failCreateRegExp("\\F"); 235 failCreateRegExp("\\G"); 236 failCreateRegExp("\\H"); 237 failCreateRegExp("\\I"); 238 failCreateRegExp("\\J"); 239 failCreateRegExp("\\K"); 240 failCreateRegExp("\\L"); 241 failCreateRegExp("\\M"); 242 failCreateRegExp("\\N"); 243 failCreateRegExp("\\O"); 244 failCreateRegExp("\\P"); 245 failCreateRegExp("\\Q"); 246 failCreateRegExp("\\R"); 247 failCreateRegExp("\\T"); 248 failCreateRegExp("\\U"); 249 failCreateRegExp("\\V"); 250 failCreateRegExp("\\X"); 251 failCreateRegExp("\\Y"); 252 failCreateRegExp("\\Z"); 253 failCreateRegExp("\\b"); 254 failCreateRegExp("\\c"); 255 failCreateRegExp("\\g"); 256 failCreateRegExp("\\h"); 257 failCreateRegExp("\\i"); 258 failCreateRegExp("\\j"); 259 failCreateRegExp("\\k"); 260 failCreateRegExp("\\l"); 261 failCreateRegExp("\\m"); 262 failCreateRegExp("\\o"); 263 failCreateRegExp("\\p"); 264 failCreateRegExp("\\q"); 265 failCreateRegExp("\\u"); 266 failCreateRegExp("\\v"); 267 failCreateRegExp("\\y"); 268 failCreateRegExp("\\z"); 269 } 270 271 274 public void testCharacterSet() { 275 matchRegExp("[ab]", "a"); 276 matchRegExp("[ab]", "b"); 277 failMatchRegExp("[ab]", "c"); 278 failMatchRegExp("[^ab]", "a"); 279 failMatchRegExp("[^ab]", "b"); 280 matchRegExp("[^ab]", "c"); 281 matchRegExp("[A-Za-z]+", ASCII_ALPHABET); 282 failMatchRegExp("[A-Za-z]+", DIGITS); 283 failMatchRegExp("[A-Za-z]+", WHITESPACE); 284 failMatchRegExp("[^A-Za-z]+", ASCII_ALPHABET); 285 matchRegExp("[^A-Za-z]+", DIGITS); 286 matchRegExp("[^A-Za-z]+", WHITESPACE); 287 matchRegExp("[.]", "."); 288 failMatchRegExp("[.]", "a"); 289 matchRegExp("[a-]+", "a-"); 290 matchRegExp("[-a]+", "a-"); 291 matchRegExp("[a-]+", "ab", "a"); 292 matchRegExp("[ \\t\\n\\r\\f\\x0B]*", WHITESPACE); 293 } 294 295 298 public void testGreedyQuantifiers() { 299 matchRegExp("a?", ""); 300 matchRegExp("a?", "a"); 301 matchRegExp("a?", "aaaa", "a"); 302 matchRegExp("a*", ""); 303 matchRegExp("a*", "aaaa"); 304 failMatchRegExp("a+", ""); 305 matchRegExp("a+", "a"); 306 matchRegExp("a+", "aaaa"); 307 failCreateRegExp("a{0}"); 308 failMatchRegExp("a{3}", "aa"); 309 matchRegExp("a{3}", "aaa"); 310 matchRegExp("a{3}", "aaaa", "aaa"); 311 failMatchRegExp("a{3,}", "aa"); 312 matchRegExp("a{3,}", "aaa"); 313 matchRegExp("a{3,}", "aaaaa"); 314 failMatchRegExp("a{2,3}", "a"); 315 matchRegExp("a{2,3}", "aa"); 316 matchRegExp("a{2,3}", "aaa"); 317 matchRegExp("a{2,3}", "aaaa", "aaa"); 318 } 319 320 323 public void testReluctantQuantifiers() { 324 matchRegExp("a??", ""); 325 matchRegExp("a??", "a", ""); 326 matchRegExp("a*?", ""); 327 matchRegExp("a*?", "aaaa", ""); 328 failMatchRegExp("a+?", ""); 329 matchRegExp("a+?", "a"); 330 matchRegExp("a+?", "aaaa", "a"); 331 failMatchRegExp("a{3}?", "aa"); 332 failCreateRegExp("a{0}?"); 333 matchRegExp("a{3}?", "aaa"); 334 matchRegExp("a{3}?", "aaaa", "aaa"); 335 failMatchRegExp("a{3,}?", "aa"); 336 matchRegExp("a{3,}?", "aaa"); 337 matchRegExp("a{3,}?", "aaaaa", "aaa"); 338 failMatchRegExp("a{2,3}?", "a"); 339 matchRegExp("a{2,3}?", "aa"); 340 matchRegExp("a{2,3}?", "aaa", "aa"); 341 matchRegExp("a{2,3}?", "aaaa", "aa"); 342 } 343 344 347 public void testPossessiveQuantifiers() { 348 matchRegExp("a?+", ""); 349 matchRegExp("a?+", "a"); 350 matchRegExp("a*+", ""); 351 matchRegExp("a*+", "aaaa"); 352 failMatchRegExp("a++", ""); 353 matchRegExp("a++", "a"); 354 matchRegExp("a++", "aaaa"); 355 failMatchRegExp("a{3}+", "aa"); 356 failCreateRegExp("a{0}+"); 357 matchRegExp("a{3}+", "aaa"); 358 matchRegExp("a{3}+", "aaaa", "aaa"); 359 failMatchRegExp("a{3,}+", "aa"); 360 matchRegExp("a{3,}+", "aaa"); 361 matchRegExp("a{3,}+", "aaaaa", "aaaaa"); 362 failMatchRegExp("a{2,3}+", "a"); 363 matchRegExp("a{2,3}+", "aa"); 364 matchRegExp("a{2,3}+", "aaa"); 365 matchRegExp("a{2,3}+", "aaaa", "aaa"); 366 } 367 368 371 public void testQuantifierBacktracking() { 372 matchRegExp("a?a", "a"); 373 matchRegExp("a*a", "aaaa"); 374 matchRegExp("a*aaaa", "aaaa"); 375 failMatchRegExp("a*aaaa", "aaa"); 376 matchRegExp("a+a", "aaaa"); 377 matchRegExp("a+aaa", "aaaa"); 378 failMatchRegExp("a+aaaa", "aaaa"); 379 failMatchRegExp("a{3,}a", "aaa"); 380 matchRegExp("a{3,}a", "aaaaa"); 381 matchRegExp("a{2,3}a", "aaa"); 382 failMatchRegExp("a{2,3}a", "aa"); 383 matchRegExp("a??b", "ab"); 384 matchRegExp("a*?b", "aaab"); 385 matchRegExp("a+?b", "aaab"); 386 matchRegExp("a{3,}?b", "aaaaab"); 387 matchRegExp("a{2,3}?b", "aaab"); 388 failMatchRegExp("a?+a", "a"); 389 failMatchRegExp("a*+a", "aaaa"); 390 failMatchRegExp("a++a", "aaaa"); 391 failMatchRegExp("a{3,}+a", "aaaaa"); 392 failMatchRegExp("a{2,3}+a", "aaa"); 393 } 394 395 399 public void testQuantifierStackOverflow() { 400 StringBuffer buffer = new StringBuffer (); 401 String str; 402 403 for (int i = 0; i < 4096; i++) { 404 buffer.append("a"); 405 } 406 str = buffer.toString(); 407 matchRegExp("a*" + str, str); 408 failMatchRegExp("a*a" + str, str); 409 matchRegExp("a*?b", str + "b"); 410 failMatchRegExp("a*?b", str); 411 matchRegExp("a*+", str); 412 failMatchRegExp("a*+a", str); 413 } 414 415 418 public void testLogicalOperators() { 419 matchRegExp("a|ab|b", "a"); 420 matchRegExp("a|ab|b", "b"); 421 matchRegExp("a|ab|b", "ab"); 422 matchRegExp("(ab)", "ab"); 423 matchRegExp("(a)(b)", "ab"); 424 } 425 426 429 public void testAssociativity() { 430 matchRegExp("ab?c", "ac"); 431 failMatchRegExp("ab?c", "c"); 432 matchRegExp("aa|b", "aa"); 433 failMatchRegExp("aa|b", "ab"); 434 matchRegExp("ab|bc", "ab"); 435 matchRegExp("ab|bc", "bc"); 436 matchRegExp("(a|b)c", "ac"); 437 matchRegExp("(a|b)c", "bc"); 438 failMatchRegExp("(a|b)c", "abc"); 439 } 440 441 444 public void testComplex() { 445 matchRegExp("a*-", "aa-"); 446 matchRegExp("([) ])+", ") ))"); 447 matchRegExp("a*a*aa", "aa"); 448 matchRegExp("(a*)*aa", "aaaa"); 449 matchRegExp("a+a+aa", "aaaa"); 450 matchRegExp("(a+)+aa", "aaaa"); 451 } 452 453 456 public void testReset() { 457 Matcher m; 458 459 try { 460 m = createRegExp("a*aa").matcher("a"); 461 if (m.matchFromBeginning()) { 462 fail("found invalid match '" + m.toString() + 463 "' to regexp 'a*aa' in input 'a'"); 464 } 465 m.reset("aaaa"); 466 if (!m.matchFromBeginning()) { 467 fail("couldn't match 'aaaa' to regexp 'a*aa'"); 468 } else if (!m.toString().equals("aaaa")) { 469 fail("incorrect match for 'a*aa', found: '" + 470 m.toString() + "', expected: 'aaaa'"); 471 } 472 m = createRegExp("a*?b").matcher("aaa"); 473 if (m.matchFromBeginning()) { 474 fail("found invalid match '" + m.toString() + 475 "' to regexp 'a*?b' in input 'aaa'"); 476 } 477 m.reset("aaaaab"); 478 if (!m.matchFromBeginning()) { 479 fail("couldn't match 'aaaaab' to regexp 'a*?b'"); 480 } else if (!m.toString().equals("aaaaab")) { 481 fail("incorrect match for 'a*?b', found: '" + 482 m.toString() + "', expected: 'aaaaab'"); 483 } 484 } catch (IOException e) { 485 fail("io error: " + e.getMessage()); 486 } 487 } 488 489 497 private RegExp createRegExp(String pattern) { 498 try { 499 return new RegExp(pattern, false); 500 } catch (RegExpException e) { 501 fail("couldn't create regular expression '" + pattern + 502 "': " + e.getMessage()); 503 return null; 504 } 505 } 506 507 514 private void failCreateRegExp(String pattern) { 515 try { 516 new RegExp(pattern, false); 517 fail("regular expression '" + pattern + "' could be " + 518 "created although it isn't valid"); 519 } catch (RegExpException e) { 520 } 522 } 523 524 533 private void matchRegExp(String pattern, String input) { 534 matchRegExp(pattern, input, input); 535 } 536 537 547 private void matchRegExp(String pattern, String input, String match) { 548 RegExp r = createRegExp(pattern); 549 Matcher m = r.matcher(input); 550 551 try { 552 if (!m.matchFromBeginning()) { 553 fail("couldn't match '" + input + "' to regexp '" + 554 pattern + "'"); 555 } else if (!match.equals(m.toString())) { 556 fail("incorrect match for '" + pattern + "', found: '" + 557 m.toString() + "', expected: '" + match + "'"); 558 } 559 } catch (IOException e) { 560 fail("io error: " + e.getMessage()); 561 } 562 } 563 564 572 private void failMatchRegExp(String pattern, String input) { 573 RegExp r = createRegExp(pattern); 574 Matcher m = r.matcher(input); 575 576 try { 577 if (m.matchFromBeginning()) { 578 fail("found invalid match '" + m.toString() + 579 "' to regexp '" + pattern + "' in input '" + 580 input + "'"); 581 } 582 } catch (IOException e) { 583 fail("io error: " + e.getMessage()); 584 } 585 } 586 } 587 | Popular Tags |