KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > percederberg > grammatica > parser > re > TestRegExp


/*
 * TestRegExp.java
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free
 * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 * MA 02111-1307, USA.
 *
 * Copyright (c) 2003-2005 Per Cederberg. All rights reserved.
 */

21
package net.percederberg.grammatica.parser.re;

import java.io.IOException;

import junit.framework.TestCase;
27
28 /**
29  * A test case for the RegExp class.
30  *
31  * @author Per Cederberg, <per at percederberg dot net>
32  * @version 1.5
33  */

34 public class TestRegExp extends TestCase {
35
36     /**
37      * The ASCII alphabet characters.
38      */

39     private static final String JavaDoc ASCII_ALPHABET =
40         "ABCDEFGHIJKLMNOPQRSTUNWXYZabcdefghijklmnopqrstuvwxyz";
41
42     /**
43      * A set of normal characters from ISO-8859-1 .
44      */

45     private static final String JavaDoc LATIN_1_CHARACTERS =
46         "ÁÀÄÂÅÉÈËÊÍÌÏÎÓÒÖÔÕÚÙÜÛÝáàäâãéèëêíìïîóòöôõúùüûýÿ";
47
48     /**
49      * A set of symbol characters from ISO-8859-1 .
50      */

51     private static final String JavaDoc LATIN_1_SYMBOLS =
52         "§!#¤%&/=`'½@£~-_,:;©Þ®ªß«»µ¡¿²³¼¢";
53
54     /**
55      * A set of digit characters.
56      */

57     private static final String JavaDoc DIGITS =
58         "0123456789";
59
60     /**
61      * A set of whitespace characters.
62      */

63     private static final String JavaDoc WHITESPACE =
64         " \t\n\r\f\r\n\u000B";
65
66     /**
67      * Creates a new test case.
68      *
69      * @param name the test case name
70      */

71     public TestRegExp(String JavaDoc name) {
72         super(name);
73     }
74
75     /**
76      * Tests various regular expression syntax errors.
77      */

78     public void testSyntaxErrors() {
79         failCreateRegExp("");
80         failCreateRegExp("?");
81         failCreateRegExp("*");
82         failCreateRegExp("+");
83         failCreateRegExp("{0}");
84         failCreateRegExp("(");
85         failCreateRegExp(")");
86         failCreateRegExp("[ (])");
87         failCreateRegExp("+|*");
88     }
89
90     /**
91      * Tests matching of plain characters.
92      */

93     public void testCharacters() {
94         matchRegExp(ASCII_ALPHABET, ASCII_ALPHABET);
95         matchRegExp(LATIN_1_CHARACTERS, LATIN_1_CHARACTERS);
96         matchRegExp(LATIN_1_SYMBOLS, LATIN_1_SYMBOLS);
97         matchRegExp(DIGITS, DIGITS);
98         matchRegExp(WHITESPACE, WHITESPACE);
99     }
100
101     /**
102      * Tests matching of special characters.
103      */

104     public void testSpecialCharacters() {
105         matchRegExp(".*", ASCII_ALPHABET);
106         matchRegExp(".*", LATIN_1_CHARACTERS);
107         matchRegExp(".*", LATIN_1_SYMBOLS);
108         matchRegExp(".*", DIGITS);
109         matchRegExp(".*", " \t");
110         failMatchRegExp(".+", "\n");
111         failMatchRegExp(".+", "\r");
112         failMatchRegExp(".+", "\r\n");
113         failMatchRegExp(".+", "\u0085");
114         failMatchRegExp(".+", "\u2028");
115         failMatchRegExp(".+", "\u2029");
116         failCreateRegExp("^");
117         failCreateRegExp("$");
118     }
119
120     /**
121      * Tests matching of character set escape sequences.
122      */

123     public void testCharacterEscapes() {
124         matchRegExp("\\d+", DIGITS);
125         failMatchRegExp("\\d+", ASCII_ALPHABET);
126         failMatchRegExp("\\d+", WHITESPACE);
127         matchRegExp("\\D+", ASCII_ALPHABET);
128         matchRegExp("\\D+", WHITESPACE);
129         failMatchRegExp("\\D+", DIGITS);
130         matchRegExp("\\s+", WHITESPACE);
131         failMatchRegExp("\\s+", ASCII_ALPHABET);
132         matchRegExp("\\S+", ASCII_ALPHABET);
133         failMatchRegExp("\\S+", WHITESPACE);
134         matchRegExp("\\w+", ASCII_ALPHABET);
135         matchRegExp("\\w+", DIGITS);
136         matchRegExp("\\w+", "_");
137         failMatchRegExp("\\w+", WHITESPACE);
138         failMatchRegExp("\\w+", LATIN_1_CHARACTERS);
139         failMatchRegExp("\\W+", ASCII_ALPHABET);
140         failMatchRegExp("\\W+", DIGITS);
141         failMatchRegExp("\\W+", "_");
142         matchRegExp("\\W+", WHITESPACE);
143         matchRegExp("\\W+", LATIN_1_CHARACTERS);
144     }
145
146     /**
147      * Tests matching of symbol escape sequences.
148      */

149     public void testSymbolEscapes() {
150         matchRegExp("\\\\", "\\");
151         matchRegExp("\\\"", "\"");
152         matchRegExp("\\'", "'");
153         matchRegExp("\\.", ".");
154         matchRegExp("\\*", "*");
155         matchRegExp("\\+", "+");
156         matchRegExp("\\?", "?");
157         matchRegExp("\\(", "(");
158         matchRegExp("\\)", ")");
159         matchRegExp("\\{", "{");
160         matchRegExp("\\}", "}");
161         matchRegExp("\\[", "[");
162         matchRegExp("\\]", "]");
163         matchRegExp("\\@", "@");
164         matchRegExp("\\<", "<");
165         matchRegExp("\\>", ">");
166         matchRegExp("\\$", "$");
167         matchRegExp("\\%", "%");
168         matchRegExp("\\&", "&");
169     }
170
171     /**
172      * Tests matching of control escape sequences.
173      */

174     public void testControlEscapes() {
175         matchRegExp("\\t", "\t");
176         matchRegExp("\\n", "\n");
177         matchRegExp("\\r", "\r");
178         matchRegExp("\\f", "\f");
179         matchRegExp("\\a", "\u0007");
180         matchRegExp("\\e", "\u001B");
181     }
182
183     /**
184      * Tests matching of octal escape sequences.
185      */

186     public void testOctalEscapes() {
187         failCreateRegExp("\\0");
188         matchRegExp("\\01", "\01");
189         matchRegExp("\\012", "\012");
190         matchRegExp("\\0101", "A");
191         matchRegExp("\\01174", "O4");
192         matchRegExp("\\0117a", "Oa");
193         matchRegExp("\\018", "\018");
194         matchRegExp("\\0118", "\0118");
195         failCreateRegExp("\\08");
196         failCreateRegExp("\\043");
197         failCreateRegExp("\\0432");
198     }
199
200     /**
201      * Tests matching of hexadecimal escape sequences.
202      */

203     public void testHexEscapes() {
204         failCreateRegExp("\\x");
205         failCreateRegExp("\\x1");
206         failCreateRegExp("\\x1g");
207         matchRegExp("\\x41", "A");
208         matchRegExp("\\x4f", "O");
209         matchRegExp("\\xABC", "\u00ABC");
210     }
211
212     /**
213      * Tests matching of unicode escape sequences.
214      */

215     public void testUnicodeEscapes() {
216         failCreateRegExp("\\u");
217         failCreateRegExp("\\u1");
218         failCreateRegExp("\\u11");
219         failCreateRegExp("\\u111");
220         failCreateRegExp("\\u111g");
221         matchRegExp("\\u0041", "A");
222         matchRegExp("\\u004f", "O");
223         matchRegExp("\\u00ABC", "\u00ABC");
224     }
225
226     /**
227      * Tests matching of invalid escape characters.
228      */

229     public void testInvalidEscapes() {
230         failCreateRegExp("\\A");
231         failCreateRegExp("\\B");
232         failCreateRegExp("\\C");
233         failCreateRegExp("\\E");
234         failCreateRegExp("\\F");
235         failCreateRegExp("\\G");
236         failCreateRegExp("\\H");
237         failCreateRegExp("\\I");
238         failCreateRegExp("\\J");
239         failCreateRegExp("\\K");
240         failCreateRegExp("\\L");
241         failCreateRegExp("\\M");
242         failCreateRegExp("\\N");
243         failCreateRegExp("\\O");
244         failCreateRegExp("\\P");
245         failCreateRegExp("\\Q");
246         failCreateRegExp("\\R");
247         failCreateRegExp("\\T");
248         failCreateRegExp("\\U");
249         failCreateRegExp("\\V");
250         failCreateRegExp("\\X");
251         failCreateRegExp("\\Y");
252         failCreateRegExp("\\Z");
253         failCreateRegExp("\\b");
254         failCreateRegExp("\\c");
255         failCreateRegExp("\\g");
256         failCreateRegExp("\\h");
257         failCreateRegExp("\\i");
258         failCreateRegExp("\\j");
259         failCreateRegExp("\\k");
260         failCreateRegExp("\\l");
261         failCreateRegExp("\\m");
262         failCreateRegExp("\\o");
263         failCreateRegExp("\\p");
264         failCreateRegExp("\\q");
265         failCreateRegExp("\\u");
266         failCreateRegExp("\\v");
267         failCreateRegExp("\\y");
268         failCreateRegExp("\\z");
269     }
270
271     /**
272      * Tests matching of character sets.
273      */

274     public void testCharacterSet() {
275         matchRegExp("[ab]", "a");
276         matchRegExp("[ab]", "b");
277         failMatchRegExp("[ab]", "c");
278         failMatchRegExp("[^ab]", "a");
279         failMatchRegExp("[^ab]", "b");
280         matchRegExp("[^ab]", "c");
281         matchRegExp("[A-Za-z]+", ASCII_ALPHABET);
282         failMatchRegExp("[A-Za-z]+", DIGITS);
283         failMatchRegExp("[A-Za-z]+", WHITESPACE);
284         failMatchRegExp("[^A-Za-z]+", ASCII_ALPHABET);
285         matchRegExp("[^A-Za-z]+", DIGITS);
286         matchRegExp("[^A-Za-z]+", WHITESPACE);
287         matchRegExp("[.]", ".");
288         failMatchRegExp("[.]", "a");
289         matchRegExp("[a-]+", "a-");
290         matchRegExp("[-a]+", "a-");
291         matchRegExp("[a-]+", "ab", "a");
292         matchRegExp("[ \\t\\n\\r\\f\\x0B]*", WHITESPACE);
293     }
294
295     /**
296      * Tests matching of various greedy quantifiers.
297      */

298     public void testGreedyQuantifiers() {
299         matchRegExp("a?", "");
300         matchRegExp("a?", "a");
301         matchRegExp("a?", "aaaa", "a");
302         matchRegExp("a*", "");
303         matchRegExp("a*", "aaaa");
304         failMatchRegExp("a+", "");
305         matchRegExp("a+", "a");
306         matchRegExp("a+", "aaaa");
307         failCreateRegExp("a{0}");
308         failMatchRegExp("a{3}", "aa");
309         matchRegExp("a{3}", "aaa");
310         matchRegExp("a{3}", "aaaa", "aaa");
311         failMatchRegExp("a{3,}", "aa");
312         matchRegExp("a{3,}", "aaa");
313         matchRegExp("a{3,}", "aaaaa");
314         failMatchRegExp("a{2,3}", "a");
315         matchRegExp("a{2,3}", "aa");
316         matchRegExp("a{2,3}", "aaa");
317         matchRegExp("a{2,3}", "aaaa", "aaa");
318     }
319
320     /**
321      * Tests matching of various reluctant quantifiers.
322      */

323     public void testReluctantQuantifiers() {
324         matchRegExp("a??", "");
325         matchRegExp("a??", "a", "");
326         matchRegExp("a*?", "");
327         matchRegExp("a*?", "aaaa", "");
328         failMatchRegExp("a+?", "");
329         matchRegExp("a+?", "a");
330         matchRegExp("a+?", "aaaa", "a");
331         failMatchRegExp("a{3}?", "aa");
332         failCreateRegExp("a{0}?");
333         matchRegExp("a{3}?", "aaa");
334         matchRegExp("a{3}?", "aaaa", "aaa");
335         failMatchRegExp("a{3,}?", "aa");
336         matchRegExp("a{3,}?", "aaa");
337         matchRegExp("a{3,}?", "aaaaa", "aaa");
338         failMatchRegExp("a{2,3}?", "a");
339         matchRegExp("a{2,3}?", "aa");
340         matchRegExp("a{2,3}?", "aaa", "aa");
341         matchRegExp("a{2,3}?", "aaaa", "aa");
342     }
343
344     /**
345      * Tests matching of various possessive quantifiers.
346      */

347     public void testPossessiveQuantifiers() {
348         matchRegExp("a?+", "");
349         matchRegExp("a?+", "a");
350         matchRegExp("a*+", "");
351         matchRegExp("a*+", "aaaa");
352         failMatchRegExp("a++", "");
353         matchRegExp("a++", "a");
354         matchRegExp("a++", "aaaa");
355         failMatchRegExp("a{3}+", "aa");
356         failCreateRegExp("a{0}+");
357         matchRegExp("a{3}+", "aaa");
358         matchRegExp("a{3}+", "aaaa", "aaa");
359         failMatchRegExp("a{3,}+", "aa");
360         matchRegExp("a{3,}+", "aaa");
361         matchRegExp("a{3,}+", "aaaaa", "aaaaa");
362         failMatchRegExp("a{2,3}+", "a");
363         matchRegExp("a{2,3}+", "aa");
364         matchRegExp("a{2,3}+", "aaa");
365         matchRegExp("a{2,3}+", "aaaa", "aaa");
366     }
367
368     /**
369      * Tests the backtracking over the quantifier matches.
370      */

371     public void testQuantifierBacktracking() {
372         matchRegExp("a?a", "a");
373         matchRegExp("a*a", "aaaa");
374         matchRegExp("a*aaaa", "aaaa");
375         failMatchRegExp("a*aaaa", "aaa");
376         matchRegExp("a+a", "aaaa");
377         matchRegExp("a+aaa", "aaaa");
378         failMatchRegExp("a+aaaa", "aaaa");
379         failMatchRegExp("a{3,}a", "aaa");
380         matchRegExp("a{3,}a", "aaaaa");
381         matchRegExp("a{2,3}a", "aaa");
382         failMatchRegExp("a{2,3}a", "aa");
383         matchRegExp("a??b", "ab");
384         matchRegExp("a*?b", "aaab");
385         matchRegExp("a+?b", "aaab");
386         matchRegExp("a{3,}?b", "aaaaab");
387         matchRegExp("a{2,3}?b", "aaab");
388         failMatchRegExp("a?+a", "a");
389         failMatchRegExp("a*+a", "aaaa");
390         failMatchRegExp("a++a", "aaaa");
391         failMatchRegExp("a{3,}+a", "aaaaa");
392         failMatchRegExp("a{2,3}+a", "aaa");
393     }
394
395     /**
396      * Tests the quantifier backtracking for stack overflows.
397      * (Bug #3632)
398      */

399     public void testQuantifierStackOverflow() {
400         StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
401         String JavaDoc str;
402
403         for (int i = 0; i < 4096; i++) {
404             buffer.append("a");
405         }
406         str = buffer.toString();
407         matchRegExp("a*" + str, str);
408         failMatchRegExp("a*a" + str, str);
409         matchRegExp("a*?b", str + "b");
410         failMatchRegExp("a*?b", str);
411         matchRegExp("a*+", str);
412         failMatchRegExp("a*+a", str);
413     }
414
415     /**
416      * Tests matching of various logical operators.
417      */

418     public void testLogicalOperators() {
419         matchRegExp("a|ab|b", "a");
420         matchRegExp("a|ab|b", "b");
421         matchRegExp("a|ab|b", "ab");
422         matchRegExp("(ab)", "ab");
423         matchRegExp("(a)(b)", "ab");
424     }
425
426     /**
427      * Tests the regular expression operator associativity.
428      */

429     public void testAssociativity() {
430         matchRegExp("ab?c", "ac");
431         failMatchRegExp("ab?c", "c");
432         matchRegExp("aa|b", "aa");
433         failMatchRegExp("aa|b", "ab");
434         matchRegExp("ab|bc", "ab");
435         matchRegExp("ab|bc", "bc");
436         matchRegExp("(a|b)c", "ac");
437         matchRegExp("(a|b)c", "bc");
438         failMatchRegExp("(a|b)c", "abc");
439     }
440
441     /**
442      * Tests matching of various complex expressions.
443      */

444     public void testComplex() {
445         matchRegExp("a*-", "aa-");
446         matchRegExp("([) ])+", ") ))");
447         matchRegExp("a*a*aa", "aa");
448         matchRegExp("(a*)*aa", "aaaa");
449         matchRegExp("a+a+aa", "aaaa");
450         matchRegExp("(a+)+aa", "aaaa");
451     }
452
453     /**
454      * Tests resetting the matcher with another input string.
455      */

456     public void testReset() {
457         Matcher m;
458
459         try {
460             m = createRegExp("a*aa").matcher("a");
461             if (m.matchFromBeginning()) {
462                 fail("found invalid match '" + m.toString() +
463                      "' to regexp 'a*aa' in input 'a'");
464             }
465             m.reset("aaaa");
466             if (!m.matchFromBeginning()) {
467                 fail("couldn't match 'aaaa' to regexp 'a*aa'");
468             } else if (!m.toString().equals("aaaa")) {
469                 fail("incorrect match for 'a*aa', found: '" +
470                      m.toString() + "', expected: 'aaaa'");
471             }
472             m = createRegExp("a*?b").matcher("aaa");
473             if (m.matchFromBeginning()) {
474                 fail("found invalid match '" + m.toString() +
475                      "' to regexp 'a*?b' in input 'aaa'");
476             }
477             m.reset("aaaaab");
478             if (!m.matchFromBeginning()) {
479                 fail("couldn't match 'aaaaab' to regexp 'a*?b'");
480             } else if (!m.toString().equals("aaaaab")) {
481                 fail("incorrect match for 'a*?b', found: '" +
482                      m.toString() + "', expected: 'aaaaab'");
483             }
484         } catch (IOException JavaDoc e) {
485             fail("io error: " + e.getMessage());
486         }
487     }
488
489     /**
490      * Creates a new regular expression. If the expression couldn't be
491      * parsed correctly, a test failure will be reported.
492      *
493      * @param pattern the pattern to use
494      *
495      * @return the newly created regular expression
496      */

497     private RegExp createRegExp(String JavaDoc pattern) {
498         try {
499             return new RegExp(pattern, false);
500         } catch (RegExpException e) {
501             fail("couldn't create regular expression '" + pattern +
502                  "': " + e.getMessage());
503             return null;
504         }
505     }
506
507     /**
508      * Checks that a specified regular expression pattern is
509      * erroneous. If the regular expression class doesn't detect the
510      * error, a test failure will be reported.
511      *
512      * @param pattern the pattern to check
513      */

514     private void failCreateRegExp(String JavaDoc pattern) {
515         try {
516             new RegExp(pattern, false);
517             fail("regular expression '" + pattern + "' could be " +
518                  "created although it isn't valid");
519         } catch (RegExpException e) {
520             // Failure was expected
521
}
522     }
523
524     /**
525      * Checks that a specified regular expression matches an input
526      * string. The whole input string must be matched by the regular
527      * expression. This method will report a failure if the regular
528      * expression couldn't be created or if the match wasn't exact.
529      *
530      * @param pattern the regular expression to check
531      * @param input the input and match string
532      */

533     private void matchRegExp(String JavaDoc pattern, String JavaDoc input) {
534         matchRegExp(pattern, input, input);
535     }
536
537     /**
538      * Checks that a specified regular expression matches an input
539      * string. The exact match is compared to a specified match. This
540      * method will report a failure if the regular expression couldn't
541      * be created or if the match wasn't exact.
542      *
543      * @param pattern the regular expression to check
544      * @param input the input string
545      * @param match the match string
546      */

547     private void matchRegExp(String JavaDoc pattern, String JavaDoc input, String JavaDoc match) {
548         RegExp r = createRegExp(pattern);
549         Matcher m = r.matcher(input);
550
551         try {
552             if (!m.matchFromBeginning()) {
553                 fail("couldn't match '" + input + "' to regexp '" +
554                      pattern + "'");
555             } else if (!match.equals(m.toString())) {
556                 fail("incorrect match for '" + pattern + "', found: '" +
557                      m.toString() + "', expected: '" + match + "'");
558             }
559         } catch (IOException JavaDoc e) {
560             fail("io error: " + e.getMessage());
561         }
562     }
563
564     /**
565      * Checks that a specified regular expression does not match the
566      * input string. This method will report a failure if the regular
567      * expression couldn't be created or if a match was found.
568      *
569      * @param pattern the regular expression to check
570      * @param input the input and match string
571      */

572     private void failMatchRegExp(String JavaDoc pattern, String JavaDoc input) {
573         RegExp r = createRegExp(pattern);
574         Matcher m = r.matcher(input);
575
576         try {
577             if (m.matchFromBeginning()) {
578                 fail("found invalid match '" + m.toString() +
579                      "' to regexp '" + pattern + "' in input '" +
580                      input + "'");
581             }
582         } catch (IOException JavaDoc e) {
583             fail("io error: " + e.getMessage());
584         }
585     }
586 }
587
Popular Tags