KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > jfun > parsec > pattern > Patterns


1 /*****************************************************************************
2  * Copyright (C) Zephyr Business Solutions Corp. All rights reserved. *
3  * ------------------------------------------------------------------------- *
4  * The software in this package is published under the terms of the BSD *
5  * style license a copy of which has been included with this distribution in *
6  * the LICENSE.txt file. *
7  *****************************************************************************/

8 /*
9  * Created on Dec 16, 2004
10  *
11  * Author Ben Yu
12  */

13 package jfun.parsec.pattern;
14
15 import java.util.regex.Matcher JavaDoc;
16
17
18 /**
19  * This class provides all the basic Pattern implementations and all Pattern combinators.
20  * @author Ben Yu
21  *
22  * Dec 16, 2004
23  */

24 public final class Patterns {
25   
26   
27   /**
28    * @deprecated Use {@link #hasAtLeast(int)} instead.
29    */

30   public static Pattern chars_ge(int l){
31     return hasAtLeast(l);
32   }
33   /**
34    * @deprecated Use {@link #hasExact(int)} instead.
35    */

36   public static Pattern chars_eq(int l){
37     return hasExact(l);
38   }
39   /**
40    * Ensures the input has at least l characters left.
41    * match length is l if succeed.
42    * @param l the number of characters.
43    * @return the Pattern object.
44    */

45   public static Pattern hasAtLeast(final int l){
46     return new Pattern(){
47       public int match(final CharSequence JavaDoc src, final int len,
48           final int from){
49         if(from+l > len) return Pattern.MISMATCH;
50         else return l;
51       }
52     };
53   }
54   /**
55    * Ensures the input has exactly l characters left.
56    * match length is l if succeed.
57    * @param l the number of characters.
58    * @return the Pattern object.
59    */

60   public static Pattern hasExact(final int l){
61     return new Pattern(){
62       public int match(final CharSequence JavaDoc src, final int len,
63           final int from){
64         if(from+l != len) return Pattern.MISMATCH;
65         else return l;
66       }
67     };
68   }
69   /**
70    * Ensures the input has no character left.
71    * match length is 0 if succeed.
72    * @return the Pattern object.
73    */

74   public static Pattern eof(){
75     return hasExact(0);
76   }
77   /**
78    * Succeed with match length 1
79    * if the current character in the input is same as character c.
80    * Mismatch otherwise.
81    * @param c the character to compare with.
82    * @return the Pattern object.
83    */

84   public static Pattern isChar(final char c){
85     return new Pattern(){
86       public int match(final CharSequence JavaDoc src, final int len,
87           final int from){
88         if(from>=len) return Pattern.MISMATCH;
89         else if(src.charAt(from) != c) return Pattern.MISMATCH;
90         else return 1;
91       }
92     };
93   }
94   /**
95    * Succeed with match length 1
96    * if the current character in the input is between character c1 and c2.
97    * @param c1 the first character.
98    * @param c2 the second character.
99    * @return the Pattern object.
100    */

101   public static Pattern range(final char c1, final char c2){
102     return new Pattern(){
103       public int match(final CharSequence JavaDoc src, final int len,
104           final int from){
105         if(from>=len) return Pattern.MISMATCH;
106         final char c = src.charAt(from);
107         if(c >= c1 && c <= c2) return 1;
108         else return Pattern.MISMATCH;
109       }
110     };
111   }
112   /**
113    * Succeed with match length 1
114    * if the current character in the input is not between character c1 and c2.
115    * @param c1 the first character.
116    * @param c2 the second character.
117    * @return the Pattern object.
118    */

119   public static Pattern notRange(final char c1, final char c2){
120     return new Pattern(){
121       public int match(final CharSequence JavaDoc src, final int len,
122           final int from){
123         if(from>=len) return Pattern.MISMATCH;
124         final char c = src.charAt(from);
125         if(c >= c1 && c <= c2) return Pattern.MISMATCH;
126         else return 1;
127       }
128     };
129   }
130   /**
131    * Succeed with match length 1
132    * if the current character in the input is among the given characters.
133    * @param cs the characters to compare with.
134    * @return the Pattern object.
135    */

136   public static Pattern among(final char[] cs){
137     return isChar(CharPredicates.among(cs));
138   }
139   /**
140    * Succeed with match length 1
141    * if the current character in the input is not among the given characters.
142    * @param cs the characters to compare with.
143    * @return the Pattern object.
144    */

145   public static Pattern notAmong(final char[] cs){
146     return isChar(CharPredicates.notAmong(cs));
147   }
148   /**
149    * Succeed with match length 1
150    * if the current character in the input is not the same as character c.
151    * Mismatch otherwise.
152    * @param c the character to compare with.
153    * @return the Pattern object.
154    */

155   public static Pattern notChar(final char c){
156     return new Pattern(){
157       public int match(final CharSequence JavaDoc src, final int len,
158           final int from){
159         if(from>=len) return Pattern.MISMATCH;
160         else if(src.charAt(from) == c) return Pattern.MISMATCH;
161         else return 1;
162       }
163     };
164   }
165   /**
166    * Succeed with match length 1
167    * if the current character in the input satisfies the given predicate.
168    * Mismatch otherwise.
169    * @param cp the predicate object.
170    * @return the Pattern object.
171    */

172   public static Pattern isChar(final CharPredicate cp){
173     return new Pattern(){
174       public int match(final CharSequence JavaDoc src, final int len,
175           final int from){
176         if(from>=len) return Pattern.MISMATCH;
177         else if(cp.isChar(src.charAt(from))) return 1;
178         else return Pattern.MISMATCH;
179       }
180       public String JavaDoc toString(){
181         return ""+cp;
182       }
183     };
184   }
185   /**
186    * Succeed with match length 2
187    * if there are at least 2 characters in the input and the first character is '\'
188    * Mismatch otherwise.
189    * @return the Pattern object.
190    */

191   public static Pattern isEscaped(){
192     return new Pattern(){
193       public int match(final CharSequence JavaDoc src, final int len,
194           final int from){
195         if(from>=len-1) return Pattern.MISMATCH;
196         else if(src.charAt(from)=='\\') return 2;
197         else return Pattern.MISMATCH;
198       }
199     };
200   }
201
202   /**
203    * Matches a line comment that starts with a string
204    * and end with EOF or Line Feed character.
205    * @param open the line comment starting string.
206    * @return the Pattern object.
207    */

208   public static Pattern isLineComment(final String JavaDoc open){
209     return seq(isString(open), many(CharPredicates.notChar('\n')));
210   }
211   /**
212    * Matches a string.
213    * @return the Pattern object.
214    */

215   public static Pattern isString(final String JavaDoc str){
216     return new Pattern(){
217       public int match(final CharSequence JavaDoc src, final int len,
218           final int from){
219         if(len - from < str.length()) return MISMATCH;
220         return matchString(str, src, len, from);
221       }
222       public String JavaDoc toString(){
223         return str;
224       }
225     };
226   }
227   /**
228    * Matches a string case insensitively.
229    * @return the Pattern object.
230    */

231   public static Pattern isStringCI(final String JavaDoc str){
232     return new Pattern(){
233       public int match(final CharSequence JavaDoc src, final int len,
234           final int from){
235         if(len - from < str.length()) return MISMATCH;
236         return matchStringCI(str, src, len, from);
237       }
238     };
239   }
240   /**
241    * Matches a character if the input has at least 1 character
242    * and does not match the given string.
243    * @return the Pattern object.
244    */

245   public static Pattern notString(final String JavaDoc str){
246     return new Pattern(){
247       public int match(final CharSequence JavaDoc src, final int len,
248           final int from){
249         if(from >= len) return MISMATCH;
250         if(matchString(str, src, len, from) == Pattern.MISMATCH)
251           return 1;
252         else return MISMATCH;
253       }
254     };
255   }
256   /**
257    * Matches a character if the input has at least 1 character
258    * and does not match the given string case insensitively.
259    * @return the Pattern object.
260    */

261   public static Pattern notStringCI(final String JavaDoc str){
262     return new Pattern(){
263       public int match(final CharSequence JavaDoc src, final int len,
264           final int from){
265         if(from >= len) return MISMATCH;
266         if(matchStringCI(str, src, len, from) == Pattern.MISMATCH)
267           return 1;
268         else return MISMATCH;
269       }
270     };
271   }
272
273   private static boolean compareIgnoreCase(final char a, final char b){
274     return Character.toLowerCase(a) == Character.toLowerCase(b);
275   }
276
277   private static int matchString(String JavaDoc str,
278       final CharSequence JavaDoc src, final int len,
279       final int from){
280     final int slen = str.length();
281     if(len - from < slen) return Pattern.MISMATCH;
282     for(int i=0; i<slen; i++){
283       final char exp = str.charAt(i);
284       final char enc = src.charAt(from+i);
285       if(exp != enc){
286         return Pattern.MISMATCH;
287       }
288     }
289     return slen;
290   }
291
292   private static int matchStringCI(String JavaDoc str,
293       final CharSequence JavaDoc src, final int len,
294       final int from){
295     final int slen = str.length();
296     if(len - from < slen) return Pattern.MISMATCH;
297     for(int i=0; i<slen; i++){
298       final char exp = str.charAt(i);
299       final char enc = src.charAt(from+i);
300       if(!compareIgnoreCase(exp, enc)){
301         return Pattern.MISMATCH;
302       }
303     }
304     return slen;
305   }
306
307   /**
308    * Matches with match length 0 if the Pattern object pp mismatch.
309    * Mismatch otherwise.
310    * @param pp the Pattern object.
311    * @return the new Pattern object.
312    */

313   public static Pattern not(final Pattern pp){
314     return new Pattern(){
315       public int match(final CharSequence JavaDoc src, final int len,
316           final int from){
317         if(pp.match(src,len,from)!=Pattern.MISMATCH) return Pattern.MISMATCH;
318         else return 0;
319       }
320     };
321   }
322   /**
323    * Matches with match length 0 if the Pattern object pp matches.
324    * Mismatch otherwise.
325    * @param pp the Pattern object.
326    * @return the new Pattern object.
327    */

328   public static Pattern peek(final Pattern pp){
329     return new Pattern(){
330       public int match(final CharSequence JavaDoc src, final int len,
331           final int from){
332         if(pp.match(src,len,from)==Pattern.MISMATCH) return Pattern.MISMATCH;
333         else return 0;
334       }
335     };
336   }
337   /**
338    * if the first Pattern object pp1 mismatches, try the second Pattern object pp2.
339    * @param pp1 the 1st Pattern object.
340    * @param pp2 the 2nd Pattern object.
341    * @return the new Pattern object.
342    */

343   public static Pattern or(final Pattern pp1, final Pattern pp2){
344     return new Pattern(){
345       public int match(final CharSequence JavaDoc src, final int len,
346           final int from){
347         final int l1 = pp1.match(src,len,from);
348         if(l1!=Pattern.MISMATCH) return l1;
349         else return pp2.match(src, len, from);
350       }
351     };
352   }
353
354   /**
355    * Find the match length that matches
356    * all of the patterns in the given Pattern object array.
357    * Mismatch if any one mismatches.
358    * @param pps the Pattern array.
359    * @return the new Pattern object.
360    */

361   public static Pattern and(final Pattern...pps){
362     if(pps.length==0) return always();
363     if(pps.length==1) return pps[0];
364     return _and(pps);
365   }
366   /**
367    * First matches Pattern object pp1.
368    * If succeed, match the remaining input against Pattern pp2.
369    * Fails if either pp1 or pp2 fails.
370    * Succeed with the entire match length,
371    * which is the sum of the match length of pp1 and pp2.
372    * @param pp1 the 1st Pattern object to match.
373    * @param pp2 the 2nd Pattern object to match.
374    * @return the new Pattern object.
375    */

376   public static Pattern seq(final Pattern pp1, final Pattern pp2){
377     return new Pattern(){
378       public int match(final CharSequence JavaDoc src, final int len,
379           final int from){
380         final int l1 = pp1.match(src,len,from);
381         if(l1==Pattern.MISMATCH) return l1;
382         final int l2 = pp2.match(src, len, from+l1);
383         if(l2==Pattern.MISMATCH) return l2;
384         return l1+l2;
385       }
386     };
387   }
388   /**
389    * try an array of Pattern objects subsequently until one matches.
390    * Mismatch if the array is empty.
391    * @param pps the Pattern object array.
392    * @return the new Pattern object.
393    */

394   public static Pattern or(final Pattern... pps){
395     if(pps.length==0) return never();
396     else if(pps.length==1) return pps[0];
397     return _or(pps);
398   }
399
400   /**
401    * Runs an array of Pattern objects subsequently until one mismatches.
402    * Return the total match length if all succeed.
403    * @param pps the Pattern object array.
404    * @return the new Pattern object.
405    */

406   public static Pattern seq(final Pattern... pps){
407     if(pps.length==0) return always();
408     else if(pps.length==1) return pps[0];
409     return _seq(pps);
410   }
411
412   /**
413    * Matches if the input has at least n characters
414    * and the first n characters all satisfy the given predicate.
415    * @param n the number of characters to test.
416    * @param cp the predicate object.
417    * @return the Pattern object.
418    */

419   public static Pattern repeat(final int n, final CharPredicate cp){
420     if(n==0) return always();
421     if(n==1) return isChar(cp);
422     return new Pattern(){
423       public int match(final CharSequence JavaDoc src, final int len,
424           final int from){
425         return match_repeat(n, cp, src, len, from, 0);
426       }
427     };
428   }
429   /**
430    * Matches if the input n occurrences of Pattern pp.
431    * @param n the number of occurrences.
432    * @param pp the Pattern object.
433    * @return the new Pattern object.
434    */

435   public static Pattern repeat(final int n, final Pattern pp){
436     if(n==0) return always();
437     if(n==1) return pp;
438     return new Pattern(){
439       public int match(final CharSequence JavaDoc src, final int len,
440           final int from){
441         return match_repeat(n, pp, src, len, from, 0);
442       }
443     };
444   }
445   private static int min(int a, int b){
446     return a>b?b:a;
447   }
448   /**
449    * Matches if the input starts with min or more characters
450    * that all satisfy the given predicate,
451    * mismatch otherwise.
452    * @param min the minimal number of characters to match.
453    * @param cp the predicate.
454    * @return the Pattern object.
455    */

456   public static Pattern many(final int min, final CharPredicate cp){
457     if(min<0) throw new IllegalArgumentException JavaDoc("min<0");
458     return new Pattern(){
459       public int match(final CharSequence JavaDoc src, final int len,
460           final int from){
461         final int minlen = match_repeat(min, cp, src, len, from, 0);
462         if(minlen==MISMATCH) return MISMATCH;
463         return match_many(cp, src, len, from+minlen, minlen);
464       }
465     };
466   }
467   /**
468    * Matches 0 or more characters that all satisfy the given predicate.
469    * @param cp the predicate.
470    * @return the Pattern object.
471    */

472   public static Pattern many(final CharPredicate cp){
473     return new Pattern(){
474       public int match(final CharSequence JavaDoc src, final int len,
475           final int from){
476         return match_many(cp, src, len, from, 0);
477       }
478     };
479   }
480   /**
481    * Matches if the input starts with min or more occurrences of
482    * patterns recognized by Pattern object pp,
483    * mismatch otherwise.
484    * @param min the minimal number of occurrences to match.
485    * @param pp the Pattern object.
486    * @return the new Pattern object.
487    */

488   public static Pattern many(final int min, final Pattern pp){
489     if(min<0) throw new IllegalArgumentException JavaDoc("min<0");
490     return new Pattern(){
491       public int match(final CharSequence JavaDoc src, final int len,
492           final int from){
493         final int minlen = match_repeat(min, pp, src, len, from, 0);
494         if(MISMATCH==minlen) return MISMATCH;
495         return match_many(pp, src, len, from+minlen, minlen);
496       }
497     };
498   }
499   /**
500    * Matches 0 or more occurrences of
501    * patterns recognized by Pattern object pp.
502    * @param pp the Pattern object.
503    * @return the new Pattern object.
504    */

505   public static Pattern many(final Pattern pp){
506     return new Pattern(){
507       public int match(final CharSequence JavaDoc src, final int len,
508           final int from){
509         return match_many(pp, src, len, from, 0);
510       }
511     };
512   }
513   /**
514    * Matches at least min and at most max number of characters
515    * that satisfies the given predicate,
516    * mismatch otherwise.
517    * @param min the minimal number of characters.
518    * @param max the maximal number of characters.
519    * @param cp the predicate.
520    * @return the Pattern object.
521    */

522   public static Pattern some(final int min, final int max, final CharPredicate cp){
523     if(max<0 || min <0 || min > max) throw new IllegalArgumentException JavaDoc();
524     if(max == 0) return always();
525     return new Pattern(){
526       public int match(final CharSequence JavaDoc src, final int len,
527           final int from){
528         final int minlen = match_repeat(min, cp, src, len, from, 0);
529         if(minlen==MISMATCH) return MISMATCH;
530         return match_some(max-min, cp, src, len, from+minlen, minlen);
531       }
532     };
533   }
534   /**
535    * Matches at most max number of characters
536    * that satisfies the given predicate.
537    * @param max the maximal number of characters.
538    * @param cp the predicate.
539    * @return the Pattern object.
540    */

541   public static Pattern some(final int max, final CharPredicate cp){
542     if(max < 0) throw new IllegalArgumentException JavaDoc("max<0");
543     if(max == 0) return always();
544     return new Pattern(){
545       public int match(final CharSequence JavaDoc src, final int len,
546           final int from){
547         return match_some(max, cp, src, len, from, 0);
548       }
549     };
550   }
551   /**
552    * Matches at least min and at most max number of occurrences
553    * of pattern recognized by Pattern object pp,
554    * mismatch otherwise.
555    * @param min the minimal number of occurrences of pattern.
556    * @param max the maximal number of occurrences of pattern.
557    * @param pp the Pattern object.
558    * @return the new Pattern object.
559    */

560   public static Pattern some(final int min, final int max, final Pattern pp){
561     if(min<0 || max<0 || min>max) throw new IllegalArgumentException JavaDoc();
562     if(max == 0) return always();
563     return new Pattern(){
564       public int match(final CharSequence JavaDoc src, final int len,
565           final int from){
566         final int minlen = match_repeat(min, pp, src, len, from, 0);
567         if(MISMATCH==minlen) return MISMATCH;
568         return match_some(max-min, pp, src, len, from+minlen, minlen);
569       }
570     };
571   }
572   /**
573    * Matches at most max number of occurrences
574    * of pattern recognized by Pattern object pp.
575    * @param max the maximal number of occurrences of pattern.
576    * @param pp the Pattern object.
577    * @return the new Pattern object.
578    */

579   public static Pattern some(final int max, final Pattern pp){
580     if(max<0) throw new IllegalArgumentException JavaDoc("max<0");
581     if(max == 0) return always();
582     return new Pattern(){
583       public int match(final CharSequence JavaDoc src, final int len,
584           final int from){
585         return match_some(max, pp, src, len, from, 0);
586       }
587     };
588   }
589   /**
590    * Try two pattern objects, pick the one with the longer match length.
591    * If two pattern objects have the same length, the first one is favored.
592    * @param p1 the 1st pattern object.
593    * @param p2 the 2nd pattern object.
594    * @return the new Pattern object.
595    */

596   public static Pattern longer(final Pattern p1, final Pattern p2){
597     return longest(p1, p2);
598   }
599   /**
600    * Try an array of pattern objects, pick the one with the longest match length.
601    * If two pattern objects have the same length, the first one is favored.
602    * @param pps the array of Pattern objects.
603    * @return the new Pattern object.
604    */

605   public static Pattern longest(final Pattern... pps){
606     if(pps.length==0) return never();
607     if(pps.length==1) return pps[0];
608     return new Pattern(){
609       public int match(final CharSequence JavaDoc src, final int len,
610           final int from){
611         int r = MISMATCH;
612         for(int i=0;i<pps.length;i++){
613           final int l = pps[i].match(src,len,from);
614           if(l > r) r = l;
615         }
616         return r;
617       }
618     };
619   }
620   /**
621    * Try two pattern objects, pick the one with the shorter match length.
622    * If two pattern objects have the same length, the first one is favored.
623    * @param p1 the 1st pattern object.
624    * @param p2 the 2nd pattern object.
625    * @return the new Pattern object.
626    */

627   public static Pattern shorter(final Pattern p1, final Pattern p2){
628     return shortest(p1, p2);
629   }
630   /**
631    * Try an array of pattern objects, pick the one with the shortest match length.
632    * If two pattern objects have the same length, the first one is favored.
633    * @param pps the array of Pattern objects.
634    * @return the new Pattern object.
635    */

636   public static Pattern shortest(final Pattern... pps){
637     if(pps.length==0) return never();
638     if(pps.length==1) return pps[0];
639     return new Pattern(){
640       public int match(final CharSequence JavaDoc src, final int len,
641           final int from){
642         int r = MISMATCH;
643         for(int i=0;i<pps.length;i++){
644           final int l = pps[i].match(src,len,from);
645           if(l != MISMATCH){
646             if(r==MISMATCH || l < r)
647               r = l;
648           }
649         }
650         return r;
651       }
652     };
653   }
654   /**
655    * If the condiction Pattern object cond matches,
656    * match the remaining input against Pattern object yes.
657    * Otherwise, match the input against Pattern object no.
658    * @param cond the condition Pattern.
659    * @param yes the true Pattern.
660    * @param no the false Pattern.
661    * @return the new Pattern object.
662    */

663   public static Pattern ifelse(final Pattern cond, final Pattern yes, final Pattern no){
664     return new Pattern(){
665       public int match(final CharSequence JavaDoc src, final int len,
666           final int from){
667         final int lc = cond.match(src, len, from);
668         if(lc==MISMATCH){
669           return no.match(src, len, from);
670         }
671         else{
672           final int ly = yes.match(src, len, from+lc);
673           if(ly==MISMATCH) return MISMATCH;
674           else return lc+ly;
675         }
676       }
677     };
678   }
679   /**
680    * Matches characters that satisfies the given predicate
681    * for 1 or more times.
682    * Return the total match length.
683    * @return the new Pattern object.
684    */

685   public static Pattern many1(final CharPredicate cp){
686     return many(1, cp);
687   }
688   /**
689    * Match with 0 length even if Pattern object pp mismatches.
690    * @return the new Pattern object.
691    */

692   public static Pattern optional(final Pattern pp){
693     return new Pattern(){
694       public int match(final CharSequence JavaDoc src, final int len,
695           final int from){
696         final int l= pp.match(src, len, from);
697         return (l==Pattern.MISMATCH)?0:l;
698       }
699     };
700   }
701
702   /**
703    * A Pattern object that always returns MISMATCH.
704    * @return the Pattern object.
705    */

706   public static Pattern never(){
707     return _never;
708   }
709   /**
710    * A Pattern object that always matches with 0 length.
711    * @return the Pattern object.
712    */

713   public static Pattern always(){
714     return _always;
715   }
716   
717   
718   
719   /**
720    * a decimal number that has at least one number before the decimal point.
721    * the decimal point and the numbers to the right are optional.
722    * 0, 11., 2.3 are all good candidates. While .1, . are not.
723    * @return the Pattern object.
724    */

725   public static Pattern isDecimalL(){
726     final CharPredicate cp = CharPredicates.isDigit();
727     return seq(many1(cp),
728         optional(
729             seq(isChar('.'), many(cp))
730         )
731     );
732   }
733   /**
734    * Recognizes a decimal point and 1 or more digits after it.
735    * @return the Pattern object.
736    */

737   public static Pattern isDecimalR(){
738     return seq(isChar('.'), many1(CharPredicates.isDigit()));
739   }
740   /**
741    * Recognizes a decimal number that can start with a decimal point.
742    * @return the Pattern object.
743    */

744   public static Pattern isDecimal(){
745     return or(isDecimalL(), isDecimalR());
746   }
747   /**
748    * a pattern for a standard english word.
749    * it starts with an underscore or an alphametic character, followed by 0 or more alphanumeric characters.
750    * @return the Pattern object.
751    */

752   public static Pattern isWord(){
753     /*
754     return seq(isChar(CharPredicates.isAlpha_()),
755         many(CharPredicates.isAlphaNumeric()));
756     */

757     return regex("[a-zA-Z_][0-9a-zA-Z_]*");
758   }
759   /**
760    * pattern for an integer. ([0-9]+)
761    * @return the Pattern object.
762    */

763   public static Pattern isInteger(){
764     return many1(CharPredicates.isDigit());
765   }
766   /**
767    * pattern for a octal integer that starts with a 0 and followed by 0 or more [0-7] characters.
768    * @return the Pattern object.
769    */

770   public static Pattern isOctInteger(){
771     return seq(isChar('0'), many(CharPredicates.range('0','7')));
772   }
773   /**
774    * pattern for a decimal integer.
775    * It starts with a non-zero digit and followed by 0 or more digits.
776    * @return the Pattern object.
777    */

778   public static Pattern isDecInteger(){
779     return seq(range('1', '9'), many(CharPredicates.isDigit()));
780   }
781   /**
782    * pattern for a hex integer.
783    * It starts with a 0x or 0X, followed by 1 or more hex digits.
784    * @return the Pattern object.
785    */

786   public static Pattern isHexInteger(){
787     return seq(or(isString("0x"), isString("0X")),
788         many1(CharPredicates.isHexDigit()));
789   }
790   /**
791    * Recognizes a the exponent part of a scientific number notation.
792    * It can be e12, E-1, etc.
793    * @return the Pattern object.
794    */

795   public static Pattern isExponential(){
796     return seq(
797         among(new char[]{'e','E'}),
798         optional(isChar('-')),
799         isInteger()
800     );
801   }
802   /**
803    * Adapt a regular expression pattern to a jfun.parsec.pattern.Pattern;
804    * @param p the regular expression pattern.
805    * @return the jfun.parsec.pattern.Pattern object.
806    */

807   public static Pattern regex(final java.util.regex.Pattern JavaDoc p){
808     return new Pattern(){
809       public int match(final CharSequence JavaDoc src, final int len,
810           final int from){
811         if(from > len) return Pattern.MISMATCH;
812         final Matcher JavaDoc matcher = p.matcher(src.subSequence(from, len));
813         if(matcher.lookingAt()){
814           return matcher.end();
815         }
816         else return Pattern.MISMATCH;
817       }
818     };
819   }
820   /**
821    * Adapt a regular expression pattern string to a jfun.parsec.pattern.Pattern;
822    * @param s the regular expression pattern string.
823    * @return the jfun.parsec.pattern.Pattern object.
824    */

825   public static Pattern regex(String JavaDoc s){
826     return regex(java.util.regex.Pattern.compile(s));
827   }
828   
829   /**
830    * Get the Pattern object that matches any regular expression pattern
831    * string in the form of /some pattern here/.
832    * '\' is used as escape character.
833    */

834   public static Pattern regex_pattern(){
835     return regex_pattern;
836   }
837   /**
838    * Get the pattern that matches regular expression modifiers.
839    * Basically this is a list of alpha characters.
840    */

841   public static Pattern regex_modifiers(){
842     return regex_modifiers;
843   }
844   private static final Pattern _never = new Pattern(){
845     public int match(final CharSequence JavaDoc src, final int len,
846         final int from){
847       return Pattern.MISMATCH;
848     }
849   };
850   private static final Pattern _always = new Pattern(){
851     public int match(final CharSequence JavaDoc src, final int len,
852         final int from){
853       return 0;
854     }
855   };
856   
857   private static int match_repeat(final int n, final CharPredicate cp,
858       final CharSequence JavaDoc src, final int len, final int from, final int acc){
859     final int tail = from + n;
860     if(tail > len) return Pattern.MISMATCH;
861     for(int i=from;i<tail;i++){
862       if(!cp.isChar(src.charAt(i))) return Pattern.MISMATCH;
863     }
864     return n+acc;
865   }
866   private static int match_repeat(final int n, final Pattern pp,
867       final CharSequence JavaDoc src, final int len, final int from, final int acc){
868     int end = from;
869     for(int i=0;i<n;i++){
870       final int l = pp.match(src,len,end);
871       if(l==Pattern.MISMATCH) return Pattern.MISMATCH;
872       end += l;
873     }
874     return end-from+acc;
875   }
876   private static int match_some(final int max, final CharPredicate cp,
877       final CharSequence JavaDoc src, final int len, final int from, final int acc){
878     final int k = min(max+from, len);
879     for(int i=from;i<k;i++){
880       if(!cp.isChar(src.charAt(i))) return i-from+acc;
881     }
882     return k-from+acc;
883   }
884   private static int match_some(final int max, final Pattern pp,
885       final CharSequence JavaDoc src, final int len, final int from, final int acc){
886     int begin = from;
887     for(int i=0;i<max;i++){
888       final int l = pp.match(src, len, begin);
889       if(Pattern.MISMATCH==l) return begin-from+acc;
890       begin+=l;
891     }
892     return begin-from+acc;
893   }
894   private static int match_many(final CharPredicate cp,
895       final CharSequence JavaDoc src, final int len, final int from, final int acc){
896     for(int i=from;i<len;i++){
897       if(!cp.isChar(src.charAt(i))) return i-from+acc;
898     }
899     return len-from+acc;
900   }
901   private static int match_many(final Pattern pp,
902       final CharSequence JavaDoc src, final int len, final int from, final int acc){
903     for(int i=from;;){
904       final int l = pp.match(src,len,i);
905       if(Pattern.MISMATCH==l) return i-from+acc;
906       //we simply stop the loop when infinity is found. this may make the parser more user-friendly.
907
if(l==0) return i-from+acc;//throw new IllegalStateException("infinite loop");
908
i += l;
909     }
910   }
911   private static Pattern _or(final Pattern[] pps){
912     return new Pattern(){
913       public int match(final CharSequence JavaDoc src, final int len,
914           final int from){
915         for(int i=0;i<pps.length;i++){
916           final int l = pps[i].match(src,len,from);
917           if(l!=Pattern.MISMATCH) return l;
918         }
919         return Pattern.MISMATCH;
920       }
921     };
922   }
923   private static Pattern _seq(final Pattern[] pps){
924     return new Pattern(){
925       public int match(final CharSequence JavaDoc src, final int len,
926           final int from){
927         int end = from;
928         for(int i=0;i<pps.length;i++){
929           final int l = pps[i].match(src,len,end);
930           if(l==Pattern.MISMATCH) return l;
931           end += l;
932         }
933         return end-from;
934       }
935     };
936   }
937   private static Pattern _and(final Pattern[] pps){
938     return new Pattern(){
939       public int match(final CharSequence JavaDoc src, final int len,
940           final int from){
941         int ret = 0;
942         for(int i=0;i<pps.length;i++){
943           final int l = pps[i].match(src,len,from);
944           if(l==MISMATCH) return MISMATCH;
945           if(l>ret) ret=l;
946         }
947         return ret;
948       }
949     };
950   }
951   
952   private static final jfun.parsec.pattern.Pattern getRegularExpressionPattern(){
953     final jfun.parsec.pattern.Pattern quote = jfun.parsec.pattern.Patterns.isChar('/');
954     final jfun.parsec.pattern.Pattern escape = jfun.parsec.pattern.Patterns.isChar('\\')
955       .seq(jfun.parsec.pattern.Patterns.hasAtLeast(1));
956     final char[] not_allowed = {'/', '\n', '\r', '\\'};
957     final jfun.parsec.pattern.Pattern content = jfun.parsec.pattern.Patterns.or(
958         escape, jfun.parsec.pattern.Patterns.notAmong(not_allowed)
959     );
960     return quote.seq(content.many()).seq(quote);
961   }
962   private static final jfun.parsec.pattern.Pattern getModifiersPattern(){
963     return jfun.parsec.pattern.Patterns.isChar(CharPredicates.isAlpha()).many();
964   }
965   private static final Pattern regex_pattern = getRegularExpressionPattern();
966   private static final Pattern regex_modifiers = getModifiersPattern();
967 }
968
Popular Tags