KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > oro > text > regex > Perl5Matcher


1 package org.apache.oro.text.regex;
2
3 /* ====================================================================
4  * The Apache Software License, Version 1.1
5  *
6  * Copyright (c) 2000 The Apache Software Foundation. All rights
7  * reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  *
16  * 2. Redistributions in binary form must reproduce the above copyright
17  * notice, this list of conditions and the following disclaimer in
18  * the documentation and/or other materials provided with the
19  * distribution.
20  *
21  * 3. The end-user documentation included with the redistribution,
22  * if any, must include the following acknowledgment:
23  * "This product includes software developed by the
24  * Apache Software Foundation (http://www.apache.org/)."
25  * Alternately, this acknowledgment may appear in the software itself,
26  * if and wherever such third-party acknowledgments normally appear.
27  *
28  * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
29  * must not be used to endorse or promote products derived from this
30  * software without prior written permission. For written
31  * permission, please contact apache@apache.org.
32  *
33  * 5. Products derived from this software may not be called "Apache"
34  * or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
35  * name, without prior written permission of the Apache Software Foundation.
36  *
37  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
41  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
44  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
45  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
46  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
47  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48  * SUCH DAMAGE.
49  * ====================================================================
50  *
51  * This software consists of voluntary contributions made by many
52  * individuals on behalf of the Apache Software Foundation. For more
53  * information on the Apache Software Foundation, please see
54  * <http://www.apache.org/>.
55  *
56  * Portions of this software are based upon software originally written
57  * by Daniel F. Savarese. We appreciate his contributions.
58  */

59
60 import java.io.IOException JavaDoc;
61 import java.util.*;
62
63 /**
64  * The Perl5Matcher class is used to match regular expressions
65  * (conforming to the Perl5 regular expression syntax) generated by
66  * Perl5Compiler.
67
68  @author <a HREF="mailto:dfs@savarese.org">Daniel F. Savarese</a>
69  @version $Id: Perl5Matcher.java,v 1.1.1.1 2000/07/23 23:08:53 jon Exp $
70
71  * @see PatternMatcher
72  * @see Perl5Compiler
73  */

74 public final class Perl5Matcher implements PatternMatcher {
75   private static final char __EOS = Character.MAX_VALUE;
76   private static final int __INITIAL_NUM_OFFSETS = 20;
77
78   private boolean __multiline = false, __lastSuccess = false;
79   private char __previousChar, __input[], __originalInput[];
80   private Perl5Repetition __currentRep;
81   private int __numParentheses, __bol, __eol, __currentOffset, __endOffset;
82
83   private char[] __program;
84   private int __expSize, __inputOffset, __lastParen;
85   private int[] __beginMatchOffsets, __endMatchOffsets;
86   private Stack __stack = new Stack();
87   private Perl5MatchResult __lastMatchResult = null;
88
89   private static boolean
90     __compare(char[] s1, int s1Offs, char[] s2, int s2Offs, int n)
91   {
92     int cnt;
93
94     for(cnt = 0; cnt < n; cnt++, s1Offs++, s2Offs++) {
95       if(s1Offs >= s1.length)
96     return false;
97       if(s2Offs >= s2.length)
98     return false;
99       if(s1[s1Offs] != s2[s2Offs])
100     return false;
101     }
102
103     return true;
104   }
105
106   private static int __findFirst(char[] input, int current, int endOffset,
107                  char[] mustString)
108   {
109     int count, saveCurrent;
110     char ch;
111
112
113     if(input.length == 0)
114       return endOffset;
115
116     ch = mustString[0];
117     // Find the offset of the first character of the must string
118
while(current < endOffset) {
119       if(ch == input[current]){
120     saveCurrent = current;
121     count = 0;
122
123     while(current < endOffset && count < mustString.length) {
124       if(mustString[count] != input[current])
125         break;
126       ++count;
127       ++current;
128     }
129
130     current = saveCurrent;
131
132     if(count >= mustString.length)
133       break;
134       }
135       ++current;
136     }
137
138     return current;
139   }
140
141
142   private void __pushState(int parenFloor) {
143     int[] state;
144     int stateEntries, paren;
145
146     stateEntries = 3*(__expSize - parenFloor);
147     if(stateEntries <= 0)
148       state = new int[3];
149     else
150       state = new int[stateEntries + 3];
151
152     state[0] = __expSize;
153     state[1] = __lastParen;
154     state[2] = __inputOffset;
155
156     for(paren = __expSize; paren > parenFloor; paren-=3, stateEntries-=3) {
157       state[stateEntries] = __endMatchOffsets[paren];
158       state[stateEntries + 1] = __beginMatchOffsets[paren];
159       state[stateEntries + 2] = paren;
160     }
161
162     __stack.push(state);
163   }
164
165
166   private void __popState() {
167     int[] state;
168     int entry, paren;
169
170     state = (int[])__stack.pop();
171
172     __expSize = state[0];
173     __lastParen = state[1];
174     __inputOffset = state[2];
175
176     for(entry = 3; entry < state.length; entry+=3) {
177       paren = state[entry + 2];
178       __beginMatchOffsets[paren] = state[entry + 1];
179
180       if(paren <= __lastParen)
181     __endMatchOffsets[paren] = state[entry];
182     }
183
184     for(paren = __lastParen + 1; paren <= __numParentheses; paren++) {
185       if(paren > __expSize)
186     __beginMatchOffsets[paren] = OpCode._NULL_OFFSET;
187       __endMatchOffsets[paren] = OpCode._NULL_OFFSET;
188     }
189   }
190
191
192   // Initialize globals needed before calling __tryExpression for first time
193
private void __initInterpreterGlobals(Perl5Pattern expression, char[] input,
194                     int beginOffset, int endOffset) {
195     __input = input;
196     __endOffset = endOffset;
197     __currentRep = new Perl5Repetition();
198     __currentRep._numInstances = 0;
199     __currentRep._lastRepetition = null;
200     __program = expression._program;
201     __stack.setSize(0);
202
203     if(beginOffset == 0)
204       __previousChar = '\n';
205     else {
206       __previousChar = input[beginOffset - 1];
207       if(!__multiline && __previousChar == '\n')
208     __previousChar = '\0';
209     }
210
211     __numParentheses = expression._numParentheses;
212     __currentOffset = beginOffset;
213
214     __bol = beginOffset;
215     __eol = endOffset;
216
217     // Ok, here we're using endOffset as a temporary variable.
218
endOffset = __numParentheses + 1;
219     if(__beginMatchOffsets == null || endOffset > __beginMatchOffsets.length) {
220       if(endOffset < __INITIAL_NUM_OFFSETS)
221     endOffset = __INITIAL_NUM_OFFSETS;
222       __beginMatchOffsets = new int[endOffset];
223       __endMatchOffsets = new int[endOffset];
224     }
225   }
226
227   // Set the match result information. Only call this if we successfully
228
// matched.
229
private void __setLastMatchResult() {
230     int offs;
231
232     //endOffset+=dontTry;
233

234     __lastMatchResult = new Perl5MatchResult(__numParentheses + 1);
235
236     // This can happen when using Perl5StreamInput
237
if(__endMatchOffsets[0] > __originalInput.length)
238       throw new ArrayIndexOutOfBoundsException JavaDoc();
239
240     __lastMatchResult._match =
241       new String JavaDoc(__originalInput, __beginMatchOffsets[0],
242          __endMatchOffsets[0] - __beginMatchOffsets[0]);
243
244     __lastMatchResult._matchBeginOffset = __beginMatchOffsets[0];
245
246     while(__numParentheses >= 0) {
247       offs = __beginMatchOffsets[__numParentheses];
248
249       if(offs >= 0)
250     __lastMatchResult._beginGroupOffset[__numParentheses]
251       = offs - __lastMatchResult._matchBeginOffset;
252       else
253     __lastMatchResult._beginGroupOffset[__numParentheses] =
254       OpCode._NULL_OFFSET;
255
256       offs = __endMatchOffsets[__numParentheses];
257
258       if(offs >= 0)
259     __lastMatchResult._endGroupOffset[__numParentheses]
260       = offs - __lastMatchResult._matchBeginOffset;
261       else
262     __lastMatchResult._endGroupOffset[__numParentheses] =
263       OpCode._NULL_OFFSET;
264
265       --__numParentheses;
266     }
267
268     // Free up for garbage collection
269
__originalInput = null;
270   }
271
272
273
274   // Expects to receive a valid regular expression program. No checking
275
// is done to ensure validity.
276
// __originalInput must be set before calling this method for
277
// __lastMatchResult to be set correctly.
278
// Searches input between beginOffset and endOffset for a position at which
// __tryExpression succeeds, and returns whether one was found.
//
// After __initInterpreterGlobals has run, the body of the labelled
// while(true) loop executes exactly once: every path through it ends in
// "break _mainLoop".  The loop exists only so that the nested scans can
// leave through a labelled break.
//
// The strategies are checked in this order:
//   1. if a must-string is available, pre-scan for it with __findFirst;
//   2. if the pattern is anchored (_OPT_ANCH), try beginOffset and then,
//      in multiline/implicit mode, offsets that follow a '\n';
//   3. if the pattern has a start string, try only where it occurs;
//   4. if the pattern has a start class opcode, try only offsets that
//      satisfy that opcode;
//   5. otherwise try each offset in turn.
//
// On exit __lastSuccess holds the result and __lastMatchResult is reset
// to null.
private boolean __interpret(Perl5Pattern expression, char[] input,
279                   int beginOffset, int endOffset)
280   {
281     boolean success;
282     int minLength = 0, dontTry = 0, offset;
283     char ch, mustString[];
284
285     __initInterpreterGlobals(expression, input, beginOffset, endOffset);
286
287     success = false;
288     mustString = expression._mustString;
289
290   _mainLoop:
291     while(true) {
292
      // Strategy 1: must-string pre-scan.  It is used when the pattern is
      // not anchored, or when in multiline mode with _back >= 0.
293       if(mustString != null &&
294      ((expression._anchor & Perl5Pattern._OPT_ANCH) == 0 ||
295       (__multiline && expression._back >= 0))) {
296
297     __currentOffset =
298       __findFirst(__input, __currentOffset, endOffset, mustString);
299     
    // The must-string was not found before endOffset: fail without trying
    // the expression.  _mustUtility is only updated when the pattern was
    // not compiled with READ_ONLY_MASK.
300     if(__currentOffset >= endOffset) {
301       if((expression._options & Perl5Compiler.READ_ONLY_MASK) == 0)
302         expression._mustUtility++;
303       success = false;
304       break _mainLoop;
305     } else if(expression._back >= 0) {
      // Step back _back characters from the occurrence, but never before
      // beginOffset.
306       __currentOffset-=expression._back;
307       if(__currentOffset < beginOffset)
308         __currentOffset = beginOffset;
309       minLength = expression._back + mustString.length;
310     } else if(!expression._isExpensive &&
311           (expression._options & Perl5Compiler.READ_ONLY_MASK) == 0 &&
312           (--expression._mustUtility < 0)) {
313       // Be careful! The preceding logical expression is constructed
314
// so that mustUtility is only decremented if the expression is
315
// compiled without READ_ONLY_MASK.
316
mustString = expression._mustString = null;
317       __currentOffset = beginOffset;
318     } else {
319       __currentOffset = beginOffset;
320       minLength = mustString.length;
321     }
322       }
323
      // Strategy 2: anchored pattern.
324       if((expression._anchor & Perl5Pattern._OPT_ANCH) != 0) {
325     if(__tryExpression(expression, beginOffset)) {
326       success = true;
327       break _mainLoop;
328     } else if(__multiline ||
329           (expression._anchor & Perl5Pattern._OPT_IMPLICIT) != 0) {
330
      // Offsets in the last (minLength - 1) characters are not tried.
331       if(minLength > 0)
332         dontTry = minLength - 1;
333       endOffset-=dontTry;
334
335       if(__currentOffset > beginOffset)
336         --__currentOffset;
337
      // Try the expression at each offset that immediately follows a '\n'.
338       while(__currentOffset < endOffset) {
339         if(__input[__currentOffset++] == '\n') {
340           if(__currentOffset < endOffset &&
341          __tryExpression(expression, __currentOffset)) {
342         success = true;
343         break _mainLoop;
344           }
345         }
346       }
347     }
348
349     break _mainLoop;
350       }
351
352
      // Strategy 3: the pattern has a start string.  From here on
      // mustString refers to _startString.
353       if(expression._startString != null) {
354     mustString = expression._startString;
355     if((expression._anchor & Perl5Pattern._OPT_SKIP) != 0) {
356       ch = mustString[0];
357
      // With _OPT_SKIP only the first character of the start string is
      // compared.  After a failed attempt the rest of the run of that
      // same character is skipped.
358       while(__currentOffset < endOffset) {
359         if(ch == __input[__currentOffset]) {
360           if(__tryExpression(expression, __currentOffset)){
361         success = true;
362         break _mainLoop;
363           }
364           ++__currentOffset;
365           while(__currentOffset < endOffset &&
366             __input[__currentOffset] == ch)
367             ++__currentOffset;
368         }
369         ++__currentOffset;
370       }
371     } else {
372
      // Without _OPT_SKIP every occurrence of the whole start string is tried.
373       while((__currentOffset =
374          __findFirst(__input, __currentOffset, endOffset, mustString))
375         < endOffset){
376         if(__tryExpression(expression, __currentOffset)) {
377           success = true;
378           break _mainLoop;
379         }
380         ++__currentOffset;
381       }
382     }
383
384     break _mainLoop;
385       }
386
      // Strategy 4: the pattern has a start class opcode at
      // _startClassOffset in the program.
387       if((offset = expression._startClassOffset) != OpCode._NULL_OFFSET) {
388     boolean doEvery, tmp;
389
    // doEvery is true when _OPT_SKIP is not set.  In the character-class
    // cases below, tmp says whether the next qualifying character should
    // be tried.  When doEvery is false, a failed attempt clears tmp, so
    // the following qualifying characters are skipped until a
    // non-qualifying character sets tmp again.
390     doEvery = ((expression._anchor & Perl5Pattern._OPT_SKIP) == 0);
391
392     if(minLength > 0)
393       dontTry = minLength - 1;
394     endOffset -= dontTry;
395     tmp = true;
396
397     switch(__program[offset]) {
398     case OpCode._ANYOF:
399       offset = OpCode._getOperand(offset);
400       while(__currentOffset < endOffset) {
401         ch = __input[__currentOffset];
402
        // Only characters below 256 are looked up in the bitmap at the
        // operand.  A clear bit is what qualifies the character for an
        // attempt.
403         if(ch < 256 &&
404            (__program[offset + (ch >> 4)] & (1 << (ch & 0xf))) == 0) {
405           if(tmp && __tryExpression(expression, __currentOffset)) {
406         success = true;
407         break _mainLoop;
408           } else
409         tmp = doEvery;
410         } else
411           tmp = true;
412         ++__currentOffset;
413       }
414
415       break;
416
417     case OpCode._BOUND:
418       if(minLength > 0) {
419         ++dontTry;
420         --endOffset;
421       }
422
      // Here tmp records whether the character before the current offset
      // is a word character.  An attempt is made wherever that changes.
423       if(__currentOffset != beginOffset) {
424         ch = __input[__currentOffset - 1];
425         tmp = OpCode._isWordCharacter(ch);
426       } else
427         tmp = OpCode._isWordCharacter(__previousChar);
428
429       while(__currentOffset < endOffset) {
430         ch = __input[__currentOffset];
431         if(tmp != OpCode._isWordCharacter(ch)){
432           tmp = !tmp;
433           if(__tryExpression(expression, __currentOffset)) {
434         success = true;
435         break _mainLoop;
436           }
437         }
438         ++__currentOffset;
439       }
440
441       if((minLength > 0 || tmp) &&
442          __tryExpression(expression, __currentOffset)) {
443         success = true;
444         break _mainLoop;
445       }
446       break;
447
448     case OpCode._NBOUND:
449       if(minLength > 0) {
450         ++dontTry;
451         --endOffset;
452       }
453
      // Mirror image of _BOUND: an attempt is made wherever the
      // word-character status does not change.
454       if(__currentOffset != beginOffset) {
455         ch = __input[__currentOffset - 1];
456         tmp = OpCode._isWordCharacter(ch);
457       } else
458         tmp = OpCode._isWordCharacter(__previousChar);
459
460       while(__currentOffset < endOffset) {
461         ch = __input[__currentOffset];
462         if(tmp != OpCode._isWordCharacter(ch))
463           tmp = !tmp;
464         else if(__tryExpression(expression, __currentOffset)) {
465           success = true;
466           break _mainLoop;
467         }
468
469         ++__currentOffset;
470       }
471
472       if((minLength > 0 || !tmp) &&
473          __tryExpression(expression, __currentOffset)) {
474         success = true;
475         break _mainLoop;
476       }
477       break;
478
    // The remaining cases share one shape: scan forward and attempt a
    // match at offsets whose character passes the test for the opcode,
    // subject to the tmp/doEvery throttle described above.
479     case OpCode._ALNUM:
480       while(__currentOffset < endOffset) {
481         ch = __input[__currentOffset];
482         if(OpCode._isWordCharacter(ch)) {
483           if(tmp && __tryExpression(expression, __currentOffset)) {
484         success = true;
485         break _mainLoop;
486           } else
487         tmp = doEvery;
488         } else
489           tmp = true;
490         ++__currentOffset;
491       }
492       break;
493
494     case OpCode._NALNUM:
495       while(__currentOffset < endOffset) {
496         ch = __input[__currentOffset];
497         if(!OpCode._isWordCharacter(ch)) {
498           if(tmp && __tryExpression(expression, __currentOffset)) {
499         success = true;
500         break _mainLoop;
501           } else
502         tmp = doEvery;
503         } else
504           tmp = true;
505         ++__currentOffset;
506       }
507       break;
508
509     case OpCode._SPACE:
510       while(__currentOffset < endOffset) {
511         if(Character.isWhitespace(__input[__currentOffset])) {
512           if(tmp && __tryExpression(expression, __currentOffset)) {
513         success = true;
514         break _mainLoop;
515           } else
516         tmp = doEvery;
517         } else
518           tmp = true;
519         ++__currentOffset;
520       }
521       break;
522
523     case OpCode._NSPACE:
524       while(__currentOffset < endOffset) {
525         if(!Character.isWhitespace(__input[__currentOffset])) {
526           if(tmp && __tryExpression(expression, __currentOffset)) {
527         success = true;
528         break _mainLoop;
529           } else
530         tmp = doEvery;
531         } else
532           tmp = true;
533         ++__currentOffset;
534       }
535       break;
536
537     case OpCode._DIGIT:
538       while(__currentOffset < endOffset) {
539         if(Character.isDigit(__input[__currentOffset])) {
540           if(tmp && __tryExpression(expression, __currentOffset)) {
541         success = true;
542         break _mainLoop;
543           } else
544         tmp = doEvery;
545         } else
546           tmp = true;
547         ++__currentOffset;
548       }
549       break;
550
551
552     case OpCode._NDIGIT:
553       while(__currentOffset < endOffset) {
554         if(!Character.isDigit(__input[__currentOffset])) {
555           if(tmp && __tryExpression(expression, __currentOffset)) {
556         success = true;
557         break _mainLoop;
558           } else
559         tmp = doEvery;
560         } else
561           tmp = true;
562         ++__currentOffset;
563       }
564       break;
565     } // end switch
566

567       } else {
    // Strategy 5: no start hint.  The expression is tried at
    // __currentOffset and then at each following offset up to and
    // including the reduced endOffset.
568     if(minLength > 0)
569       dontTry = minLength - 1;
570     endOffset-=dontTry;
571
572     do {
573       if(__tryExpression(expression, __currentOffset)) {
574         success = true;
575         break _mainLoop;
576       }
577     } while(__currentOffset++ < endOffset);
578
579       }
580
581
582       break _mainLoop;
583     } // end while
584

    // Record the outcome.  The match result object is cleared here rather
    // than built.
585     __lastSuccess = success;
586     __lastMatchResult = null;
587
588     return success;
589   }
590
591
592   private boolean __tryExpression(Perl5Pattern expression, int offset) {
593     int count;
594
595     __inputOffset = offset;
596     __lastParen = 0;
597     __expSize = 0;
598
599     if(__numParentheses > 0) {
600       for(count=0; count <= __numParentheses; count++) {
601     __beginMatchOffsets[count] = OpCode._NULL_OFFSET;
602     __endMatchOffsets[count] = OpCode._NULL_OFFSET;
603       }
604     }
605
606     if(__match(1)){
607       __beginMatchOffsets[0] = offset;
608       __endMatchOffsets[0] = __inputOffset;
609       return true;
610     }
611
612     return false;
613   }
614     
615
616   private int __repeat(int offset, int max) {
617     int scan, eol, operand, ret;
618     char ch;
619
620     scan = __inputOffset;
621     eol = __eol;
622
623     if(max != Character.MAX_VALUE && max < eol - scan)
624       eol = scan + max;
625
626     operand = OpCode._getOperand(offset);
627
628     switch(__program[offset]) {
629
630     case OpCode._ANY:
631       while(scan < eol && __input[scan] != '\n')
632     ++scan;
633       break;
634
635     case OpCode._SANY:
636       scan = eol;
637       break;
638
639     case OpCode._EXACTLY:
640       ++operand;
641       while(scan < eol && __program[operand] == __input[scan])
642     ++scan;
643       break;
644
645     case OpCode._ANYOF:
646       if(scan < eol && (ch = __input[scan]) < 256) {
647     while((__program[operand + (ch >> 4)] & (1 << (ch & 0xf))) == 0) {
648       if(++scan < eol)
649         ch = __input[scan];
650       else
651         break;
652     }
653       }
654       break;
655
656     case OpCode._ALNUM:
657       while(scan < eol && OpCode._isWordCharacter(__input[scan]))
658     ++scan;
659       break;
660
661     case OpCode._NALNUM:
662       while(scan < eol && !OpCode._isWordCharacter(__input[scan]))
663     ++scan;
664       break;
665
666     case OpCode._SPACE:
667       while(scan < eol && Character.isWhitespace(__input[scan]))
668     ++scan;
669       break;
670
671     case OpCode._NSPACE:
672       while(scan < eol && !Character.isWhitespace(__input[scan]))
673     ++scan;
674       break;
675
676     case OpCode._DIGIT:
677       while(scan < eol && Character.isDigit(__input[scan]))
678     ++scan;
679       break;
680
681     case OpCode._NDIGIT:
682       while(scan < eol && !Character.isDigit(__input[scan]))
683     ++scan;
684       break;
685
686     default:
687       break;
688
689     }
690
691     ret = scan - __inputOffset;
692     __inputOffset = scan;
693
694     return ret;
695   }
696
697
698   private boolean __match(int offset) {
699     char nextChar, op;
700     int scan, next, input, maxScan, current, line, arg;
701     boolean inputRemains = true, minMod = false;
702     Perl5Repetition rep;
703
704
705     input = __inputOffset;
706     inputRemains = (input < __endOffset);
707     nextChar = (inputRemains ? __input[input] : __EOS);
708
709     scan = offset;
710     maxScan = __program.length;
711
712     while(scan < maxScan /*&& scan > 0*/){
713       next = OpCode._getNext(__program, scan);
714
715       switch(op = __program[scan]) {
716
717       case OpCode._BOL:
718     if(input == __bol ? __previousChar == '\n' :
719        (__multiline && (inputRemains || input < __eol) &&
720         __input[input - 1] == '\n'))
721       break;
722     return false;
723
724       case OpCode._MBOL:
725     if(input == __bol ? __previousChar == '\n' :
726        ((inputRemains || input < __eol) && __input[input - 1] == '\n'))
727       break;
728     return false;
729
730       case OpCode._SBOL:
731     if(input == __bol && __previousChar == '\n')
732       break;
733     return false;
734
735       case OpCode._GBOL:
736     if(input == __bol)
737       break;
738     return true;
739
740       case OpCode._EOL :
741     if((inputRemains || input < __eol) && nextChar != '\n')
742       return false;
743     if(!__multiline && __eol - input > 1)
744       return false;
745     break;
746
747       case OpCode._MEOL:
748     if((inputRemains || input < __eol) && nextChar != '\n')
749       return false;
750     break;
751
752       case OpCode._SEOL:
753     if((inputRemains || input < __eol) && nextChar != '\n')
754       return false;
755     if(__eol - input > 1)
756       return false;
757     break;
758
759       case OpCode._SANY:
760     if(!inputRemains && input >= __eol)
761       return false;
762     inputRemains = (++input < __endOffset);
763     nextChar = (inputRemains ? __input[input] : __EOS);
764     break;
765
766       case OpCode._ANY:
767     if((!inputRemains && input >= __eol) || nextChar == '\n')
768       return false;
769     inputRemains = (++input < __endOffset);
770     nextChar = (inputRemains ? __input[input] : __EOS);
771     break;
772
773       case OpCode._EXACTLY:
774     current = OpCode._getOperand(scan);
775     line = __program[current++];
776
777     if(__program[current] != nextChar)
778       return false;
779     if(__eol - input < line)
780       return false;
781
782     if(line > 1 && !__compare(__program, current, __input, input, line))
783       return false;
784
785     input+=line;
786     inputRemains = (input < __endOffset);
787     nextChar = (inputRemains ? __input[input] : __EOS);
788     break;
789
790       case OpCode._ANYOF:
791     current = OpCode._getOperand(scan);
792
793     if(nextChar == __EOS && inputRemains)
794       nextChar = __input[input];
795
796     if(nextChar >= 256 || (__program[current + (nextChar >> 4)] &
797         (1 << (nextChar & 0xf))) != 0)
798       return false;
799
800     if(!inputRemains && input >= __eol)
801       return false;
802
803     inputRemains = (++input < __endOffset);
804     nextChar = (inputRemains ? __input[input] : __EOS);
805     break;
806
807       case OpCode._ALNUM:
808     if(!inputRemains)
809       return false;
810     if(!OpCode._isWordCharacter(nextChar))
811       return false;
812     inputRemains = (++input < __endOffset);
813     nextChar = (inputRemains ? __input[input] : __EOS);
814     break;
815
816       case OpCode._NALNUM:
817     if(!inputRemains && input >= __eol)
818       return false;
819     if(OpCode._isWordCharacter(nextChar))
820       return false;
821     inputRemains = (++input < __endOffset);
822     nextChar = (inputRemains ? __input[input] : __EOS);
823     break;
824
825
826       case OpCode._NBOUND:
827       case OpCode._BOUND:
828     boolean a, b;
829
830     if(input == __bol)
831       a = OpCode._isWordCharacter(__previousChar);
832     else
833       a = OpCode._isWordCharacter(__input[input - 1]);
834
835     b = OpCode._isWordCharacter(nextChar);
836
837     if((a == b) == (__program[scan] == OpCode._BOUND))
838       return false;
839     break;
840
841       case OpCode._SPACE:
842     if(!inputRemains && input >= __eol)
843       return false;
844     if(!Character.isWhitespace(nextChar))
845       return false;
846     inputRemains = (++input < __endOffset);
847     nextChar = (inputRemains ? __input[input] : __EOS);
848     break;
849
850
851       case OpCode._NSPACE:
852     if(!inputRemains)
853       return false;
854     if(Character.isWhitespace(nextChar))
855       return false;
856     inputRemains = (++input < __endOffset);
857     nextChar = (inputRemains ? __input[input] : __EOS);
858     break;
859
860       case OpCode._DIGIT:
861     if(!Character.isDigit(nextChar))
862       return false;
863     inputRemains = (++input < __endOffset);
864     nextChar = (inputRemains ? __input[input] : __EOS);
865     break;
866
867       case OpCode._NDIGIT:
868     if(!inputRemains && input >= __eol)
869       return false;
870     if(Character.isDigit(nextChar))
871       return false;
872     inputRemains = (++input < __endOffset);
873     nextChar = (inputRemains ? __input[input] : __EOS);
874     break;
875
876       case OpCode._REF:
877     arg = OpCode._getArg1(__program, scan);
878     current = __beginMatchOffsets[arg];
879
880     if(current == OpCode._NULL_OFFSET)
881       return false;
882
883     if(__endMatchOffsets[arg] == OpCode._NULL_OFFSET)
884       return false;
885
886     if(current == __endMatchOffsets[arg])
887       break;
888
889     if(__input[current] != nextChar)
890       return false;
891
892     line = __endMatchOffsets[arg] - current;
893
894     if(input + line > __eol)
895       return false;
896
897     if(line > 1 && !__compare(__input, current, __input, input, line))
898       return false;
899
900     input+=line;
901     inputRemains = (input < __endOffset);
902     nextChar = (inputRemains ? __input[input] : __EOS);
903     break;
904
905       case OpCode._NOTHING:
906     break;
907
908       case OpCode._BACK:
909     break;
910
911       case OpCode._OPEN:
912     arg = OpCode._getArg1(__program, scan);
913     __beginMatchOffsets[arg] = input;
914
915     if(arg > __expSize)
916       __expSize = arg;
917     break;
918
919       case OpCode._CLOSE:
920     arg = OpCode._getArg1(__program, scan);
921     __endMatchOffsets[arg] = input;
922
923     if(arg > __lastParen)
924       __lastParen = arg;
925     break;
926
927       case OpCode._CURLYX:
928     rep = new Perl5Repetition();
929     rep._lastRepetition = __currentRep;
930     __currentRep = rep;
931
932     rep._parenFloor = __lastParen;
933     rep._numInstances = -1;
934     rep._min = OpCode._getArg1(__program, scan);
935     rep._max = OpCode._getArg2(__program, scan);
936     rep._scan = OpCode._getNextOperator(scan) + 2;
937     rep._next = next;
938     rep._minMod = minMod;
939     // Must initialize to -1 because if we initialize to 0 and are
940
// at the beginning of the input the OpCode._WHILEM case will
941
// not work right.
942
rep._lastLocation = -1;
943     __inputOffset = input;
944
945     // use minMod as temporary
946
minMod = __match(OpCode._getPrevOperator(next));
947
948     // leave scope call not pertinent?
949
__currentRep = rep._lastRepetition;
950     return minMod;
951
952       case OpCode._WHILEM:
953     rep = __currentRep;
954
955     arg = rep._numInstances + 1;
956     __inputOffset = input;
957
958     if(input == rep._lastLocation) {
959       __currentRep = rep._lastRepetition;
960       line = __currentRep._numInstances;
961       if(__match(rep._next))
962         return true;
963       __currentRep._numInstances = line;
964       __currentRep = rep;
965       return false;
966     }
967
968     if(arg < rep._min) {
969       rep._numInstances = arg;
970       rep._lastLocation = input;
971       if(__match(rep._scan))
972         return true;
973       rep._numInstances = arg - 1;
974       return false;
975     }
976
977     if(rep._minMod) {
978       __currentRep = rep._lastRepetition;
979       line = __currentRep._numInstances;
980       if(__match(rep._next))
981         return true;
982       __currentRep._numInstances = line;
983       __currentRep = rep;
984
985       if(arg >= rep._max)
986         return false;
987
988       __inputOffset = input;
989       rep._numInstances = arg;
990       rep._lastLocation = input;
991
992       if(__match(rep._scan))
993         return true;
994
995       rep._numInstances = arg - 1;
996       return false;
997     }
998
999     if(arg < rep._max) {
1000      __pushState(rep._parenFloor);
1001      rep._numInstances = arg;
1002      rep._lastLocation = input;
1003      if(__match(rep._scan))
1004        return true;
1005      __popState();
1006      __inputOffset = input;
1007    }
1008
1009    __currentRep = rep._lastRepetition;
1010    line = __currentRep._numInstances;
1011    if(__match(rep._next))
1012      return true;
1013
1014    rep._numInstances = line;
1015    __currentRep = rep;
1016    rep._numInstances = arg - 1;
1017    return false;
1018
1019      case OpCode._BRANCH:
1020    if(__program[next] != OpCode._BRANCH)
1021      next = OpCode._getNextOperator(scan);
1022    else {
1023      int lastParen;
1024
1025      lastParen = __lastParen;
1026
1027      do {
1028
1029        __inputOffset = input;
1030
1031        if(__match(OpCode._getNextOperator(scan)))
1032          return true;
1033
1034        for(arg = __lastParen; arg > lastParen; --arg)
1035          //__endMatchOffsets[arg] = 0;
1036
__endMatchOffsets[arg] = OpCode._NULL_OFFSET;
1037        __lastParen = arg;
1038
1039        scan = OpCode._getNext(__program, scan);
1040      } while(scan != OpCode._NULL_OFFSET &&
1041          __program[scan] == OpCode._BRANCH);
1042      return false;
1043    }
1044
1045    break;
1046
1047      case OpCode._MINMOD:
1048    minMod = true;
1049    break;
1050
1051
1052      case OpCode._CURLY:
1053      case OpCode._STAR:
1054      case OpCode._PLUS:
1055    if(op == OpCode._CURLY) {
1056      line = OpCode._getArg1(__program, scan);
1057      arg = OpCode._getArg2(__program, scan);
1058      scan = OpCode._getNextOperator(scan) + 2;
1059    } else if(op == OpCode._STAR) {
1060      line = 0;
1061      arg = Character.MAX_VALUE;
1062      scan = OpCode._getNextOperator(scan);
1063    } else {
1064      line = 1;
1065      arg = Character.MAX_VALUE;
1066      scan = OpCode._getNextOperator(scan);
1067    }
1068
1069    if(__program[next] == OpCode._EXACTLY) {
1070      nextChar = __program[OpCode._getOperand(next) + 1];
1071      current = 0;
1072    } else {
1073      nextChar = __EOS;
1074      current = -1000;
1075    }
1076    __inputOffset = input;
1077
1078    if(minMod) {
1079      minMod = false;
1080
1081      if(line > 0 && __repeat(scan, line) < line)
1082        return false;
1083
1084
1085      while(arg >= line || (arg == Character.MAX_VALUE && line > 0)) {
1086        // there may be a bug here with respect to
1087
// __inputOffset >= __input.length, but it seems to be right for
1088
// now. the issue is with __inputOffset being reset later.
1089
// is this test really supposed to happen here?
1090
if(current == -1000 || __inputOffset >= __endOffset ||
1091           __input[__inputOffset] == nextChar) {
1092          if(__match(next))
1093        return true;
1094        }
1095
1096        __inputOffset = input + line;
1097
1098        if(__repeat(scan, 1) != 0) {
1099          ++line;
1100          __inputOffset = input + line;
1101        } else
1102          return false;
1103      }
1104
1105    } else {
1106      arg = __repeat(scan, arg);
1107
1108      if(line < arg && OpCode._opType[__program[next]] == OpCode._EOL &&
1109         (!__multiline || __program[next] == OpCode._SEOL))
1110        line = arg;
1111
1112      while(arg >= line) {
1113        // there may be a bug here with respect to
1114
// __inputOffset >= __input.length, but it seems to be right for
1115
// now. the issue is with __inputOffset being reset later.
1116
// is this test really supposed to happen here?
1117
if(current == -1000 || __inputOffset >= __endOffset ||
1118           __input[__inputOffset] == nextChar) {
1119          if(__match(next))
1120        return true;
1121        }
1122
1123        --arg;
1124        __inputOffset = input + arg;
1125      }
1126    }
1127
1128    return false;
1129
1130      case OpCode._SUCCEED:
1131      case OpCode._END:
1132    __inputOffset = input;
1133    // This enforces the rule that two consecutive matches cannot have
1134
// the same end offset.
1135
if(__inputOffset == __lastMatchInputEndOffset)
1136      return false;
1137    return true;
1138
1139      case OpCode._IFMATCH:
1140    __inputOffset = input;
1141    scan = OpCode._getNextOperator(scan);
1142    if(!__match(scan))
1143      return false;
1144    break;
1145
1146      case OpCode._UNLESSM:
1147    __inputOffset = input;
1148    scan = OpCode._getNextOperator(scan);
1149    if(__match(scan))
1150      return false;
1151    break;
1152
1153
1154      default:
1155    // todo: Need to throw an exception here.
1156

1157      } // end switch
1158

1159      //scan = (next > 0 ? next : 0);
1160
scan = next;
1161    } // end while scan
1162

1163
1164
1165    return false;
1166  }
1167
1168
1169  /**
1170   * Set whether or not subsequent calls to {@link #matches matches()}
1171   * or {@link #contains contains()} should treat the input as
1172   * consisting of multiple lines. The default behavior is for
1173   * input to be treated as consisting of multiple lines. This method
1174   * should only be called if the Perl5Pattern used for a match was
1175   * compiled without either of the Perl5Compiler.MULTILINE_MASK or
1176   * Perl5Compiler.SINGLELINE_MASK flags, and you want to alter the
1177   * behavior of how the <b>^</b>, <b>$</b>, and <b>.</b> metacharacters are
1178   * interpreted on the fly. The compilation options used when compiling
1179   * a pattern ALWAYS override the behavior specified by setMultiline(). See
1180   * {@link Perl5Compiler} for more details.
1181   * <p>
1182   * @param multiline If set to true treats the input as consisting of
1183   * multiple lines with respect to the <b>^</b> and <b>$</b>
1184   * metacharacters. If set to false treats the input as consisting
1185   * of a single line with respect to the <b>^</b> and <b>$</b>
1186   * metacharacters.
1187   */

1188  public void setMultiline(boolean multiline) { __multiline = multiline; }
1189
1190
1191  /**
1192   * @return True if the matcher is treating input as consisting of multiple
1193   * lines with respect to the <b>^</b> and <b>$</b> metacharacters,
1194   * false otherwise.
1195   */

1196  public boolean isMultiline() { return __multiline; }
1197
1198  char[] _toLower(char[] input) {
1199    int current;
1200    char[] inp;
1201    // todo:
1202
// Certainly not the best way to do case insensitive matching.
1203
// Must definitely change this in some way, but for now we
1204
// do what Perl does and make a copy of the input, converting
1205
// it all to lowercase. This is truly better handled in the
1206
// compilation phase.
1207
inp = new char[input.length];
1208    System.arraycopy(input, 0, inp, 0, input.length);
1209    input = inp;
1210
1211    // todo: Need to inline toLowerCase()
1212
for(current = 0; current < input.length; current++)
1213      if(Character.isUpperCase(input[current]))
1214    input[current] = Character.toLowerCase(input[current]);
1215
1216    return input;
1217  }
1218
1219
1220  /**
1221   * Determines if a prefix of a string (represented as a char[])
1222   * matches a given pattern, starting from a given offset into the string.
1223   * If a prefix of the string matches the pattern, a MatchResult instance
1224   * representing the match is made accesible via
1225   * {@link #getMatch()}.
1226   * <p>
1227   * This method is useful for certain common token identification tasks
1228   * that are made more difficult without this functionality.
1229   * <p>
1230   * @param input The char[] to test for a prefix match.
1231   * @param pattern The Pattern to be matched.
1232   * @param offset The offset at which to start searching for the prefix.
1233   * @return True if input matches pattern, false otherwise.
1234   */

1235  public boolean matchesPrefix(char[] input, Pattern pattern, int offset) {
1236    Perl5Pattern expression;
1237
1238    expression = (Perl5Pattern)pattern;
1239    __originalInput = input;
1240    if(expression._isCaseInsensitive)
1241      input = _toLower(input);
1242
1243    __initInterpreterGlobals(expression, input, offset, input.length);
1244
1245    __lastSuccess = __tryExpression(expression, offset);
1246    __lastMatchResult = null;
1247
1248    return __lastSuccess;
1249  }
1250
1251
1252  /**
1253   * Determines if a prefix of a string (represented as a char[])
1254   * matches a given pattern.
1255   * If a prefix of the string matches the pattern, a MatchResult instance
1256   * representing the match is made accesible via
1257   * {@link #getMatch()}.
1258   * <p>
1259   * This method is useful for certain common token identification tasks
1260   * that are made more difficult without this functionality.
1261   * <p>
1262   * @param input The char[] to test for a prefix match.
1263   * @param pattern The Pattern to be matched.
1264   * @return True if input matches pattern, false otherwise.
1265   */

1266  public boolean matchesPrefix(char[] input, Pattern pattern) {
1267    return matchesPrefix(input, pattern, 0);
1268  }
1269
1270
1271  /**
1272   * Determines if a prefix of a string matches a given pattern.
1273   * If a prefix of the string matches the pattern, a MatchResult instance
1274   * representing the match is made accesible via
1275   * {@link #getMatch()}.
1276   * <p>
1277   * This method is useful for certain common token identification tasks
1278   * that are made more difficult without this functionality.
1279   * <p>
1280   * @param input The String to test for a prefix match.
1281   * @param pattern The Pattern to be matched.
1282   * @return True if input matches pattern, false otherwise.
1283   */

1284  public boolean matchesPrefix(String JavaDoc input, Pattern pattern) {
1285    return matchesPrefix(input.toCharArray(), pattern, 0);
1286  }
1287
1288  /**
1289   * Determines if a prefix of a PatternMatcherInput instance
1290   * matches a given pattern. If there is a match, a MatchResult instance
1291   * representing the match is made accesible via
1292   * {@link #getMatch()}. Unlike the
1293   * {@link #contains(PatternMatcherInput, Pattern)}
1294   * method, the current offset of the PatternMatcherInput argument
1295   * is not updated. However, unlike the
1296   * {@link #matches matches(PatternMatcherInput, Pattern)} method,
1297   * matchesPrefix() will start its search from the current offset
1298   * rather than the begin offset of the PatternMatcherInput.
1299   * <p>
1300   * This method is useful for certain common token identification tasks
1301   * that are made more difficult without this functionality.
1302   * <p>
1303   * @param input The PatternMatcherInput to test for a prefix match.
1304   * @param pattern The Pattern to be matched.
1305   * @return True if input matches pattern, false otherwise.
1306   */

1307  public boolean matchesPrefix(PatternMatcherInput input, Pattern pattern) {
1308    char[] inp;
1309    Perl5Pattern expression;
1310
1311    expression = (Perl5Pattern)pattern;
1312
1313    __originalInput = input._originalBuffer;
1314    if(expression._isCaseInsensitive) {
1315      if(input._toLowerBuffer == null)
1316    input._toLowerBuffer = _toLower(__originalInput);
1317      inp = input._toLowerBuffer;
1318    } else
1319      inp = __originalInput;
1320
1321    __initInterpreterGlobals(expression, inp, input._currentOffset,
1322                input._endOffset);
1323    __lastSuccess = __tryExpression(expression, input._currentOffset);
1324    __lastMatchResult = null;
1325
1326    return __lastSuccess;
1327  }
1328
1329
1330
1331  /**
1332   * Determines if a string (represented as a char[]) exactly
1333   * matches a given pattern. If
1334   * there is an exact match, a MatchResult instance
1335   * representing the match is made accesible via
1336   * {@link #getMatch()}. The pattern must be
1337   * a Perl5Pattern instance, otherwise a ClassCastException will
1338   * be thrown. You are not required to, and indeed should NOT try to
1339   * (for performance reasons), catch a ClassCastException because it
1340   * will never be thrown as long as you use a Perl5Pattern as the pattern
1341   * parameter.
1342   * <p>
1343   * <b>Note:</b> matches() is not the same as sticking a ^ in front of
1344   * your expression and a $ at the end of your expression in Perl5
1345   * and using the =~ operator, even though in many cases it will be
1346   * equivalent. matches() literally looks for an exact match according
1347   * to the rules of Perl5 expression matching. Therefore, if you have
1348   * a pattern <em>foo|foot</em> and are matching the input <em>foot</em>
1349   * it will not produce an exact match. But <em>foot|foo</em> will
1350   * produce an exact match for either <em>foot</em> or <em>foo</em>.
1351   * Remember, Perl5 regular expressions do not match the longest
1352   * possible match. From the perlre manpage:
1353   * <blockquote>
1354   * Alternatives are tried from left to right, so the first
1355   * alternative found for which the entire expression matches,
1356   * is the one that is chosen. This means that alternatives
1357   * are not necessarily greedy. For example: when matching
1358   * foo|foot against "barefoot", only the "foo" part will
1359   * match, as that is the first alternative tried, and it
1360   * successfully matches the target string.
1361   * </blockquote>
1362   * <p>
1363   * @param input The char[] to test for an exact match.
1364   * @param pattern The Perl5Pattern to be matched.
1365   * @return True if input matches pattern, false otherwise.
1366   * @exception ClassCastException If a Pattern instance other than a
1367   * Perl5Pattern is passed as the pattern parameter.
1368   */

1369  public boolean matches(char[] input, Pattern pattern) {
1370    Perl5Pattern expression;
1371
1372    expression = (Perl5Pattern)pattern;
1373    __originalInput = input;
1374    if(expression._isCaseInsensitive)
1375      input = _toLower(input);
1376    /*
1377    if(__interpret(expression, input, 0, input.length)) {
1378      if(__lastMatchResult.beginOffset(0) == 0 &&
1379     __lastMatchResult.endOffset(0) == input.length)
1380    return true;
1381    }
1382    */

1383    __initInterpreterGlobals(expression, input, 0, input.length);
1384    __lastSuccess = (__tryExpression(expression, 0) &&
1385             __endMatchOffsets[0] == input.length);
1386    __lastMatchResult = null;
1387
1388    return __lastSuccess;
1389  }
1390
1391
1392  /**
1393   * Determines if a string exactly matches a given pattern. If
1394   * there is an exact match, a MatchResult instance
1395   * representing the match is made accesible via
1396   * {@link #getMatch()}. The pattern must be
1397   * a Perl5Pattern instance, otherwise a ClassCastException will
1398   * be thrown. You are not required to, and indeed should NOT try to
1399   * (for performance reasons), catch a ClassCastException because it
1400   * will never be thrown as long as you use a Perl5Pattern as the pattern
1401   * parameter.
1402   * <p>
1403   * <b>Note:</b> matches() is not the same as sticking a ^ in front of
1404   * your expression and a $ at the end of your expression in Perl5
1405   * and using the =~ operator, even though in many cases it will be
1406   * equivalent. matches() literally looks for an exact match according
1407   * to the rules of Perl5 expression matching. Therefore, if you have
1408   * a pattern <em>foo|foot</em> and are matching the input <em>foot</em>
1409   * it will not produce an exact match. But <em>foot|foo</em> will
1410   * produce an exact match for either <em>foot</em> or <em>foo</em>.
1411   * Remember, Perl5 regular expressions do not match the longest
1412   * possible match. From the perlre manpage:
1413   * <blockquote>
1414   * Alternatives are tried from left to right, so the first
1415   * alternative found for which the entire expression matches,
1416   * is the one that is chosen. This means that alternatives
1417   * are not necessarily greedy. For example: when matching
1418   * foo|foot against "barefoot", only the "foo" part will
1419   * match, as that is the first alternative tried, and it
1420   * successfully matches the target string.
1421   * </blockquote>
1422   * <p>
1423   * @param input The String to test for an exact match.
1424   * @param pattern The Perl5Pattern to be matched.
1425   * @return True if input matches pattern, false otherwise.
1426   * @exception ClassCastException If a Pattern instance other than a
1427   * Perl5Pattern is passed as the pattern parameter.
1428   */

1429  public boolean matches(String JavaDoc input, Pattern pattern) {
1430    return matches(input.toCharArray(), pattern);
1431  }
1432
1433
1434  /**
1435   * Determines if the contents of a PatternMatcherInput instance
1436   * exactly matches a given pattern. If
1437   * there is an exact match, a MatchResult instance
1438   * representing the match is made accesible via
1439   * {@link #getMatch()}. Unlike the
1440   * {@link #contains(PatternMatcherInput, Pattern)}
1441   * method, the current offset of the PatternMatcherInput argument
1442   * is not updated. You should remember that the region between
1443   * the begin (NOT the current) and end offsets of the PatternMatcherInput
1444   * will be tested for an exact match.
1445   * <p>
1446   * The pattern must be a Perl5Pattern instance, otherwise a
1447   * ClassCastException will be thrown. You are not required to, and
1448   * indeed should NOT try to (for performance reasons), catch a
1449   * ClassCastException because it will never be thrown as long as you use
1450   * a Perl5Pattern as the pattern parameter.
1451   * <p>
1452   * <b>Note:</b> matches() is not the same as sticking a ^ in front of
1453   * your expression and a $ at the end of your expression in Perl5
1454   * and using the =~ operator, even though in many cases it will be
1455   * equivalent. matches() literally looks for an exact match according
1456   * to the rules of Perl5 expression matching. Therefore, if you have
1457   * a pattern <em>foo|foot</em> and are matching the input <em>foot</em>
1458   * it will not produce an exact match. But <em>foot|foo</em> will
1459   * produce an exact match for either <em>foot</em> or <em>foo</em>.
1460   * Remember, Perl5 regular expressions do not match the longest
1461   * possible match. From the perlre manpage:
1462   * <blockquote>
1463   * Alternatives are tried from left to right, so the first
1464   * alternative found for which the entire expression matches,
1465   * is the one that is chosen. This means that alternatives
1466   * are not necessarily greedy. For example: when matching
1467   * foo|foot against "barefoot", only the "foo" part will
1468   * match, as that is the first alternative tried, and it
1469   * successfully matches the target string.
1470   * </blockquote>
1471   * <p>
1472   * @param input The PatternMatcherInput to test for a match.
1473   * @param pattern The Perl5Pattern to be matched.
1474   * @return True if input matches pattern, false otherwise.
1475   * @exception ClassCastException If a Pattern instance other than a
1476   * Perl5Pattern is passed as the pattern parameter.
1477   */

1478  public boolean matches(PatternMatcherInput input, Pattern pattern) {
1479    char[] inp;
1480    Perl5Pattern expression;
1481
1482    expression = (Perl5Pattern)pattern;
1483
1484    __originalInput = input._originalBuffer;
1485    if(expression._isCaseInsensitive) {
1486      if(input._toLowerBuffer == null)
1487    input._toLowerBuffer = _toLower(__originalInput);
1488      inp = input._toLowerBuffer;
1489    } else
1490      inp = __originalInput;
1491
1492    /*
1493    if(__interpret(expression, inp, input._beginOffset,
1494           input._endOffset)) {
1495      // debug
1496
1497      //System.err.println("contains: " + getMatch());
1498      //System.err.println(__lastMatchResult.beginOffset(0) + "-" +
1499      //__lastMatchResult.endOffset(0));
1500      //System.err.println(input._beginOffset + "-" +
1501      //input._endOffset);
1502
1503      if(__lastMatchResult.beginOffset(0) == input._beginOffset &&
1504     __lastMatchResult.endOffset(0) == input._endOffset)
1505    return true;
1506      // Handle special case.
1507      if(input.length() == 0 || (input._beginOffset == input._endOffset))
1508    return true;
1509    }
1510    */

1511
1512    __initInterpreterGlobals(expression, inp, input._beginOffset,
1513                input._endOffset);
1514
1515    __lastMatchResult = null;
1516
1517    if(__tryExpression(expression, input._beginOffset)) {
1518      if(__endMatchOffsets[0] == input._endOffset ||
1519     input.length() == 0 || input._beginOffset == input._endOffset) {
1520    __lastSuccess = true;
1521    return true;
1522      }
1523    }
1524
1525    __lastSuccess = false;
1526
1527    return false;
1528  }
1529
1530
1531
1532  /**
1533   * Determines if a string contains a pattern. If the pattern is
1534   * matched by some substring of the input, a MatchResult instance
1535   * representing the <b> first </b> such match is made acessible via
1536   * {@link #getMatch()}. If you want to access
1537   * subsequent matches you should either use a PatternMatcherInput object
1538   * or use the offset information in the MatchResult to create a substring
1539   * representing the remaining input. Using the MatchResult offset
1540   * information is the recommended method of obtaining the parts of the
1541   * string preceeding the match and following the match.
1542   * <p>
1543   * The pattern must be a Perl5Pattern instance, otherwise a
1544   * ClassCastException will be thrown. You are not required to, and
1545   * indeed should NOT try to (for performance reasons), catch a
1546   * ClassCastException because it will never be thrown as long as you use
1547   * a Perl5Pattern as the pattern parameter.
1548   * <p>
1549   * @param input The String to test for a match.
1550   * @param pattern The Perl5Pattern to be matched.
1551   * @return True if the input contains a pattern match, false otherwise.
1552   * @exception ClassCastException If a Pattern instance other than a
1553   * Perl5Pattern is passed as the pattern parameter.
1554   */

1555  public boolean contains(String JavaDoc input, Pattern pattern) {
1556    /*
1557    char[] inp;
1558    Perl5Pattern expression;
1559
1560    expression = (Perl5Pattern)pattern;
1561
1562    __originalInput = inp = input.toCharArray();
1563
1564    if(expression._isCaseInsensitive)
1565      //_toLower(inp, false);
1566      inp = _toLower(inp, false);
1567
1568    return __interpret(expression, inp, 0, inp.length);
1569    */

1570    return contains(input.toCharArray(), pattern);
1571  }
1572
1573
1574  /**
1575   * Determines if a string (represented as a char[]) contains a pattern.
1576   * If the pattern is
1577   * matched by some substring of the input, a MatchResult instance
1578   * representing the <b> first </b> such match is made acessible via
1579   * {@link #getMatch()}. If you want to access
1580   * subsequent matches you should either use a PatternMatcherInput object
1581   * or use the offset information in the MatchResult to create a substring
1582   * representing the remaining input. Using the MatchResult offset
1583   * information is the recommended method of obtaining the parts of the
1584   * string preceeding the match and following the match.
1585   * <p>
1586   * The pattern must be a Perl5Pattern instance, otherwise a
1587   * ClassCastException will be thrown. You are not required to, and
1588   * indeed should NOT try to (for performance reasons), catch a
1589   * ClassCastException because it will never be thrown as long as you use
1590   * a Perl5Pattern as the pattern parameter.
1591   * <p>
1592   * @param input The char[] to test for a match.
1593   * @param pattern The Perl5Pattern to be matched.
1594   * @return True if the input contains a pattern match, false otherwise.
1595   * @exception ClassCastException If a Pattern instance other than a
1596   * Perl5Pattern is passed as the pattern parameter.
1597   */

1598  public boolean contains(char[] input, Pattern pattern) {
1599    Perl5Pattern expression;
1600
1601    expression = (Perl5Pattern)pattern;
1602
1603    __originalInput = input;
1604
1605    if(expression._isCaseInsensitive)
1606      input = _toLower(input);
1607
1608    return __interpret(expression, input, 0, input.length);
1609  }
1610
1611
1612  private static final int __DEFAULT_LAST_MATCH_END_OFFSET = -100;
1613  private int __lastMatchInputEndOffset = __DEFAULT_LAST_MATCH_END_OFFSET;
1614  /**
1615   * Determines if the contents of a PatternMatcherInput, starting from the
1616   * current offset of the input contains a pattern.
1617   * If a pattern match is found, a MatchResult
1618   * instance representing the <b>first</b> such match is made acessible via
1619   * {@link #getMatch()}. The current offset of the
1620   * PatternMatcherInput is set to the offset corresponding to the end
1621   * of the match, so that a subsequent call to this method will continue
1622   * searching where the last call left off. You should remember that the
1623   * region between the begin and end offsets of the PatternMatcherInput are
1624   * considered the input to be searched, and that the current offset
1625   * of the PatternMatcherInput reflects where a search will start from.
1626   * Matches extending beyond the end offset of the PatternMatcherInput
1627   * will not be matched. In other words, a match must occur entirely
1628   * between the begin and end offsets of the input. See
1629   * {@link PatternMatcherInput} for more details.
1630   * <p>
1631   * As a side effect, if a match is found, the PatternMatcherInput match
1632   * offset information is updated. See the
1633   * {@link PatternMatcherInput#setMatchOffsets(int, int)}
1634   * method for more details.
1635   * <p>
1636   * The pattern must be a Perl5Pattern instance, otherwise a
1637   * ClassCastException will be thrown. You are not required to, and
1638   * indeed should NOT try to (for performance reasons), catch a
1639   * ClassCastException because it will never be thrown as long as you use
1640   * a Perl5Pattern as the pattern parameter.
1641   * <p>
1642   * This method is usually used in a loop as follows:
1643   * <blockquote><pre>
1644   * PatternMatcher matcher;
1645   * PatternCompiler compiler;
1646   * Pattern pattern;
1647   * PatternMatcherInput input;
1648   * MatchResult result;
1649   *
1650   * compiler = new Perl5Compiler();
1651   * matcher = new Perl5Matcher();
1652   *
1653   * try {
1654   * pattern = compiler.compile(somePatternString);
1655   * } catch(MalformedPatternException e) {
1656   * System.err.println("Bad pattern.");
1657   * System.err.println(e.getMessage());
1658   * return;
1659   * }
1660   *
1661   * input = new PatternMatcherInput(someStringInput);
1662   *
1663   * while(matcher.contains(input, pattern)) {
1664   * result = matcher.getMatch();
1665   * // Perform whatever processing on the result you want.
1666   * }
1667   *
1668   * </pre></blockquote>
1669   * <p>
1670   * @param input The PatternMatcherInput to test for a match.
1671   * @param pattern The Pattern to be matched.
1672   * @return True if the input contains a pattern match, false otherwise.
1673   * @exception ClassCastException If a Pattern instance other than a
1674   * Perl5Pattern is passed as the pattern parameter.
1675   */

1676  public boolean contains(PatternMatcherInput input, Pattern pattern) {
1677    char[] inp;
1678    Perl5Pattern expression;
1679    boolean matchFound;
1680
1681
1682    //if(input.length() > 0) {
1683
// We want to allow a null string to match at the end of the input
1684
// which is why we don't check endOfInput. Not sure if this is a
1685
// safe thing to do or not.
1686
if(input._currentOffset > input._endOffset)
1687      return false;
1688    //}
1689
/* else
1690      if(input._endOfInput())
1691    return false;
1692    */

1693    expression = (Perl5Pattern)pattern;
1694    __originalInput = input._originalBuffer;
1695
1696    // Todo:
1697
// Really should only reduce to lowercase that part of the
1698
// input that is necessary, instead of the whole thing.
1699
// Adjust MatchResult offsets accordingly. Actually, pass an adjustment
1700
// value to __interpret.
1701
__originalInput = input._originalBuffer;
1702    if(expression._isCaseInsensitive) {
1703      if(input._toLowerBuffer == null)
1704    input._toLowerBuffer = _toLower(__originalInput);
1705      inp = input._toLowerBuffer;
1706    } else
1707      inp = __originalInput;
1708
1709    __lastMatchInputEndOffset = input.getMatchEndOffset();
1710
1711    matchFound =
1712      __interpret(expression, inp, input._currentOffset, input._endOffset);
1713
1714    if(matchFound) {
1715      input.setCurrentOffset(__endMatchOffsets[0]);
1716      input.setMatchOffsets(__beginMatchOffsets[0], __endMatchOffsets[0]);
1717    } else {
1718      input.setCurrentOffset(input._endOffset + 1);
1719    }
1720
1721    // Restore so it doesn't interfere with other unrelated matches.
1722
__lastMatchInputEndOffset = __DEFAULT_LAST_MATCH_END_OFFSET;
1723
1724    return matchFound;
1725  }
1726
1727
1728  /**
1729   * Fetches the last match found by a call to a matches() or contains()
1730   * method. If you plan on modifying the original search input, you
1731   * must call this method BEFORE you modify the original search input,
1732   * as a lazy evaluation technique is used to create the MatchResult.
1733   * This reduces the cost of pattern matching when you don't care about
1734   * the actual match and only care if the pattern occurs in the input.
1735   * Otherwise, a MatchResult would be created for every match found,
1736   * whether or not the MatchResult was later used by a call to getMatch().
1737   * <p>
1738   * @return A MatchResult instance containing the pattern match found
1739   * by the last call to any one of the matches() or contains()
1740   * methods. If no match was found by the last call, returns
1741   * null.
1742   */

1743  public MatchResult getMatch() {
1744    if(!__lastSuccess)
1745      return null;
1746
1747    if(__lastMatchResult == null)
1748      __setLastMatchResult();
1749
1750    return __lastMatchResult;
1751  }
1752
1753}
1754
Popular Tags