KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > caucho > quercus > lib > RegexpModule


1 /*
2  * Copyright (c) 1998-2006 Caucho Technology -- all rights reserved
3  *
4  * This file is part of Resin(R) Open Source
5  *
6  * Each copy or derived work must preserve the copyright notice and this
7  * notice unmodified.
8  *
9  * Resin Open Source is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * Resin Open Source is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
17  * of NON-INFRINGEMENT. See the GNU General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with Resin Open Source; if not, write to the
22  *
23  * Free Software Foundation, Inc.
24  * 59 Temple Place, Suite 330
25  * Boston, MA 02111-1307 USA
26  *
27  * @author Scott Ferguson
28  */

29
30 package com.caucho.quercus.lib;
31
32 import com.caucho.quercus.QuercusException;
33 import com.caucho.quercus.QuercusRuntimeException;
34 import com.caucho.quercus.annotation.Optional;
35 import com.caucho.quercus.annotation.Reference;
36 import com.caucho.quercus.env.*;
37 import com.caucho.quercus.module.AbstractQuercusModule;
38 import com.caucho.util.L10N;
39 import com.caucho.util.LruCache;
40
41 import java.io.IOException JavaDoc;
42 import java.util.ArrayList JavaDoc;
43 import java.util.HashMap JavaDoc;
44 import java.util.Iterator JavaDoc;
45 import java.util.Map JavaDoc;
46 import java.util.regex.Matcher JavaDoc;
47 import java.util.regex.Pattern JavaDoc;
48
49 /**
50  * PHP regexp routines.
51  */

52 public class RegexpModule
53   extends AbstractQuercusModule
54 {
55   private static final L10N L = new L10N(RegexpModule.class);
56
57   private static final int REGEXP_EVAL = 0x01;
58
59   public static final int PREG_PATTERN_ORDER = 0x01;
60   public static final int PREG_SET_ORDER = 0x02;
61   public static final int PREG_OFFSET_CAPTURE = 0x04;
62
63   public static final int PREG_SPLIT_NO_EMPTY = 0x01;
64   public static final int PREG_SPLIT_DELIM_CAPTURE = 0x02;
65   public static final int PREG_SPLIT_OFFSET_CAPTURE = 0x04;
66
67   public static final int PREG_GREP_INVERT = 1;
68
69   public static final boolean [] PREG_QUOTE = new boolean[256];
70
71   private static final LruCache<StringValue, Pattern JavaDoc> _patternCache
72     = new LruCache<StringValue, Pattern JavaDoc>(1024);
73
74   private static final LruCache<StringValue, ArrayList JavaDoc<Replacement>> _replacementCache
75     = new LruCache<StringValue, ArrayList JavaDoc<Replacement>>(1024);
76
77   private static final HashMap JavaDoc<String JavaDoc, Value> _constMap
78     = new HashMap JavaDoc<String JavaDoc, Value>();
79
80   public String JavaDoc []getLoadedExtensions()
81   {
82     return new String JavaDoc[] { "pcre" };
83   }
84
85   /**
86    * Returns the index of the first match.
87    *
88    * @param env the calling environment
89    */

90   public static Value ereg(Env env,
91                            StringValue pattern,
92                            StringValue string,
93                            @Optional @Reference Value regsV)
94   {
95     return ereg(env, pattern, string, regsV, 0);
96   }
97
98   /**
99    * Returns the index of the first match.
100    *
101    * @param env the calling environment
102    */

103   public static Value eregi(Env env,
104                             StringValue pattern,
105                             StringValue string,
106                             @Optional @Reference Value regsV)
107   {
108     return ereg(env, pattern, string, regsV, Pattern.CASE_INSENSITIVE);
109   }
110
111   /**
112    * Returns the index of the first match.
113    *
114    * @param env the calling environment
115    */

116   private static Value ereg(Env env,
117                             StringValue rawPattern,
118                             StringValue string,
119                             Value regsV,
120                             int flags)
121   {
122     String JavaDoc cleanPattern = cleanRegexp(rawPattern, false);
123
124     Pattern JavaDoc pattern = Pattern.compile(cleanPattern, flags);
125     Matcher JavaDoc matcher = pattern.matcher(string);
126
127     if (! (matcher.find())) {
128       return BooleanValue.FALSE;
129     }
130
131     if (regsV != null && ! (regsV instanceof NullValue)) {
132       ArrayValue regs = new ArrayValueImpl();
133       regsV.set(regs);
134
135       regs.put(LongValue.ZERO, new StringValueImpl(matcher.group()));
136       int count = matcher.groupCount();
137
138       for (int i = 1; i <= count; i++) {
139         String JavaDoc group = matcher.group(i);
140
141         Value value;
142         if (group == null)
143           value = BooleanValue.FALSE;
144         else
145           value = new StringValueImpl(group);
146
147         regs.put(new LongValue(i), value);
148       }
149
150       int len = matcher.end() - matcher.start();
151
152       if (len == 0)
153         return LongValue.ONE;
154       else
155         return new LongValue(len);
156     }
157     else {
158       return LongValue.ONE;
159     }
160   }
161
162   /**
163    * Returns the index of the first match.
164    *
165    * php/151u
166    * The array that preg_match (PHP 5) returns does not have trailing unmatched
167    * groups. Therefore, an unmatched group should not be added to the array
168    * unless a matched group appears after it.
169    * (Only preg_match exhibits this odd behavior).
170    *
171    * @param env the calling environment
172    */

173   public static int preg_match(Env env,
174                                StringValue patternString,
175                                StringValue string,
176                                @Optional @Reference Value matchRef,
177                                @Optional int flags,
178                                @Optional int offset)
179   {
180     if (patternString.length() < 2) {
181       env.warning(L.l("Regexp pattern must have opening and closing delimiters"));
182       return 0;
183     }
184
185     Pattern JavaDoc pattern = compileRegexp(patternString);
186     Matcher JavaDoc matcher = pattern.matcher(string);
187
188     ArrayValue regs;
189
190     if (matchRef instanceof DefaultValue)
191       regs = null;
192     else
193       regs = new ArrayValueImpl();
194
195     if (! (matcher.find(offset))) {
196       matchRef.set(regs);
197       return 0;
198     }
199
200     boolean isOffsetCapture = (flags & PREG_OFFSET_CAPTURE) != 0;
201
202     if (regs != null) {
203       if (isOffsetCapture) {
204         ArrayValueImpl part = new ArrayValueImpl();
205         part.append(new StringValueImpl(matcher.group()));
206         part.append(new LongValue(matcher.start()));
207
208         regs.put(LongValue.ZERO, part);
209       }
210       else
211         regs.put(LongValue.ZERO, new StringValueImpl(matcher.group()));
212
213       int count = matcher.groupCount();
214
215       for (int i = 1; i <= count; i++) {
216         String JavaDoc group = matcher.group(i);
217
218         if (group == null)
219           continue;
220
221         if (isOffsetCapture) {
222           // php/151u
223
// add unmatched groups first
224
for (int j = regs.getSize(); j < i; j++) {
225             ArrayValue part = new ArrayValueImpl();
226
227             part.append(StringValue.EMPTY);
228             part.append(LongValue.MINUS_ONE);
229
230             regs.put(new LongValue(j), part);
231           }
232
233           ArrayValueImpl part = new ArrayValueImpl();
234           part.append(new StringValueImpl(group));
235           part.append(new LongValue(matcher.start(i)));
236
237           regs.put(new LongValue(i), part);
238         }
239         else {
240           // php/151u
241
// add unmatched groups first
242
for (int j = regs.getSize(); j < i; j++) {
243             regs.put(new LongValue(j), StringValue.EMPTY);
244           }
245
246           regs.put(new LongValue(i), new StringValueImpl(group));
247         }
248       }
249
250       matchRef.set(regs);
251     }
252
253     return 1;
254   }
255
256   /**
257    * Returns the index of the first match.
258    *
259    * @param env the calling environment
260    */

261   public static int preg_match_all(Env env,
262                                    StringValue patternString,
263                                    StringValue subject,
264                                    @Reference Value matchRef,
265                                    @Optional("PREG_PATTERN_ORDER") int flags,
266                                    @Optional int offset)
267   {
268     if (patternString.length() < 2) {
269       env.warning(L.l("Pattern must have at least opening and closing delimiters"));
270       return 0;
271     }
272
273     if (((flags & PREG_PATTERN_ORDER) != 0) && ((flags & PREG_SET_ORDER) != 0)) {
274       env.warning((L.l("Cannot combine PREG_PATTER_ORDER and PREG_SET_ORDER")));
275       return 0;
276     }
277
278     Pattern JavaDoc pattern = compileRegexp(patternString);
279
280     ArrayValue matches;
281
282     if (matchRef instanceof ArrayValue)
283       matches = (ArrayValue) matchRef;
284     else
285       matches = new ArrayValueImpl();
286
287     matches.clear();
288
289     matchRef.set(matches);
290
291     if ((flags & PREG_PATTERN_ORDER) != 0) {
292       return pregMatchAllPatternOrder(env, pattern, subject,
293                       matches, flags, offset);
294     }
295     else if ((flags & PREG_SET_ORDER) != 0) {
296       return pregMatchAllSetOrder(env, pattern, subject,
297                   matches, flags, offset);
298     }
299     else
300       throw new UnsupportedOperationException JavaDoc();
301   }
302
303   /**
304    * Returns the index of the first match.
305    *
306    * @param env the calling environment
307    */

308   public static int pregMatchAllPatternOrder(Env env,
309                          Pattern JavaDoc pattern,
310                          StringValue subject,
311                          ArrayValue matches,
312                          int flags,
313                          int offset)
314   {
315     Matcher JavaDoc matcher = pattern.matcher(subject);
316
317     int groupCount = matcher.groupCount();
318
319     ArrayValue []matchList = new ArrayValue[groupCount + 1];
320
321     for (int j = 0; j <= groupCount; j++) {
322       ArrayValue values = new ArrayValueImpl();
323       matches.put(values);
324       matchList[j] = values;
325     }
326
327     if (! (matcher.find())) {
328       return 0;
329     }
330
331     int count = 0;
332
333     do {
334       count++;
335
336       for (int j = 0; j <= groupCount; j++) {
337     ArrayValue values = matchList[j];
338
339     int start = matcher.start(j);
340     int end = matcher.end(j);
341       
342     StringValue groupValue = subject.substring(start, end);
343
344     Value result = NullValue.NULL;
345
346     if (groupValue != null) {
347       if ((flags & PREG_OFFSET_CAPTURE) != 0) {
348         result = new ArrayValueImpl();
349         result.put(groupValue);
350         result.put(LongValue.create(start));
351       } else {
352         result = groupValue;
353       }
354     }
355
356     values.put(result);
357       }
358     } while (matcher.find());
359
360     return count;
361   }
362
363   /**
364    * Returns the index of the first match.
365    *
366    * @param env the calling environment
367    */

368   private static int pregMatchAllSetOrder(Env env,
369                       Pattern JavaDoc pattern,
370                       StringValue subject,
371                       ArrayValue matches,
372                       int flags,
373                       int offset)
374   {
375     Matcher JavaDoc matcher = pattern.matcher(subject);
376
377     if (! (matcher.find())) {
378       return 0;
379     }
380
381     int count = 0;
382
383     do {
384       count++;
385
386       ArrayValue matchResult = new ArrayValueImpl();
387       matches.put(matchResult);
388
389       for (int j = 0; j <= matcher.groupCount(); j++) {
390     int start = matcher.start(j);
391     int end = matcher.end(j);
392       
393     StringValue groupValue = subject.substring(start, end);
394
395     Value result = NullValue.NULL;
396
397     if (groupValue != null) {
398       if ((flags & PREG_OFFSET_CAPTURE) != 0) {
399         result = new ArrayValueImpl();
400         result.put(groupValue);
401         result.put(LongValue.create(start));
402       } else {
403         result = groupValue;
404       }
405     }
406     matchResult.put(result);
407       }
408     } while (matcher.find());
409
410     return count;
411   }
412
413   /**
414    * Quotes regexp values
415    */

416   public static String JavaDoc preg_quote(String JavaDoc string,
417                                   @Optional String JavaDoc delim)
418   {
419     StringBuilder JavaDoc sb = new StringBuilder JavaDoc();
420
421     boolean []extra = null;
422
423     if (delim != null && ! delim.equals("")) {
424       extra = new boolean[256];
425
426       for (int i = 0; i < delim.length(); i++)
427         extra[delim.charAt(i)] = true;
428     }
429
430     int length = string.length();
431     for (int i = 0; i < length; i++) {
432       char ch = string.charAt(i);
433
434       if (ch >= 256)
435         sb.append(ch);
436       else if (PREG_QUOTE[ch]) {
437         sb.append('\\');
438         sb.append(ch);
439       }
440       else if (extra != null && extra[ch]) {
441         sb.append('\\');
442         sb.append(ch);
443       }
444       else
445         sb.append(ch);
446     }
447
448     return sb.toString();
449   }
450
451   /**
452    * Loops through subject if subject is array of strings
453    *
454    * @param env
455    * @param pattern string or array
456    * @param replacement string or array
457    * @param subject string or array
458    * @param limit
459    * @param count
460    * @return
461    */

462   public static Value preg_replace(Env env,
463                                    Value pattern,
464                                    Value replacement,
465                                    Value subject,
466                                    @Optional("-1") long limit,
467                                    @Optional @Reference Value count)
468   {
469     if (subject instanceof ArrayValue) {
470       ArrayValue result = new ArrayValueImpl();
471
472       for (Value value : ((ArrayValue) subject).values()) {
473         result.put(pregReplace(env,
474                                pattern,
475                                replacement,
476                                value.toStringValue(),
477                                limit,
478                                count));
479       }
480
481       return result;
482
483     }
484     else if (subject.isset()) {
485       return pregReplace(env, pattern, replacement, subject.toStringValue(),
486              limit, count);
487     } else
488       return StringValue.EMPTY;
489
490   }
491
492   /**
493    * Replaces values using regexps
494    */

495   private static Value pregReplace(Env env,
496                                    Value patternValue,
497                                    Value replacement,
498                                    StringValue subject,
499                                    @Optional("-1") long limit,
500                                    Value countV)
501   {
502     StringValue string = subject;
503
504     if (limit < 0)
505       limit = Long.MAX_VALUE;
506
507     if (patternValue.isArray() && replacement.isArray()) {
508       ArrayValue patternArray = (ArrayValue) patternValue;
509       ArrayValue replacementArray = (ArrayValue) replacement;
510
511       Iterator JavaDoc<Value> patternIter = patternArray.values().iterator();
512       Iterator JavaDoc<Value> replacementIter = replacementArray.values().iterator();
513
514       while (patternIter.hasNext() && replacementIter.hasNext()) {
515         string = pregReplaceString(env,
516                                    patternIter.next().toStringValue(),
517                                    replacementIter.next().toStringValue(),
518                                    string,
519                                    limit,
520                                    countV);
521       }
522     } else if (patternValue.isArray()) {
523       ArrayValue patternArray = (ArrayValue) patternValue;
524
525       for (Value value : patternArray.values()) {
526         string = pregReplaceString(env,
527                                    value.toStringValue(),
528                                    replacement.toStringValue(),
529                                    string,
530                                    limit,
531                                    countV);
532       }
533     } else {
534       return pregReplaceString(env,
535                    patternValue.toStringValue(),
536                    replacement.toStringValue(),
537                    string,
538                    limit,
539                    countV);
540     }
541
542     return string;
543   }
544
545   /**
546    * replaces values using regexps and callback fun
547    * @param env
548    * @param patternString
549    * @param fun
550    * @param subject
551    * @param limit
552    * @param countV
553    * @return subject with everything replaced
554    */

555   private static StringValue pregReplaceCallbackImpl(Env env,
556                              StringValue patternString,
557                              Callback fun,
558                              StringValue subject,
559                              long limit,
560                              Value countV)
561   {
562
563     long numberOfMatches = 0;
564
565     if (limit < 0)
566       limit = Long.MAX_VALUE;
567
568     Pattern JavaDoc pattern = compileRegexp(patternString);
569
570     Matcher JavaDoc matcher = pattern.matcher(subject);
571
572     StringBuilderValue result = new StringBuilderValue();
573     int tail = 0;
574
575     while (matcher.find() && numberOfMatches < limit) {
576       // Increment countV (note: if countV != null, then it should be a Var)
577
if ((countV != null) && (countV instanceof Var)) {
578         long count = ((Var) countV).getRawValue().toLong();
579         countV.set(LongValue.create(count + 1));
580       }
581
582       if (tail < matcher.start())
583         result.append(subject.substring(tail, matcher.start()));
584
585       ArrayValue regs = new ArrayValueImpl();
586
587       for (int i = 0; i <= matcher.groupCount(); i++) {
588         String JavaDoc group = matcher.group(i);
589
590         if (group != null)
591           regs.put(new StringValueImpl(group));
592         else
593           regs.put(StringValue.EMPTY);
594       }
595
596       Value replacement = fun.call(env, regs);
597
598       result.append(replacement);
599
600       tail = matcher.end();
601
602       numberOfMatches++;
603     }
604
605     if (tail < subject.length())
606       result.append(subject.substring(tail));
607
608     return result;
609   }
610
611   /**
612    * Replaces values using regexps
613    */

614   private static StringValue pregReplaceString(Env env,
615                            StringValue patternString,
616                            StringValue replacement,
617                            StringValue subject,
618                            long limit,
619                            Value countV)
620   {
621     Pattern JavaDoc pattern = compileRegexp(patternString);
622
623     // check for e modifier in patternString
624
int patternFlags = regexpFlags(patternString);
625     boolean isEval = (patternFlags & REGEXP_EVAL) != 0;
626
627     ArrayList JavaDoc<Replacement> replacementProgram
628       = _replacementCache.get(replacement);
629
630     if (replacementProgram == null) {
631       replacementProgram = compileReplacement(env, replacement, isEval);
632       _replacementCache.put(replacement, replacementProgram);
633     }
634
635     return pregReplaceStringImpl(env,
636                                  pattern,
637                                  replacementProgram,
638                                  subject,
639                                  limit,
640                                  countV,
641                  isEval);
642   }
643
644   /**
645    * Replaces values using regexps
646    */

647   public static Value ereg_replace(Env env,
648                                    StringValue patternString,
649                                    StringValue replacement,
650                                    StringValue subject)
651   {
652     Pattern JavaDoc pattern = Pattern.compile(cleanRegexp(patternString, false));
653
654     ArrayList JavaDoc<Replacement> replacementProgram
655       = _replacementCache.get(replacement);
656
657     if (replacementProgram == null) {
658       replacementProgram = compileReplacement(env, replacement, false);
659       _replacementCache.put(replacement, replacementProgram);
660     }
661
662     return pregReplaceStringImpl(env,
663                  pattern,
664                  replacementProgram,
665                  subject,
666                  -1,
667                  NullValue.NULL,
668                  false);
669   }
670
671   /**
672    * Replaces values using regexps
673    */

674   public static Value eregi_replace(Env env,
675                                     StringValue patternString,
676                                     StringValue replacement,
677                                     StringValue subject)
678   {
679     Pattern JavaDoc pattern = Pattern.compile(cleanRegexp(patternString, false),
680                                       Pattern.CASE_INSENSITIVE);
681
682     ArrayList JavaDoc<Replacement> replacementProgram
683       = _replacementCache.get(replacement);
684
685     if (replacementProgram == null) {
686       replacementProgram = compileReplacement(env, replacement, false);
687       _replacementCache.put(replacement, replacementProgram);
688     }
689
690     return pregReplaceStringImpl(env, pattern, replacementProgram,
691                  subject, -1, NullValue.NULL, false);
692   }
693
694   /**
695    * Replaces values using regexps
696    */

697   private static StringValue pregReplaceStringImpl(Env env,
698                            Pattern JavaDoc pattern,
699                            ArrayList JavaDoc<Replacement> replacementList,
700                            StringValue subject,
701                            long limit,
702                            Value countV,
703                            boolean isEval)
704   {
705     if (limit < 0)
706       limit = Long.MAX_VALUE;
707
708     int length = subject.length();
709
710     Matcher JavaDoc matcher = pattern.matcher(subject);
711
712     StringBuilderValue result = null;
713     int tail = 0;
714
715     int replacementLen = replacementList.size();
716
717     while (matcher.find() && limit-- > 0) {
718       if (result == null)
719     result = new StringBuilderValue();
720       
721       // Increment countV (note: if countV != null, then it should be a Var)
722
if ((countV != null) && (countV instanceof Var)) {
723         countV.set(LongValue.create(countV.toLong() + 1));
724       }
725
726       // append all text up to match
727
if (tail < matcher.start())
728         result.append(subject, tail, matcher.start());
729
730       // if isEval then append replacement evaluated as PHP code
731
// else append replacement string
732
if (isEval) {
733         StringBuilderValue evalString = new StringBuilderValue();
734
735         for (int i = 0; i < replacementLen; i++) {
736           Replacement replacement = replacementList.get(i);
737
738           replacement.eval(evalString, subject, matcher);
739         }
740
741     try {
742       result.append(env.evalCode(evalString.toString()));
743     } catch (IOException JavaDoc e) {
744       throw new QuercusException(e);
745     }
746       } else {
747         for (int i = 0; i < replacementLen; i++) {
748           Replacement replacement = replacementList.get(i);
749
750           replacement.eval(result, subject, matcher);
751         }
752       }
753
754       tail = matcher.end();
755     }
756
757     if (result == null)
758       return subject;
759     
760     if (tail < length)
761       result.append(subject, tail, length);
762
763     return result;
764   }
765
766   /**
767    * Loops through subject if subject is array of strings
768    *
769    * @param env
770    * @param pattern
771    * @param fun
772    * @param subject
773    * @param limit
774    * @param count
775    * @return
776    */

777   public static Value preg_replace_callback(Env env,
778                                             Value pattern,
779                                             Callback fun,
780                                             Value subject,
781                                             @Optional("-1") long limit,
782                                             @Optional @Reference Value count)
783   {
784     if (subject instanceof ArrayValue) {
785       ArrayValue result = new ArrayValueImpl();
786
787       for (Value value : ((ArrayValue) subject).values()) {
788         result.put(pregReplaceCallback(env,
789                                        pattern.toStringValue(),
790                                        fun,
791                                        value.toStringValue(),
792                                        limit,
793                                        count));
794       }
795
796       return result;
797
798     } else if (subject instanceof StringValue) {
799       return pregReplaceCallback(env,
800                                  pattern.toStringValue(),
801                                  fun,
802                                  subject.toStringValue(),
803                                  limit,
804                                  count);
805     } else {
806       return NullValue.NULL;
807     }
808   }
809
810   /**
811    * Replaces values using regexps
812    */

813   private static Value pregReplaceCallback(Env env,
814                                            Value patternValue,
815                                            Callback fun,
816                                            StringValue subject,
817                                            @Optional("-1") long limit,
818                                            @Optional @Reference Value countV)
819   {
820     if (limit < 0)
821       limit = Long.MAX_VALUE;
822
823     if (patternValue.isArray()) {
824       ArrayValue patternArray = (ArrayValue) patternValue;
825
826       for (Value value : patternArray.values()) {
827         subject = pregReplaceCallbackImpl(env,
828                                           value.toStringValue(),
829                                           fun,
830                                           subject,
831                                           limit,
832                                           countV);
833       }
834
835       return subject;
836
837     } else if (patternValue instanceof StringValue) {
838       return pregReplaceCallbackImpl(env,
839                      patternValue.toStringValue(),
840                      fun,
841                      subject,
842                      limit,
843                      countV);
844     } else {
845       return NullValue.NULL;
846     }
847   }
848
849   /**
850    * Returns array of substrings or
851    * of arrays ([0] => substring [1] => offset) if
852    * PREG_SPLIT_OFFSET_CAPTURE is set
853    *
854    * @param env the calling environment
855    */

856   public static Value preg_split(Env env,
857                                  StringValue patternString,
858                                  StringValue string,
859                                  @Optional("-1") long limit,
860                                  @Optional int flags)
861   {
862     if (limit <= 0)
863       limit = Long.MAX_VALUE;
864
865     Pattern JavaDoc pattern = compileRegexp(patternString);
866     Matcher JavaDoc matcher = pattern.matcher(string);
867
868     ArrayValue result = new ArrayValueImpl();
869
870     int head = 0;
871     long count = 0;
872     
873     boolean allowEmpty = (flags & PREG_SPLIT_NO_EMPTY) == 0;
874     boolean isCaptureOffset = (flags & PREG_SPLIT_OFFSET_CAPTURE) != 0;
875     boolean isCaptureDelim = (flags & PREG_SPLIT_DELIM_CAPTURE) != 0;
876
877     while (matcher.find()) {
878       int startPosition = head;
879       StringValue unmatched;
880
881       // Get non-matching sequence
882
if (count == limit - 1) {
883         unmatched = string.substring(head);
884         head = string.length();
885       }
886       else {
887         unmatched = string.substring(head, matcher.start());
888         head = matcher.end();
889       }
890
891       // Append non-matching sequence
892
if (unmatched.length() != 0 || allowEmpty) {
893         if (isCaptureOffset) {
894           ArrayValue part = new ArrayValueImpl();
895
896           part.put(unmatched);
897           part.put(LongValue.create(startPosition));
898
899           result.put(part);
900         }
901         else {
902           result.put(unmatched);
903         }
904
905         count++;
906       }
907  
908       if (count == limit)
909         break;
910
911       // Append parameterized delimiters
912
if (isCaptureDelim) {
913         for (int i = 1; i <= matcher.groupCount(); i++) {
914           int start = matcher.start(i);
915           int end = matcher.end(i);
916
917           if ((start != -1 && end - start > 0) || allowEmpty) {
918
919             StringValue groupValue;
920             if (start < 0)
921               groupValue = StringValue.EMPTY;
922             else
923               groupValue = string.substring(start, end);
924
925             if (isCaptureOffset) {
926               ArrayValue part = new ArrayValueImpl();
927
928               part.put(groupValue);
929               part.put(LongValue.create(startPosition));
930
931               result.put(part);
932             }
933             else
934               result.put(groupValue);
935           }
936         }
937       }
938     }
939
940     // Append non-matching sequence at the end
941
if (count < limit && (head < string.length() || allowEmpty)) {
942       if (isCaptureOffset) {
943         ArrayValue part = new ArrayValueImpl();
944
945         part.put(string.substring(head));
946         part.put(LongValue.create(head));
947
948         result.put(part);
949       }
950       else
951         result.put(string.substring(head));
952     }
953
954     return result;
955
956 /*
957     while ((matcher.find()) && (count < limit)) {
958
959       StringValue value;
960
961       int startPosition = head;
962
963       if (head != matcher.start() || isAllowEmpty) {
964         // If at limit, then just output the rest of string
965         if (count == limit - 1) {
966
967           value = string.substring(head);
968           head = string.length();
969         } else {
970           value = string.substring(head, matcher.start());
971           head = matcher.end();
972         }
973
974         if ((flags & PREG_SPLIT_OFFSET_CAPTURE) != 0) {
975           ArrayValue part = new ArrayValueImpl();
976           part.put(value);
977           part.put(LongValue.create(startPosition));
978
979           result.put(part);
980         } else {
981           result.put(value);
982         }
983
984         count++;
985       } else
986         head = matcher.end();
987
988       if ((flags & PREG_SPLIT_DELIM_CAPTURE) != 0) {
989     for (int i = 1; i <= matcher.groupCount(); i++) {
990       String group = matcher.group(i);
991       Value groupValue;
992
993       if (group != null)
994         groupValue = new StringValueImpl(group);
995       else
996         groupValue = StringValue.EMPTY;
997
998           if ((flags & PREG_SPLIT_OFFSET_CAPTURE) != 0) {
999             ArrayValue part = new ArrayValueImpl();
1000            part.put(groupValue);
1001            part.put(LongValue.create(matcher.start()));
1002
1003            result.put(part);
1004          } else {
1005        result.put(groupValue);
1006          }
1007        }
1008      }
1009    }
1010
1011    if (head == string.length() && ! isAllowEmpty) {
1012    }
1013    else if ((head <= string.length()) && (count != limit)) {
1014      if ((flags & PREG_SPLIT_OFFSET_CAPTURE) != 0) {
1015        ArrayValue part = new ArrayValueImpl();
1016        part.put(string.substring(head));
1017        part.put(LongValue.create(head));
1018
1019        result.put(part);
1020      } else {
1021        result.put(string.substring(head));
1022      }
1023    }
1024
1025    return result;
1026*/

1027  }
1028
1029  /**
1030   * Makes a regexp for a case-insensitive match.
1031   */

1032  public static String JavaDoc sql_regcase(String JavaDoc string)
1033  {
1034    StringBuilder JavaDoc sb = new StringBuilder JavaDoc();
1035
1036    int len = string.length();
1037    for (int i = 0; i < len; i++) {
1038      char ch = string.charAt(i);
1039
1040      if (Character.isLowerCase(ch)) {
1041    sb.append('[');
1042    sb.append(Character.toUpperCase(ch));
1043    sb.append(ch);
1044    sb.append(']');
1045      }
1046      else if (Character.isUpperCase(ch)) {
1047    sb.append('[');
1048    sb.append(ch);
1049    sb.append(Character.toLowerCase(ch));
1050    sb.append(']');
1051      }
1052      else
1053    sb.append(ch);
1054    }
1055
1056    return sb.toString();
1057  }
1058
1059  /**
1060   * Returns the index of the first match.
1061   *
1062   * @param env the calling environment
1063   */

1064  public static Value split(Env env,
1065                            StringValue patternString,
1066                            StringValue string,
1067                            @Optional("-1") long limit)
1068  {
1069    if (limit < 0)
1070      limit = Long.MAX_VALUE;
1071
1072    String JavaDoc cleanRegexp = cleanRegexp(patternString, false);
1073
1074    Pattern JavaDoc pattern = Pattern.compile(cleanRegexp);
1075
1076    ArrayValue result = new ArrayValueImpl();
1077
1078    Matcher JavaDoc matcher = pattern.matcher(string);
1079    long count = 0;
1080    int head = 0;
1081
1082    while ((matcher.find()) && (count < limit)) {
1083      StringValue value;
1084      if (count == limit - 1) {
1085        value = string.substring(head);
1086        head = string.length();
1087      } else {
1088        value = string.substring(head, matcher.start());
1089        head = matcher.end();
1090      }
1091
1092      result.put(value);
1093
1094      count++;
1095    }
1096
1097    if ((head <= string.length() && (count != limit))) {
1098      result.put(string.substring(head));
1099    }
1100
1101    return result;
1102  }
1103
1104  /**
1105   * Returns an array of all the values that matched the given pattern if the
1106   * flag no flag is passed. Otherwise it will return an array of all the
1107   * values that did not match.
1108   *
1109   * @param patternString the pattern
1110   * @param input the array to check the pattern against
1111   * @param flag 0 for matching and 1 for elements that do not match
1112   * @return an array of either matching elements are non-matching elements
1113   */

1114  public static ArrayValue preg_grep(Env env,
1115                                     StringValue patternString,
1116                                     ArrayValue input,
1117                                     @Optional("0") int flag)
1118  {
1119    // php/151b
1120

1121    Pattern JavaDoc pattern = compileRegexp(patternString);
1122
1123    Matcher JavaDoc matcher = null;
1124
1125    ArrayValue matchArray = new ArrayValueImpl();
1126
1127    for (Map.Entry JavaDoc<Value, Value> entry : input.entrySet()) {
1128      Value entryValue = entry.getValue();
1129      Value entryKey = entry.getKey();
1130
1131      matcher = pattern.matcher(entryValue.toString());
1132
1133      boolean found = matcher.find();
1134
1135      if (!found && (flag == PREG_GREP_INVERT))
1136        matchArray.append(entryKey, entryValue);
1137      else if (found && (flag != PREG_GREP_INVERT))
1138        matchArray.append(entryKey, entryValue);
1139    }
1140
1141    return matchArray;
1142  }
1143
1144  /**
1145   * Returns an array of strings produces from splitting the passed string
1146   * around the provided pattern. The pattern is case insensitive.
1147   *
1148   * @param patternString the pattern
1149   * @param string the string to split
1150   * @param limit if specified, the maximum number of elements in the array
1151   * @return an array of strings split around the pattern string
1152   */

1153  public static ArrayValue spliti(Env env,
1154                                  StringValue patternString,
1155                                  StringValue string,
1156                                  @Optional("-1") long limit)
1157  {
1158    if (limit < 0)
1159      limit = Long.MAX_VALUE;
1160
1161    // php/151c
1162

1163    String JavaDoc cleanRegexp = cleanRegexp(patternString, false);
1164
1165    Pattern JavaDoc pattern = Pattern.compile(cleanRegexp, Pattern.CASE_INSENSITIVE);
1166
1167    ArrayValue result = new ArrayValueImpl();
1168
1169    Matcher JavaDoc matcher = pattern.matcher(string);
1170    long count = 0;
1171    int head = 0;
1172
1173    while ((matcher.find()) && (count < limit)) {
1174      StringValue value;
1175      if (count == limit - 1) {
1176        value = string.substring(head);
1177        head = string.length();
1178      } else {
1179        value = string.substring(head, matcher.start());
1180        head = matcher.end();
1181      }
1182
1183      result.put(value);
1184
1185      count++;
1186    }
1187
1188    if ((head <= string.length()) && (count != limit)) {
1189      result.put(string.substring(head));
1190    }
1191
1192    return result;
1193  }
1194
1195  private static Pattern JavaDoc compileRegexp(StringValue rawRegexp)
1196  {
1197    Pattern JavaDoc pattern = _patternCache.get(rawRegexp);
1198
1199    if (pattern != null)
1200      return pattern;
1201
1202    if (rawRegexp.length() < 2) {
1203      throw new IllegalStateException JavaDoc(L.l(
1204        "Can't find delimiters in regexp '{0}'.",
1205        rawRegexp));
1206    }
1207
1208    char delim = rawRegexp.charAt(0);
1209
1210    if (delim == '{')
1211      delim = '}';
1212    else if (delim == '[')
1213      delim = ']';
1214    else if (delim == '(')
1215      delim = ')';
1216
1217    int tail = rawRegexp.lastIndexOf(delim);
1218
1219    if (tail <= 0)
1220      throw new IllegalStateException JavaDoc(L.l(
1221        "Can't find second {0} in regexp '{1}'.",
1222        String.valueOf((char) delim),
1223        rawRegexp));
1224
1225    int len = rawRegexp.length();
1226
1227    int flags = 0;
1228    boolean isExt = false;
1229    boolean isGreedy = true;
1230
1231    for (int i = tail + 1; i < len; i++) {
1232      char ch = rawRegexp.charAt(i);
1233
1234      switch (ch) {
1235      case 'i':
1236        flags |= Pattern.CASE_INSENSITIVE;
1237        break;
1238      case 's':
1239        flags |= Pattern.DOTALL;
1240        break;
1241      case 'x':
1242        flags |= Pattern.COMMENTS;
1243        break;
1244      case 'm':
1245        flags |= Pattern.MULTILINE;
1246        break;
1247      case 'U':
1248        isGreedy = false;
1249        break;
1250      }
1251    }
1252
1253    StringValue regexp = rawRegexp.substring(1, tail);
1254
1255    String JavaDoc cleanRegexp = cleanRegexp(regexp, (flags & Pattern.COMMENTS) != 0);
1256
1257    if (! isGreedy)
1258      cleanRegexp = toNonGreedy(cleanRegexp);
1259
1260    pattern = Pattern.compile(cleanRegexp, flags);
1261
1262    _patternCache.put(rawRegexp, pattern);
1263
1264    return pattern;
1265  }
1266
1267  private static int regexpFlags(StringValue rawRegexp)
1268  {
1269    char delim = rawRegexp.charAt(0);
1270    if (delim == '{')
1271      delim = '}';
1272    else if (delim == '[')
1273      delim = ']';
1274
1275    int tail = rawRegexp.lastIndexOf(delim);
1276
1277    if (tail <= 0)
1278      throw new IllegalStateException JavaDoc(L.l(
1279        "Can't find second {0} in regexp '{1}'.",
1280        String.valueOf((char) delim),
1281        rawRegexp));
1282
1283    int len = rawRegexp.length();
1284
1285    int flags = 0;
1286
1287    for (int i = tail + 1; i < len; i++) {
1288      char ch = rawRegexp.charAt(i);
1289
1290      switch (ch) {
1291      case 'e':
1292        flags |= REGEXP_EVAL;
1293        break;
1294      }
1295    }
1296
1297    return flags;
1298  }
1299
1300  private static ArrayList JavaDoc<Replacement>
1301    compileReplacement(Env env, StringValue replacement, boolean isEval)
1302  {
1303    ArrayList JavaDoc<Replacement> program = new ArrayList JavaDoc<Replacement>();
1304    StringBuilder JavaDoc text = new StringBuilder JavaDoc();
1305
1306    for (int i = 0; i < replacement.length(); i++) {
1307      char ch = replacement.charAt(i);
1308
1309      if ((ch == '\\' || ch == '$') && i + 1 < replacement.length()) {
1310        char digit;
1311
1312        if ('0' <= (digit = replacement.charAt(i + 1)) && digit <= '9') {
1313          int group = digit - '0';
1314          i++;
1315
1316          if (i + 1 < replacement.length() &&
1317              '0' <= (digit = replacement.charAt(i + 1)) && digit <= '9') {
1318            group = 10 * group + digit - '0';
1319            i++;
1320          }
1321
1322          if (text.length() > 0)
1323            program.add(new TextReplacement(text));
1324
1325      if (isEval)
1326        program.add(new GroupEscapeReplacement(group));
1327      else
1328        program.add(new GroupReplacement(group));
1329
1330          text.setLength(0);
1331        }
1332    else if (ch == '\\') {
1333          i++;
1334
1335      if (digit != '\\') {
1336        text.append('\\');
1337      }
1338          text.append(digit);
1339      // took out test for ch == '$' because must be true
1340
//} else if (ch == '$' && digit == '{') {
1341
} else if (digit == '{') {
1342          i += 2;
1343
1344          int group = 0;
1345
1346          while (i < replacement.length() &&
1347                 '0' <= (digit = replacement.charAt(i)) && digit <= '9') {
1348            group = 10 * group + digit - '0';
1349
1350            i++;
1351          }
1352
1353          if (digit != '}') {
1354            env.warning(L.l("bad regexp {0}", replacement));
1355            throw new QuercusException("bad regexp");
1356          }
1357
1358          if (text.length() > 0)
1359            program.add(new TextReplacement(text));
1360
1361      if (isEval)
1362        program.add(new GroupEscapeReplacement(group));
1363      else
1364        program.add(new GroupReplacement(group));
1365      
1366          text.setLength(0);
1367        }
1368        else
1369          text.append(ch);
1370      }
1371      else
1372        text.append(ch);
1373    }
1374
1375    if (text.length() > 0)
1376      program.add(new TextReplacement(text));
1377
1378    return program;
1379  }
1380
1381  private static final String JavaDoc [] POSIX_CLASSES = {
1382    "[:alnum:]", "[:alpha:]", "[:blank:]", "[:cntrl:]",
1383    "[:digit:]", "[:graph:]", "[:lower:]", "[:print:]",
1384    "[:punct:]", "[:space:]", "[:upper:]", "[:xdigit:]"
1385  };
1386
1387  private static final String JavaDoc [] REGEXP_CLASSES = {
1388    "\\p{Alnum}", "\\p{Alpha}", "\\p{Blank}", "\\p{Cntrl}",
1389    "\\p{Digit}", "\\p{Graph}", "\\p{Lower}", "\\p{Print}",
1390    "\\p{Punct}", "\\p{Space}", "\\p{Upper}", "\\p{XDigit}"
1391  };
1392
1393  /**
1394   * Cleans the regexp from valid values that the Java regexps can't handle.
1395   * Currently "+?".
1396   */

1397  // XXX: not handling '['
1398
private static String JavaDoc cleanRegexp(StringValue regexp, boolean isComments)
1399  {
1400    int len = regexp.length();
1401
1402    StringBuilder JavaDoc sb = new StringBuilder JavaDoc();
1403    char quote = 0;
1404
1405    for (int i = 0; i < len; i++) {
1406      char ch = regexp.charAt(i);
1407
1408      switch (ch) {
1409      case '\\':
1410        sb.append(ch);
1411
1412        if (i + 1 < len) {
1413          i++;
1414
1415          ch = regexp.charAt(i);
1416
1417          if (ch == '0' ||
1418          '1' <= ch && ch <= '3' && i + 1 < len && '0' <= regexp.charAt(i + 1) && ch <= '7') {
1419            // Java's regexp requires \0 for octal
1420

1421            // sb.append('\\');
1422
sb.append('0');
1423            sb.append(ch);
1424          }
1425          else if (ch == 'x' && i + 1 < len && regexp.charAt(i + 1) == '{') {
1426            int tail = regexp.indexOf('}', i + 1);
1427
1428            if (tail > 0) {
1429              StringValue hex = regexp.substring(i + 2, tail);
1430
1431              if (hex.length() == 1)
1432                sb.append("x0" + hex);
1433              else if (hex.length() == 2)
1434                sb.append("x" + hex);
1435              else if (hex.length() == 4)
1436                sb.append("u" + hex);
1437              else
1438                throw new QuercusRuntimeException(L.l("illegal hex escape"));
1439
1440              i = tail;
1441            }
1442            else {
1443              sb.append("\\x");
1444            }
1445          }
1446          else
1447            sb.append(ch);
1448        }
1449        break;
1450
1451      case '[':
1452        if (quote == '[') {
1453          if (i + 1 < len && regexp.charAt(i + 1) == ':') {
1454            String JavaDoc test = regexp.substring(i).toString();
1455            boolean hasMatch = false;
1456
1457            for (int j = 0; j < POSIX_CLASSES.length; j++) {
1458              if (test.startsWith(POSIX_CLASSES[j])) {
1459                hasMatch = true;
1460
1461                sb.append(REGEXP_CLASSES[j]);
1462
1463                i += POSIX_CLASSES[j].length() - 1;
1464              }
1465            }
1466
1467            if (! hasMatch)
1468              sb.append("\\[");
1469          }
1470          else
1471            sb.append("\\[");
1472        }
1473        else if (i + 1 < len && regexp.charAt(i + 1) == '['
1474         && ! (i + 2 < len && regexp.charAt(i + 2) == ':')) {
1475      // XXX: check regexp grammar
1476
// php/151n
1477
sb.append("[\\[");
1478      i += 1;
1479    }
1480        else if (i + 2 < len &&
1481         regexp.charAt(i + 1) == '^' &&
1482         regexp.charAt(i + 2) == ']') {
1483          sb.append("[^\\]");
1484      i += 2;
1485    }
1486        else
1487          sb.append('[');
1488
1489        if (quote == 0)
1490          quote = '[';
1491        break;
1492
1493      case '#':
1494        if (quote == '[' && isComments)
1495          sb.append("\\#");
1496        else
1497          sb.append(ch);
1498        break;
1499
1500      case ']':
1501        sb.append(ch);
1502
1503        if (quote == '[')
1504          quote = 0;
1505        break;
1506
1507      case '{':
1508        if (i + 1 < len &&
1509            ('0' <= (ch = regexp.charAt(i + 1)) && ch <= '9' || ch == ',')) {
1510          sb.append("{");
1511          for (i++;
1512               i < len &&
1513               ('0' <= (ch = regexp.charAt(i)) && ch <= '9' || ch == ',');
1514               i++) {
1515            sb.append(ch);
1516          }
1517
1518          if (i < len)
1519            sb.append(regexp.charAt(i));
1520        }
1521        else {
1522          sb.append("\\{");
1523        }
1524        break;
1525
1526      case '}':
1527        sb.append("\\}");
1528        break;
1529
1530      default:
1531        sb.append(ch);
1532      }
1533    }
1534
1535    return sb.toString();
1536  }
1537
1538  /**
1539   * Converts to non-greedy.
1540   */

1541  private static String JavaDoc toNonGreedy(String JavaDoc regexp)
1542  {
1543    int len = regexp.length();
1544
1545    StringBuilder JavaDoc sb = new StringBuilder JavaDoc();
1546    char quote = 0;
1547
1548    for (int i = 0; i < len; i++) {
1549      char ch = regexp.charAt(i);
1550
1551      switch (ch) {
1552      case '\\':
1553        sb.append(ch);
1554
1555        if (i + 1 < len) {
1556          sb.append(regexp.charAt(i + 1));
1557          i++;
1558        }
1559        break;
1560
1561      case '[':
1562        sb.append(ch);
1563
1564        if (quote == 0)
1565          quote = ch;
1566        break;
1567
1568      case ']':
1569        sb.append(ch);
1570
1571        if (quote == '[')
1572          quote = 0;
1573        break;
1574
1575      // non-capturing special constructs
1576
case '(':
1577        sb.append(ch);
1578
1579        if (i + 1 < len) {
1580          ch = regexp.charAt(i + 1);
1581
1582          if (ch == '?') {
1583            sb.append(ch);
1584            i++;
1585          }
1586        }
1587        break;
1588
1589      case '*':
1590      case '?':
1591      case '+':
1592        sb.append(ch);
1593
1594        if (i + 1 < len && (ch = regexp.charAt(i + 1)) != '?') {
1595          sb.append('?');
1596        }
1597        else {
1598          // invert non-greedy to greedy
1599
i++;
1600        }
1601        break;
1602
1603      default:
1604        sb.append(ch);
1605      }
1606    }
1607
1608    return sb.toString();
1609  }
1610
1611  static class Replacement {
1612    void eval(StringBuilderValue sb, StringValue subject, Matcher JavaDoc matcher)
1613    {
1614    }
1615  }
1616
1617  static class TextReplacement
1618    extends Replacement
1619  {
1620    private char []_text;
1621
1622    TextReplacement(StringBuilder JavaDoc text)
1623    {
1624      int length = text.length();
1625
1626      _text = new char[length];
1627
1628      text.getChars(0, length, _text, 0);
1629    }
1630
1631    void eval(StringBuilderValue sb, StringValue subject, Matcher JavaDoc matcher)
1632    {
1633      sb.append(_text, 0, _text.length);
1634    }
1635  }
1636
1637  static class GroupReplacement
1638    extends Replacement
1639  {
1640    private int _group;
1641
1642    GroupReplacement(int group)
1643    {
1644      _group = group;
1645    }
1646
1647    void eval(StringBuilderValue sb, StringValue subject, Matcher JavaDoc matcher)
1648    {
1649      if (_group <= matcher.groupCount())
1650        sb.append(subject.substring(matcher.start(_group),
1651                    matcher.end(_group)));
1652    }
1653  }
1654
1655  static class GroupEscapeReplacement
1656    extends Replacement
1657  {
1658    private int _group;
1659
1660    GroupEscapeReplacement(int group)
1661    {
1662      _group = group;
1663    }
1664
1665    void eval(StringBuilderValue sb, StringValue subject, Matcher JavaDoc matcher)
1666    {
1667      if (_group <= matcher.groupCount()) {
1668    StringValue group = subject.substring(matcher.start(_group),
1669                          matcher.end(_group));;
1670    int len = group.length();
1671
1672    for (int i = 0; i < len; i++) {
1673      char ch = group.charAt(i);
1674
1675      if (ch == '\'')
1676        sb.append("\\\'");
1677      else if (ch == '\"')
1678        sb.append("\\\"");
1679      else
1680        sb.append(ch);
1681    }
1682      }
1683    }
1684  }
1685
// Marks the characters flagged in the PREG_QUOTE lookup table.
// NOTE(review): '.' is not marked here -- confirm it is handled elsewhere.
static {
  String quoted = "\\+*?[^]$(){}=!<>|:";

  for (int i = 0; i < quoted.length(); i++)
    PREG_QUOTE[quoted.charAt(i)] = true;
}
1706}
1707
Popular Tags