KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > caucho > quercus > lib > i18n > MbstringModule


1 /*
2  * Copyright (c) 1998-2006 Caucho Technology -- all rights reserved
3  *
4  * This file is part of Resin(R) Open Source
5  *
6  * Each copy or derived work must preserve the copyright notice and this
7  * notice unmodified.
8  *
9  * Resin Open Source is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * Resin Open Source is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
17  * of NON-INFRINGEMENT. See the GNU General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with Resin Open Source; if not, write to the
22  *
23  * Free Software Foundation, Inc.
24  * 59 Temple Place, Suite 330
25  * Boston, MA 02111-1307 USA
26  *
27  * @author Nam Nguyen
28  */

29
30 package com.caucho.quercus.lib.i18n;
31
32 import com.caucho.quercus.QuercusModuleException;
33 import com.caucho.quercus.UnimplementedException;
34 import com.caucho.quercus.annotation.Optional;
35 import com.caucho.quercus.annotation.Reference;
36 import com.caucho.quercus.annotation.VariableArguments;
37 import com.caucho.quercus.env.*;
38 import com.caucho.quercus.lib.MailModule;
39 import com.caucho.quercus.lib.RegexpModule;
40 import com.caucho.quercus.lib.string.StringModule;
41 import com.caucho.quercus.module.AbstractQuercusModule;
42 import com.caucho.util.L10N;
43 import com.caucho.vfs.Encoding;
44
45 import java.io.UnsupportedEncodingException JavaDoc;
46 import java.util.Map JavaDoc;
47 import java.util.logging.Logger JavaDoc;
48
49 public class MbstringModule
50   extends AbstractQuercusModule
51 {
52   private static final Logger JavaDoc log =
53                        Logger.getLogger(MbstringModule.class.getName());
54   private static final L10N L = new L10N(MbstringModule.class);
55
56   public static final int MB_CASE_UPPER = 0;
57   public static final int MB_CASE_LOWER = 1;
58   public static final int MB_CASE_TITLE = 2;
59
60   /**
61    * Returns the extensions implemented by the module.
62    */

63   public String JavaDoc []getLoadedExtensions()
64   {
65     return new String JavaDoc[] { "mbstring" };
66   }
67
68   /**
69    * Upper-cases, lower-cases, or capitalizes first letter of words.
70    */

71   public static StringValue mb_convert_case(Env env,
72                               StringValue str,
73                               int mode,
74                               @Optional("") String JavaDoc encoding)
75   {
76     if (mode == MB_CASE_TITLE) {
77       encoding = getEncoding(env, encoding);
78
79       str = str.toUnicodeValue(env, encoding);
80       str = toUpperCaseTitle(str);
81
82       return str.toBinaryValue(env, encoding);
83     }
84     else if (mode == MB_CASE_LOWER)
85       return mb_strtolower(env, str, encoding);
86     else if (mode == MB_CASE_UPPER)
87       return mb_strtoupper(env, str, encoding);
88     else
89       return str;
90   }
91
92   /**
93    * Converts string of one encoding to another.
94    */

95   public static StringValue mb_convert_encoding(Env env,
96                               StringValue str,
97                               String JavaDoc destEncoding,
98                               @Optional() String JavaDoc fromEncodings)
99   {
100     // XXX: fallback encoding
101
int tail = fromEncodings.indexOf(',', 1);
102
103     if (tail < 0)
104       tail = fromEncodings.length();
105
106     String JavaDoc srcEncoding;
107
108     if (tail < 0)
109       srcEncoding = fromEncodings;
110     else
111       srcEncoding = getEncoding(env, fromEncodings.substring(0, tail).trim());
112
113     return decodeEncode(env, str, srcEncoding, destEncoding);
114   }
115
116   /**
117    * Performs Japanese specific charset conversion.
118    */

119   public static StringValue mb_convert_kana(Env env,
120                               StringValue str,
121                               @Optional("") String JavaDoc option,
122                               @Optional("") String JavaDoc encoding)
123   {
124     throw new UnimplementedException("mb_convert_kana");
125   }
126
127   /**
128    * Decodes and then encodes variables.
129    *
130    * XXX: variable arguments to convert.
131    */

132   @VariableArguments
133   public static StringValue mb_convert_variables(Env env,
134                               String JavaDoc toEncoding,
135                               String JavaDoc fromEncodings,
136                               @Reference Value vars)
137   {
138     // XXX: fallback encoding
139
int tail = fromEncodings.indexOf(',', 1);
140
141     if (tail < 0)
142       tail = fromEncodings.length();
143
144     String JavaDoc srcEncoding;
145
146     if (tail < 0)
147       srcEncoding = fromEncodings;
148     else
149       srcEncoding = getEncoding(env, fromEncodings.substring(0, tail).trim());
150
151     Value decoded = decodeAll(env, vars, srcEncoding);
152
153     vars.set(encodeAll(env, decoded, toEncoding));
154
155     return new StringValueImpl(srcEncoding);
156   }
157
158   /**
159    * Decodes mime field.
160    */

161   public static Value mb_decode_mimeheader(Env env,
162                               StringValue str)
163   {
164     String JavaDoc encoding = getEncoding(env);
165
166     try {
167       return IconvUtility.decodeMime(env, str, encoding);
168
169     } catch (UnsupportedEncodingException JavaDoc e) {
170       throw new QuercusModuleException(e.getMessage());
171     }
172   }
173
174   /**
175    * Decodes HTML numeric entity.
176    */

177   public static StringValue mb_decode_numericentity(Env env,
178                               StringValue str,
179                               ArrayValue convmap,
180                               @Optional String JavaDoc encoding)
181   {
182     throw new UnimplementedException("mb_decode_numericentity");
183   }
184
185   /**
186    * Detects encoding of string.
187    */

188   public static StringValue mb_detect_encoding(Env env,
189                               StringValue str,
190                               @Optional Value encoding_list,
191                               @Optional boolean strict)
192   {
193     throw new UnimplementedException("mb_detect_encoding");
194   }
195
196   /**
197    * Specifies order of charsets to test when detecting encoding.
198    */

199   public static Value mb_detect_order(Env env,
200                               Value encoding_list)
201   {
202     throw new UnimplementedException("mb_detect_order");
203   }
204
205   /**
206    * Encodes a string into mime.
207    */

208   public static StringValue mb_encode_mimeheader(Env env,
209                               StringValue str,
210                               @Optional("") String JavaDoc charset,
211                               @Optional("B") String JavaDoc transfer_encoding,
212                               @Optional("") String JavaDoc linefeed)
213   {
214     charset = getEncoding(env, charset);
215
216     try {
217       String JavaDoc mime = IconvUtility.encodeMimeWord(str.toString(),
218                                                 charset,
219                                                 transfer_encoding,
220                                                 linefeed,
221                                                 76);
222       return new StringValueImpl(mime);
223
224     } catch (UnsupportedEncodingException JavaDoc e) {
225       throw new QuercusModuleException(e.getMessage());
226     }
227
228   }
229
230   /**
231    * Encodes HTML numeric string entity.
232    */

233   public static StringValue mb_encode_numericentity(Env env,
234                               StringValue str,
235                               ArrayValue convmap,
236                               @Optional String JavaDoc encoding)
237   {
238     throw new UnimplementedException();
239   }
240
241   /**
242    * Returns true if pattern matches a part of string.
243    */

244   public static BooleanValue mb_ereg_match(Env env,
245                               StringValue pattern,
246                               StringValue string,
247                               @Optional String JavaDoc option)
248   {
249     String JavaDoc encoding = getEncoding(env);
250
251     pattern = pattern.toUnicodeValue(env, encoding);
252     string = string.toUnicodeValue(env, encoding);
253
254     // XXX: option
255

256     Value val = RegexpModule.ereg(env, pattern, string, null);
257
258     if (val == BooleanValue.FALSE)
259       return BooleanValue.FALSE;
260     else
261       return BooleanValue.TRUE;
262   }
263
264   /**
265    * Multibyte version of ereg_replace.
266    */

267   public static Value mb_ereg_replace(Env env,
268                               StringValue pattern,
269                               StringValue replacement,
270                               StringValue subject,
271                               @Optional String JavaDoc option)
272   {
273     String JavaDoc encoding = getEncoding(env);
274
275     pattern = pattern.toUnicodeValue(env, encoding);
276     replacement = replacement.toUnicodeValue(env, encoding);
277     subject = subject.toUnicodeValue(env, encoding);
278
279     //XXX: option
280

281     Value val = RegexpModule.ereg_replace(env, pattern, replacement, subject);
282
283     return encodeAll(env, val, encoding);
284   }
285
286   /**
287    * Multibyte version of ereg.
288    */

289   public static Value mb_ereg(Env env,
290                               StringValue pattern,
291                               StringValue string,
292                               @Optional ArrayValue regs)
293   {
294     return eregImpl(env, pattern, string, regs, true);
295   }
296
297   /**
298    * Multibyte version of eregi_replace.
299    */

300   public static Value mb_eregi_replace(Env env,
301                               StringValue pattern,
302                               StringValue replacement,
303                               StringValue subject,
304                               @Optional String JavaDoc option)
305   {
306     String JavaDoc encoding = getEncoding(env);
307
308     pattern = pattern.toUnicodeValue(env, encoding);
309     replacement = replacement.toUnicodeValue(env, encoding);
310     subject = subject.toUnicodeValue(env, encoding);
311
312     //XXX: option
313

314     Value val = RegexpModule.eregi_replace(env, pattern, replacement, subject);
315
316     return encodeAll(env, val, encoding);
317   }
318
319   /**
320    * Multibyte version of eregi.
321    */

322   public static Value mb_eregi(Env env,
323                               StringValue pattern,
324                               StringValue string,
325                               @Optional ArrayValue regs)
326   {
327     return eregImpl(env, pattern, string, regs, false);
328   }
329
330   private static Value eregImpl(Env env,
331                               StringValue pattern,
332                               StringValue string,
333                               ArrayValue regs,
334                               boolean isCaseSensitive)
335   {
336     String JavaDoc encoding = getEncoding(env);
337
338     pattern = pattern.toUnicodeValue(env, encoding);
339     string = string.toUnicodeValue(env, encoding);
340
341     if (regs == null) {
342       if (isCaseSensitive)
343         return RegexpModule.ereg(env, pattern, string, null);
344       else
345         return RegexpModule.eregi(env, pattern, string, null);
346     }
347
348     Value val;
349     Var regVar = new Var();
350
351     if (isCaseSensitive)
352       val = RegexpModule.ereg(env, pattern, string, regVar);
353     else
354       val = RegexpModule.eregi(env, pattern, string, regVar);
355
356     if (regVar.isset()) {
357       regs.clear();
358       ArrayValue results = regVar.toArrayValue(env);
359
360       for (Map.Entry JavaDoc<Value,Value> entry : results.entrySet()) {
361
362         Value bytes = encodeAll(env, entry.getValue(), encoding);
363         regs.put(entry.getKey(), bytes);
364       }
365
366       val = LongValue.create(
367               regs.get(LongValue.ZERO).toStringValue().length());
368     }
369
370     return val;
371   }
372
373   /**
374    * Gets current position of ereg state object.
375    */

376   public static LongValue mb_ereg_search_getpos(Env env)
377   {
378     EregSearch ereg = getEreg(env);
379
380     if (ereg == null)
381       return LongValue.ZERO;
382
383     return LongValue.create(ereg._position);
384   }
385
386   /**
387    * Gets the last match of ereg state object from previous matching.
388    */

389   public static Value mb_ereg_search_getregs(Env env)
390   {
391     EregSearch ereg = getEreg(env);
392
393     if (ereg == null || ereg._lastMatch == null)
394       return BooleanValue.FALSE;
395
396     return ereg._lastMatch;
397   }
398
399   /**
400    * Initializes a ereg state object.
401    */

402   public static BooleanValue mb_ereg_search_init(Env env,
403                               StringValue string,
404                               @Optional Value pattern,
405                               @Optional Value option)
406   {
407     EregSearch ereg = new EregSearch(env, string, pattern, option);
408     env.setSpecialValue("mb.search", ereg);
409
410     return BooleanValue.TRUE;
411   }
412
413   /**
414    * Returns index and position after matching.
415    */

416   public static Value mb_ereg_search_pos(Env env,
417                               @Optional Value pattern,
418                               @Optional Value option)
419   {
420     EregSearch ereg = getEreg(env, pattern, option);
421
422     if (ereg == null) {
423       env.warning(L.l("Regular expression not set"));
424       return BooleanValue.FALSE;
425     }
426
427     return ereg.search(env, true);
428   }
429
430   /**
431    * Returns match array after matching.
432    */

433   public static Value mb_ereg_search_regs(Env env,
434                               @Optional Value pattern,
435                               @Optional Value option)
436   {
437     EregSearch ereg = getEreg(env, pattern, option);
438
439     if (ereg == null) {
440       env.warning(L.l("Regular expression not set"));
441       return BooleanValue.FALSE;
442     }
443
444     if (ereg.search(env, false) == BooleanValue.FALSE)
445       return BooleanValue.FALSE;
446
447     return ereg._lastMatch;
448   }
449
450   /**
451    * Sets the position of the ereg state object.
452    */

453   public static BooleanValue mb_ereg_search_setpos(Env env,
454                               int position)
455   {
456     EregSearch ereg = getEreg(env);
457
458     if (ereg == null)
459       return BooleanValue.FALSE;
460
461     ereg._position = position;
462     return BooleanValue.TRUE;
463   }
464
465   /**
466    * Returns whether or not pattern matches string.
467    */

468   public static BooleanValue mb_ereg_search(Env env,
469                               @Optional Value pattern,
470                               @Optional Value option)
471   {
472     EregSearch ereg = getEreg(env, pattern, option);
473
474     if (ereg == null) {
475       env.warning(L.l("Regular expression not set"));
476       return BooleanValue.FALSE;
477     }
478
479     Value result = ereg.search(env, false);
480
481     return BooleanValue.create(result.toBoolean());
482   }
483
484   /**
485    * Returns the ereg state object from the environment.
486    */

487   private static EregSearch getEreg(Env env)
488   {
489     Object JavaDoc obj = env.getSpecialValue("mb.search");
490
491     if (obj == null)
492       return null;
493
494     return (EregSearch)obj;
495   }
496
497   /**
498    * Returns the ereg state object from the environment iff the ereg object
499    * is a valid one.
500    */

501   private static EregSearch getEreg(Env env,
502                               Value pattern,
503                               Value option)
504   {
505     Object JavaDoc obj = env.getSpecialValue("mb.search");
506
507     if (obj != null) {
508       EregSearch ereg = (EregSearch)obj;
509
510       ereg.init(env, pattern, option);
511
512       if (ereg._isValidRegexp)
513         return ereg;
514       else
515         return null;
516     }
517     else
518       return null;
519   }
520
521   /**
522    * Returns current mb settings.
523    */

524   public static Value mb_get_info(Env env,
525                               @Optional("") String JavaDoc type)
526   {
527     if (type.length() == 0) {
528       ArrayValue array = new ArrayValueImpl();
529
530       array.put(new StringValueImpl("internal_encoding"),
531                 new StringValueImpl(getEncoding(env)));
532
533       return array;
534     }
535
536     else if (type.equals("internal_encoding")) {
537       return new StringValueImpl(getEncoding(env));
538
539     } else {
540       throw new UnimplementedException("mb_get_info");
541     }
542   }
543
544   /**
545    * Returns and/or sets the http input encoding
546    */

547   public static Value mb_http_input(Env env,
548                               @Optional String JavaDoc type)
549   {
550     throw new UnimplementedException("mb_http_input");
551   }
552  
553   /**
554    * Returns and/or sets the http output encoding
555    */

556   public static Value mb_http_output(Env env,
557                               @Optional String JavaDoc encoding)
558   {
559     throw new UnimplementedException("mb_http_output");
560 /*
561     if (encoding.length() == 0)
562       return new StringValueImpl(getEncoding(env));
563     else
564       return BooleanValue.FALSE;
565 */

566   }
567
568   /**
569    * Returns and/or sets the internal encoding.
570    */

571   public static Value mb_internal_encoding(Env env,
572                               @Optional String JavaDoc encoding)
573   {
574     if (encoding.length() == 0)
575       return new StringValueImpl(getEncoding(env));
576     else {
577       setEncoding(env, encoding);
578       return BooleanValue.TRUE;
579     }
580   }
581
582   /**
583    * Returns and/or sets the encoding for mail.
584    */

585   public static Value mb_language(Env env,
586                               @Optional String JavaDoc language)
587   {
588     String JavaDoc encoding = getEncoding(env);
589
590     if (language.length() == 0) {
591       if (encoding.equalsIgnoreCase("ISO-2022-JP"))
592         return new StringValueImpl("Japanese");
593       else if (encoding.equalsIgnoreCase("ISO-8859-1"))
594         return new StringValueImpl("English");
595       else if (encoding.equalsIgnoreCase("UTF-8"))
596         return new StringValueImpl("uni");
597     }
598     else if (language.equals("Japanese") || language.equals("ja"))
599       setEncoding(env, "ISO-2022-JP");
600     else if (language.equals("English") || language.equals("en"))
601       setEncoding(env, "ISO-8859-1");
602     else if (language.equals("uni"))
603       setEncoding(env, "UTF-8");
604     else
605       return BooleanValue.FALSE;
606
607     return BooleanValue.TRUE;
608   }
609
610   /**
611    * XXX: get all supported encodings
612    */

613   public static ArrayValue mb_list_encodings(Env env)
614   {
615     ArrayValue array = new ArrayValueImpl();
616
617     array.put(new StringValueImpl("ASCII"));
618     array.put(new StringValueImpl("UTF-8"));
619     array.put(new StringValueImpl("UTF-16"));
620     array.put(new StringValueImpl("ISO-8859-1"));
621     array.put(new StringValueImpl("ISO-8859-2"));
622     array.put(new StringValueImpl("ISO-8859-15"));
623     array.put(new StringValueImpl("ISO-2022-JP"));
624     array.put(new StringValueImpl("EUC-KR"));
625     array.put(new StringValueImpl("EUC-CN"));
626     array.put(new StringValueImpl("EUC-TW"));
627     array.put(new StringValueImpl("EUC-JP"));
628     array.put(new StringValueImpl("JIS"));
629
630     return array;
631   }
632
633   /**
634    * ob_start() handler
635    */

636   public static StringValue mb_output_handler(Env env,
637                               StringValue contents,
638                               int value)
639   {
640     throw new UnimplementedException("mb_output_handler");
641   }
642
643   /**
644    * Multibyte version of parse_str.
645    */

646   public static BooleanValue mb_parse_str(Env env,
647                               StringValue strValue,
648                               @Optional @Reference Value result)
649   {
650     String JavaDoc encoding = getEncoding(env);
651     StringModule.parse_str(env,strValue.toString(), result);
652
653     if (result == null) {
654       // XXX: encode newly added global variables
655
return BooleanValue.TRUE;
656     }
657     else {
658       Value array = encodeAll(env, result, encoding);
659       result.set(array);
660
661       return BooleanValue.TRUE;
662     }
663   }
664
665   /**
666    * Returns the preferred mime name of this encoding.
667    */

668   public static StringValue mb_preferred_mime_name(Env env,
669                               StringValue encoding)
670   {
671     String JavaDoc mimeName = Encoding.getMimeName(encoding.toString());
672
673     return new StringValueImpl(mimeName);
674   }
675
676   /**
677    * Returns and/or sets encoding for mb regular expressions.
678    */

679   public static Value mb_regex_encoding(Env env,
680                               @Optional("") String JavaDoc encoding)
681   {
682     return mb_internal_encoding(env, encoding);
683   }
684
685   /**
686    * XXX: what does this actually do?
687    */

688   public static StringValue mb_regex_set_options(Env env,
689                               @Optional String JavaDoc options)
690   {
691     throw new UnimplementedException("mb_regex_set_options");
692   }
693
694   /**
695    * Multibyte version of mail.
696    */

697   public static BooleanValue mb_send_mail(Env env,
698                               StringValue to,
699                               StringValue subject,
700                               StringValue message,
701                               @Optional StringValue additionalHeaders,
702                               @Optional StringValue additionalParameters)
703   {
704     String JavaDoc encoding = getEncoding(env);
705
706     subject = subject.toBinaryValue(env, encoding);
707     message = message.toBinaryValue(env, encoding);
708     additionalHeaders = additionalHeaders.toBinaryValue(env, encoding);
709
710     boolean result = MailModule.mail(env,
711                                      to.toString(),
712                                      subject.toString(),
713                                      message,
714                                      additionalHeaders.toString(),
715                                      additionalParameters.toString());
716
717     return BooleanValue.create(result);
718   }
719
720   /**
721    * Multibyte version of split.
722    */

723   public static Value mb_split(Env env,
724                               StringValue pattern,
725                               StringValue string,
726                               @Optional("-1") long limit)
727   {
728     String JavaDoc encoding = getEncoding(env);
729
730     pattern = pattern.toUnicodeValue(env, encoding);
731     string = string.toUnicodeValue(env, encoding);
732
733     Value val = RegexpModule.split(env, pattern, string, limit);
734
735     return encodeAll(env, val, encoding);
736   }
737
738   /**
739    * Similar to substr except start index is at the beginning of char
740    * boundaries.
741    */

742   public static StringValue mb_strcut(Env env,
743                               StringValue str,
744                               int start,
745                               @Optional("7fffffff") int length,
746                               @Optional String JavaDoc encoding)
747   {
748     encoding = getEncoding(env, encoding);
749
750     str = str.toUnicodeValue(env, encoding);
751
752     int len = str.length();
753     int end = start + length;
754
755     if (end > len)
756       end = len;
757
758     if (start < 0 || start > end)
759       return StringValue.EMPTY;
760
761     // XXX: not quite exactly the same behavior as PHP
762
if (Character.isHighSurrogate(str.charAt(start)))
763       start--;
764
765     str = str.substring(start, end);
766
767     return str.toBinaryValue(env, encoding);
768   }
769
770   /**
771    * Truncates the string.
772    */

773   public static StringValue mb_strimwidth(Env env,
774                               StringValue str,
775                               int start,
776                               int width,
777                               @Optional() StringValue trimmarker,
778                               @Optional("") String JavaDoc encoding)
779   {
780     encoding = getEncoding(env, encoding);
781
782     str = str.toUnicodeValue(env, encoding);
783
784     int len = str.length();
785     int end = start + width;
786
787     if (end > len)
788       end = len;
789
790     if (start < 0 || start > end)
791       return StringValue.EMPTY;
792
793     str = str.substring(start, end);
794
795     if (end < len && trimmarker.length() > 0) {
796       StringBuilderValue sb = new StringBuilderValue();
797
798       sb.append(str);
799       sb.append(trimmarker.toUnicodeValue(env, encoding));
800
801       str = sb;
802     }
803
804     return str.toBinaryValue(env, encoding);
805   }
806
807   /**
808    * Multibyte version of strlen.
809    */

810   public static LongValue mb_strlen(Env env,
811                               StringValue str,
812                               @Optional("") String JavaDoc encoding)
813   {
814     encoding = getEncoding(env, encoding);
815
816     str = str.toUnicodeValue(env, encoding);
817
818     return LongValue.create(str.length());
819   }
820
821   /**
822    * Multibyte version of strpos.
823    */

824   public static Value mb_strpos(Env env,
825                               StringValue haystack,
826                               StringValue needle,
827                               @Optional("0") int offset,
828                               @Optional("") String JavaDoc encoding)
829   {
830     encoding = getEncoding(env, encoding);
831
832     haystack = haystack.toUnicodeValue(env, encoding);
833     needle = needle.toUnicodeValue(env, encoding);
834
835     Value val = StringModule.strpos(haystack, needle, offset);
836
837     return encodeAll(env, val, encoding);
838   }
839
840   /**
841    * Multibyte version of strrpos.
842    */

843   public static Value mb_strrpos(Env env,
844                               StringValue haystack,
845                               StringValue needle,
846                               @Optional Value offsetV,
847                               @Optional("") String JavaDoc encoding)
848   {
849     encoding = getEncoding(env, encoding);
850
851     haystack = haystack.toUnicodeValue(env, encoding);
852     needle = needle.toUnicodeValue(env, encoding);
853
854     Value val = StringModule.strrpos(haystack, needle, offsetV);
855
856     return encodeAll(env, val, encoding);
857   }
858
859   /**
860    * Converts all characters to lower-case.
861    */

862   public static StringValue mb_strtolower(Env env,
863                               StringValue str,
864                               @Optional("") String JavaDoc encoding)
865   {
866     encoding = getEncoding(env, encoding);
867
868     str = str.toUnicodeValue(env, encoding);
869     str = StringModule.strtolower(str);
870
871     return str.toBinaryValue(env, encoding);
872   }
873
874   /**
875    * Converts all characters to upper-case.
876    */

877   public static StringValue mb_strtoupper(Env env,
878                               StringValue str,
879                               @Optional("") String JavaDoc encoding)
880   {
881     encoding = getEncoding(env, encoding);
882
883     str = str.toUnicodeValue(env, encoding);
884     str = StringModule.strtoupper(str);
885
886     return str.toBinaryValue(env, encoding);
887   }
888
889   /**
890    * Returns the width of this multibyte string.
891    */

892   public static LongValue mb_strwidth(Env env,
893                               StringValue str,
894                               @Optional("") String JavaDoc encoding)
895   {
896     encoding = getEncoding(env, encoding);
897
898     str = str.toUnicodeValue(env, encoding);
899
900     return LongValue.create(str.length());
901
902 /*
903     int width = 0;
904     int len = string.length();
905
906     // Per PHP manual
907     for (int i = 0; i < len; i++) {
908       char ch = string.charAt(i);
909
910       if (ch <= 0x19)
911         continue;
912       else if (ch <= 0x1fff)
913         width += 1;
914       else if (ch <= 0xff60)
915         width += 2;
916       else if (ch <= 0xff9f)
917         width += 1;
918       else
919         width += 2;
920     }
921
922     return LongValue.create(width);
923 */

924   }
925
926   /**
927    * Sets the character to use when decoding/encoding fails on a character.
928    */

929   public static Value mb_substitute_character(Value substrchar)
930   {
931     throw new UnimplementedException("mb_substitute_character");
932   }
933
934   public static LongValue mb_substr_count(Env env,
935                               StringValue haystack,
936                               StringValue needle,
937                               @Optional("") String JavaDoc encoding)
938   {
939     encoding = getEncoding(env, encoding);
940
941     haystack = haystack.toUnicodeValue(env, encoding);
942     needle = needle.toUnicodeValue(env, encoding);
943
944     int count = 0;
945     int sublen = needle.length();
946
947     int i = haystack.indexOf(needle);
948
949     while (i >= 0) {
950       i = haystack.indexOf(needle, i + sublen);
951       count++;
952     }
953
954     return LongValue.create(count);
955   }
956
957   /**
958    * Multibyte version of substr.
959    */

960   public static StringValue mb_substr(Env env,
961                               StringValue str,
962                               int start,
963                               @Optional Value lengthV,
964                               @Optional String JavaDoc encoding)
965   {
966     encoding = getEncoding(env, encoding);
967
968     str = str.toUnicodeValue(env, encoding);
969
970     Value val = StringModule.substr(env, str, start, lengthV);
971
972     if (val == BooleanValue.FALSE)
973       return StringValue.EMPTY;
974
975     return val.toStringValue().toBinaryValue(env, encoding);
976   }
977
978
979   // Private helper functions
980

981   /**
982    * Returns string with words capitalized and intermediate letters are
983    * made lower-case.
984    */

985   private static StringValue toUpperCaseTitle(StringValue string)
986   {
987     StringBuilderValue sb = new StringBuilderValue();
988
989     int strLen = string.length();
990     boolean isWordStart = true;
991
992     for (int i = 0; i < strLen; i++) {
993       char ch = string.charAt(i);
994
995       switch (ch) {
996       case ' ': case '\t': case '\r': case '\n':
997         isWordStart = true;
998         sb.append(ch);
999         break;
1000      default:
1001        if (isWordStart) {
1002          sb.append(Character.toUpperCase(ch));
1003          isWordStart = false;
1004        }
1005        else
1006          sb.append(Character.toLowerCase(ch));
1007        break;
1008      }
1009    }
1010
1011    return sb;
1012  }
1013
1014  private static String JavaDoc getEncoding(Env env)
1015  {
1016    return env.getRuntimeEncoding().toString();
1017  }
1018
1019  private static String JavaDoc getEncoding(Env env, String JavaDoc encoding)
1020  {
1021    if (encoding.length() == 0)
1022      return getEncoding(env);
1023    else
1024      return encoding;
1025  }
1026
1027  private static void setEncoding(Env env, String JavaDoc encoding)
1028  {
1029    env.setRuntimeEncoding(encoding);
1030  }
1031
1032  /**
1033   * Recursively decodes objects and arrays.
1034   */

1035  private static Value decodeAll(Env env,
1036                              Value val,
1037                              String JavaDoc encoding)
1038  {
1039    val = val.toValue();
1040
1041    if (val instanceof StringValue)
1042      return ((StringValue)val).toUnicodeValue(env, encoding);
1043
1044    else if (val instanceof ArrayValue) {
1045      ArrayValue array = new ArrayValueImpl();
1046
1047      for (Map.Entry JavaDoc<Value,Value> entry : ((ArrayValue)val).entrySet()) {
1048        array.put(entry.getKey(),
1049                  decodeAll(env, entry.getValue(), encoding));
1050      }
1051
1052      return array;
1053    } else if (val instanceof ObjectValue) {
1054
1055      ObjectValue obj = (ObjectValue)val;
1056
1057      for (Map.Entry JavaDoc<String JavaDoc,Value> entry : obj.entrySet()) {
1058        obj.putField(env,
1059                     entry.getKey(),
1060                     decodeAll(env, entry.getValue(), encoding));
1061      }
1062
1063      return obj;
1064    } else
1065      return val;
1066  }
1067
1068  /**
1069   * Recursively encodes objects and arrays.
1070   */

1071  private static Value encodeAll(Env env,
1072                              Value val,
1073                              String JavaDoc encoding)
1074  {
1075    val = val.toValue();
1076
1077    if (val instanceof StringValue)
1078      return ((StringValue)val).toBinaryValue(env, encoding);
1079
1080    else if (val instanceof ArrayValue) {
1081      ArrayValue array = new ArrayValueImpl();
1082
1083      for (Map.Entry JavaDoc<Value,Value> entry : ((ArrayValue)val).entrySet()) {
1084        array.put(entry.getKey(),
1085                  encodeAll(env, entry.getValue(), encoding));
1086      }
1087
1088      return array;
1089    } else if (val instanceof ObjectValue) {
1090
1091      ObjectValue obj = (ObjectValue)val;
1092
1093      for (Map.Entry JavaDoc<String JavaDoc,Value> entry : obj.entrySet()) {
1094        obj.putField(env,
1095                     entry.getKey(),
1096                     encodeAll(env, entry.getValue(), encoding));
1097      }
1098
1099      return obj;
1100    } else
1101      return val;
1102  }
1103
1104  private static StringValue decodeEncode(Env env,
1105                              StringValue val,
1106                              String JavaDoc srcEncoding,
1107                              String JavaDoc destEncoding)
1108  {
1109    try {
1110      return IconvUtility.decodeEncode(val, srcEncoding, destEncoding);
1111
1112    } catch (UnsupportedEncodingException JavaDoc e) {
1113      throw new QuercusModuleException(e.getMessage());
1114    }
1115  }
1116
1117  /**
1118   * ereg state object (saves previous match and other info)
1119   *
1120   * XXX: option
1121   */

1122  static class EregSearch {
1123    private StringValue _string;
1124    private StringValue _pattern;
1125    private Value _option;
1126    private int _length;
1127
1128    ArrayValue _lastMatch;
1129    int _position;
1130    boolean _isValidRegexp;
1131
1132    EregSearch(Env env,
1133                 StringValue string,
1134                 Value pattern,
1135                 Value option)
1136    {
1137      _string = string.toUnicodeValue(env, getEncoding(env));
1138      _position = 0;
1139      _length = _string.length();
1140
1141      init(env, pattern, option);
1142    }
1143
1144    void init(Env env, Value pattern, Value option)
1145    {
1146      _option = option;
1147      initPattern(env, pattern);
1148    }
1149
1150    void initPattern(Env env, Value pattern)
1151    {
1152      if (pattern instanceof StringValue) {
1153        _pattern = pattern.toStringValue();
1154        _isValidRegexp = true;
1155      }
1156      else
1157        _isValidRegexp = (_pattern != null);
1158    }
1159
1160    StringValue getString(Env env)
1161    {
1162      if (_position == 0)
1163        return _string;
1164      else if (_position < _length)
1165        return _string.substring(_position);
1166      else
1167        return StringValue.EMPTY;
1168    }
1169
1170    Value search(Env env, boolean isArrayReturn)
1171    {
1172      StringValue string = getString(env);
1173
1174      ArrayValue regs = new ArrayValueImpl();
1175      Value val = eregImpl(env, _pattern, string, regs, true);
1176
1177      if (val == BooleanValue.FALSE)
1178        return BooleanValue.FALSE;
1179
1180      StringValue match = regs.get(LongValue.ZERO).toStringValue();
1181      int matchIndex = _string.indexOf(match, _position);
1182      int matchLength = match.length();
1183
1184      _position = matchIndex + matchLength;
1185      _lastMatch = regs;
1186
1187      if (isArrayReturn) {
1188        ArrayValue array = new ArrayValueImpl();
1189
1190        array.put(LongValue.create(matchIndex));
1191        array.put(LongValue.create(matchLength));
1192
1193        return array;
1194
1195      } else
1196        return BooleanValue.TRUE;
1197    }
1198  }
1199}
1200
Popular Tags