KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > freemarker > template > utility > StringUtil


1 /*
2  * Copyright (c) 2003 The Visigoth Software Society. All rights
3  * reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  * notice, this list of conditions and the following disclaimer in
14  * the documentation and/or other materials provided with the
15  * distribution.
16  *
17  * 3. The end-user documentation included with the redistribution, if
18  * any, must include the following acknowledgement:
19  * "This product includes software developed by the
20  * Visigoth Software Society (http://www.visigoths.org/)."
21  * Alternately, this acknowledgement may appear in the software itself,
22  * if and wherever such third-party acknowledgements normally appear.
23  *
24  * 4. Neither the name "FreeMarker", "Visigoth", nor any of the names of the
25  * project contributors may be used to endorse or promote products derived
26  * from this software without prior written permission. For written
27  * permission, please contact visigoths@visigoths.org.
28  *
29  * 5. Products derived from this software may not be called "FreeMarker" or "Visigoth"
30  * nor may "FreeMarker" or "Visigoth" appear in their names
31  * without prior written permission of the Visigoth Software Society.
32  *
33  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
34  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
35  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
36  * DISCLAIMED. IN NO EVENT SHALL THE VISIGOTH SOFTWARE SOCIETY OR
37  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
38  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
39  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
40  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
41  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
42  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
43  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44  * SUCH DAMAGE.
45  * ====================================================================
46  *
47  * This software consists of voluntary contributions made by many
48  * individuals on behalf of the Visigoth Software Society. For more
49  * information on the Visigoth Software Society, please see
50  * http://www.visigoths.org/
51  */

52
53 package freemarker.template.utility;
54
55 import java.io.UnsupportedEncodingException JavaDoc;
56 import java.util.*;
57 import freemarker.template.Template;
58 import freemarker.core.ParseException;
59 import freemarker.core.Environment;
60
61 /**
62  * Some text related utilities.
63  *
64  * @version $Id: StringUtil.java,v 1.48 2005/06/01 22:39:08 ddekany Exp $
65  */

66 public class StringUtil {
67     private static final char[] ESCAPES = createEscapes();
68
69     /*
70      * For better performance most methods are folded down. Don't you scream... :)
71      */

72
73     /**
74      * HTML encoding (does not convert line breaks).
75      * Replaces all '>' '<' '&' and '"' with entity reference
76      */

77     public static String JavaDoc HTMLEnc(String JavaDoc s) {
78         int ln = s.length();
79         for (int i = 0; i < ln; i++) {
80             char c = s.charAt(i);
81             if (c == '<' || c == '>' || c == '&' || c == '"') {
82                 StringBuffer JavaDoc b =
83                         new StringBuffer JavaDoc(s.substring(0, i));
84                 switch (c) {
85                     case '<': b.append("&lt;"); break;
86                     case '>': b.append("&gt;"); break;
87                     case '&': b.append("&amp;"); break;
88                     case '"': b.append("&quot;"); break;
89                 }
90                 i++;
91                 int next = i;
92                 while (i < ln) {
93                     c = s.charAt(i);
94                     if (c == '<' || c == '>' || c == '&' || c == '"') {
95                         b.append(s.substring(next, i));
96                         switch (c) {
97                             case '<': b.append("&lt;"); break;
98                             case '>': b.append("&gt;"); break;
99                             case '&': b.append("&amp;"); break;
100                             case '"': b.append("&quot;"); break;
101                         }
102                         next = i + 1;
103                     }
104                     i++;
105                 }
106                 if (next < ln) b.append(s.substring(next));
107                 s = b.toString();
108                 break;
109             } // if c ==
110
} // for
111
return s;
112     }
113
114     /**
115      * XML Encoding.
116      * Replaces all '&gt;' '&lt;' '&amp;', "'" and '"' with entity reference
117      */

118     public static String JavaDoc XMLEnc(String JavaDoc s) {
119         int ln = s.length();
120         for (int i = 0; i < ln; i++) {
121             char c = s.charAt(i);
122             if (c == '<' || c == '>' || c == '&' || c == '"' || c == '\'') {
123                 StringBuffer JavaDoc b =
124                         new StringBuffer JavaDoc(s.substring(0, i));
125                 switch (c) {
126                     case '<': b.append("&lt;"); break;
127                     case '>': b.append("&gt;"); break;
128                     case '&': b.append("&amp;"); break;
129                     case '"': b.append("&quot;"); break;
130                     case '\'': b.append("&apos;"); break;
131                 }
132                 i++;
133                 int next = i;
134                 while (i < ln) {
135                     c = s.charAt(i);
136                     if (c == '<' || c == '>' || c == '&' || c == '"' || c == '\'') {
137                         b.append(s.substring(next, i));
138                         switch (c) {
139                             case '<': b.append("&lt;"); break;
140                             case '>': b.append("&gt;"); break;
141                             case '&': b.append("&amp;"); break;
142                             case '"': b.append("&quot;"); break;
143                             case '\'': b.append("&apos;"); break;
144                         }
145                         next = i + 1;
146                     }
147                     i++;
148                 }
149                 if (next < ln) b.append(s.substring(next));
150                 s = b.toString();
151                 break;
152             } // if c ==
153
} // for
154
return s;
155     }
156
157     /**
158      * XML encoding without replacing apostrophes.
159      * @see #XMLEnc(String)
160      */

161     public static String JavaDoc XMLEncNA(String JavaDoc s) {
162         int ln = s.length();
163         for (int i = 0; i < ln; i++) {
164             char c = s.charAt(i);
165             if (c == '<' || c == '>' || c == '&' || c == '"') {
166                 StringBuffer JavaDoc b =
167                         new StringBuffer JavaDoc(s.substring(0, i));
168                 switch (c) {
169                     case '<': b.append("&lt;"); break;
170                     case '>': b.append("&gt;"); break;
171                     case '&': b.append("&amp;"); break;
172                     case '"': b.append("&quot;"); break;
173                 }
174                 i++;
175                 int next = i;
176                 while (i < ln) {
177                     c = s.charAt(i);
178                     if (c == '<' || c == '>' || c == '&' || c == '"') {
179                         b.append(s.substring(next, i));
180                         switch (c) {
181                             case '<': b.append("&lt;"); break;
182                             case '>': b.append("&gt;"); break;
183                             case '&': b.append("&amp;"); break;
184                             case '"': b.append("&quot;"); break;
185                         }
186                         next = i + 1;
187                     }
188                     i++;
189                 }
190                 if (next < ln) b.append(s.substring(next));
191                 s = b.toString();
192                 break;
193             } // if c ==
194
} // for
195
return s;
196     }
197
198     /**
199      * XML encoding for attributes valies quoted with <tt>"</tt> (not with <tt>'</tt>!).
200      * Also can be used for HTML attributes that are quoted with <tt>"</tt>.
201      * @see #XMLEnc(String)
202      */

203     public static String JavaDoc XMLEncQAttr(String JavaDoc s) {
204         int ln = s.length();
205         for (int i = 0; i < ln; i++) {
206             char c = s.charAt(i);
207             if (c == '<' || c == '&' || c == '"') {
208                 StringBuffer JavaDoc b =
209                         new StringBuffer JavaDoc(s.substring(0, i));
210                 switch (c) {
211                     case '<': b.append("&lt;"); break;
212                     case '&': b.append("&amp;"); break;
213                     case '"': b.append("&quot;"); break;
214                 }
215                 i++;
216                 int next = i;
217                 while (i < ln) {
218                     c = s.charAt(i);
219                     if (c == '<' || c == '&' || c == '"') {
220                         b.append(s.substring(next, i));
221                         switch (c) {
222                             case '<': b.append("&lt;"); break;
223                             case '&': b.append("&amp;"); break;
224                             case '"': b.append("&quot;"); break;
225                         }
226                         next = i + 1;
227                     }
228                     i++;
229                 }
230                 if (next < ln) {
231                     b.append(s.substring(next));
232                 }
233                 s = b.toString();
234                 break;
235             } // if c ==
236
} // for
237
return s;
238     }
239
240     /**
241      * XML encoding without replacing apostrophes and quotation marks and greater-than signs.
242      * @see #XMLEnc(String)
243      */

244     public static String JavaDoc XMLEncNQG(String JavaDoc s) {
245         int ln = s.length();
246         for (int i = 0; i < ln; i++) {
247             char c = s.charAt(i);
248             if (c == '<' || c == '&') {
249                 StringBuffer JavaDoc b =
250                         new StringBuffer JavaDoc(s.substring(0, i));
251                 switch (c) {
252                     case '<': b.append("&lt;"); break;
253                     case '&': b.append("&amp;"); break;
254                 }
255                 i++;
256                 int next = i;
257                 while (i < ln) {
258                     c = s.charAt(i);
259                     if (c == '<' || c == '&') {
260                         b.append(s.substring(next, i));
261                         switch (c) {
262                             case '<': b.append("&lt;"); break;
263                             case '&': b.append("&amp;"); break;
264                         }
265                         next = i + 1;
266                     }
267                     i++;
268                 }
269                 if (next < ln) b.append(s.substring(next));
270                 s = b.toString();
271                 break;
272             } // if c ==
273
} // for
274
return s;
275     }
276     
277     /**
278      * Rich Text Format encoding (does not replace line breaks).
279      * Escapes all '\' '{' '}' and '"'
280      */

281     public static String JavaDoc RTFEnc(String JavaDoc s) {
282         int ln = s.length();
283         for (int i = 0; i < ln; i++) {
284             char c = s.charAt(i);
285             if (c == '\\' || c == '{' || c == '}') {
286                 StringBuffer JavaDoc b =
287                         new StringBuffer JavaDoc(s.substring(0, i));
288                 switch (c) {
289                     case '\\': b.append("\\\\"); break;
290                     case '{': b.append("\\{"); break;
291                     case '}': b.append("\\}"); break;
292                 }
293                 i++;
294                 int next = i;
295                 while (i < ln) {
296                     c = s.charAt(i);
297                     if (c == '\\' || c == '{' || c == '}') {
298                         b.append(s.substring(next, i));
299                         switch (c) {
300                             case '\\': b.append("\\\\"); break;
301                             case '{': b.append("\\{"); break;
302                             case '}': b.append("\\}"); break;
303                         }
304                         next = i + 1;
305                     }
306                     i++;
307                 }
308                 if (next < ln) b.append(s.substring(next));
309                 s = b.toString();
310                 break;
311             } // if c ==
312
} // for
313
return s;
314     }
315
316     /**
317      * URL encoding (like%20this).
318      */

319     public static String JavaDoc URLEnc(String JavaDoc s, String JavaDoc charset)
320             throws UnsupportedEncodingException JavaDoc {
321         int ln = s.length();
322         int i;
323         for (i = 0; i < ln; i++) {
324             char c = s.charAt(i);
325             if (!(c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'
326                     || c >= '0' && c <= '9'
327                     || c == '_' || c == '-' || c == '.' || c == '!' || c == '~'
328                     || c >= '\'' && c <= '*')) {
329                 break;
330             }
331         }
332         if (i == ln) {
333             // Nothing to escape
334
return s;
335         }
336
337         StringBuffer JavaDoc b = new StringBuffer JavaDoc(ln + ln / 3 + 2);
338         b.append(s.substring(0, i));
339
340         int encstart = i;
341         for (i++; i < ln; i++) {
342             char c = s.charAt(i);
343             if (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'
344                     || c >= '0' && c <= '9'
345                     || c == '_' || c == '-' || c == '.' || c == '!' || c == '~'
346                     || c >= '\'' && c <= '*') {
347                 if (encstart != -1) {
348                     byte[] o = s.substring(encstart, i).getBytes(charset);
349                     for (int j = 0; j < o.length; j++) {
350                         b.append('%');
351                         byte bc = o[j];
352                         int c1 = bc & 0x0F;
353                         int c2 = (bc >> 4) & 0x0F;
354                         b.append((char) (c2 < 10 ? c2 + '0' : c2 - 10 + 'A'));
355                         b.append((char) (c1 < 10 ? c1 + '0' : c1 - 10 + 'A'));
356                     }
357                     encstart = -1;
358                 }
359                 b.append(c);
360             } else {
361                 if (encstart == -1) {
362                     encstart = i;
363                 }
364             }
365         }
366         if (encstart != -1) {
367             byte[] o = s.substring(encstart, i).getBytes(charset);
368             for (int j = 0; j < o.length; j++) {
369                 b.append('%');
370                 byte bc = o[j];
371                 int c1 = bc & 0x0F;
372                 int c2 = (bc >> 4) & 0x0F;
373                 b.append((char) (c2 < 10 ? c2 + '0' : c2 - 10 + 'A'));
374                 b.append((char) (c1 < 10 ? c1 + '0' : c1 - 10 + 'A'));
375             }
376         }
377         
378         return b.toString();
379     }
380     
381     private static char[] createEscapes()
382     {
383         char[] escapes = new char['\\' + 1];
384         for(int i = 0; i < 32; ++i)
385         {
386             escapes[i] = 1;
387         }
388         escapes['\\'] = '\\';
389         escapes['\''] = '\'';
390         escapes['"'] = '"';
391         escapes['<'] = 'l';
392         escapes['>'] = 'g';
393         escapes['&'] = 'a';
394         escapes['\b'] = 'b';
395         escapes['\t'] = 't';
396         escapes['\n'] = 'n';
397         escapes['\f'] = 'f';
398         escapes['\r'] = 'r';
399         escapes['$'] = '$';
400         return escapes;
401     }
402
403     public static String JavaDoc FTLStringLiteralEnc(String JavaDoc s)
404     {
405         StringBuffer JavaDoc buf = null;
406         int l = s.length();
407         int el = ESCAPES.length;
408         for(int i = 0; i < l; i++)
409         {
410             char c = s.charAt(i);
411             if(c < el)
412             {
413                 char escape = ESCAPES[c];
414                 switch(escape)
415                 {
416                     case 0:
417                     {
418                         if (buf != null) {
419                             buf.append(c);
420                         }
421                         break;
422                     }
423                     case 1:
424                     {
425                         if (buf == null) {
426                             buf = new StringBuffer JavaDoc(s.length() + 3);
427                             buf.append(s.substring(0, i));
428                         }
429                         // hex encoding for characters below 0x20
430
// that have no other escape representation
431
buf.append("\\x00");
432                         int c2 = (c >> 4) & 0x0F;
433                         c = (char) (c & 0x0F);
434                         buf.append((char) (c2 < 10 ? c2 + '0' : c2 - 10 + 'A'));
435                         buf.append((char) (c < 10 ? c + '0' : c - 10 + 'A'));
436                         break;
437                     }
438                     default:
439                     {
440                         if (buf == null) {
441                             buf = new StringBuffer JavaDoc(s.length() + 2);
442                             buf.append(s.substring(0, i));
443                         }
444                         buf.append('\\');
445                         buf.append(escape);
446                     }
447                 }
448             } else {
449                 if (buf != null) {
450                     buf.append(c);
451                 }
452             }
453         }
454         return buf == null ? s : buf.toString();
455     }
456
457     /**
458      * FTL string literal decoding.
459      *
460      * \\, \", \', \n, \t, \r, \b and \f will be replaced according to
461      * Java rules. In additional, it knows \g, \l, \a and \{ which are
462      * replaced with &lt;, >, &amp; and { respectively.
463      * \x works as hexadecimal character code escape. The character
464      * codes are interpreted according to UCS basic plane (Unicode).
465      * "f\x006Fo", "f\x06Fo" and "f\x6Fo" will be "foo".
466      * "f\x006F123" will be "foo123" as the maximum number of digits is 4.
467      *
468      * All other \X (where X is any character not mentioned above or End-of-string)
469      * will cause a ParseException.
470      *
471      * @param s String literal <em>without</em> the surrounding quotation marks
472      * @return String with all escape sequences resolved
473      * @throws ParseException if there string contains illegal escapes
474      */

475     public static String JavaDoc FTLStringLiteralDec(String JavaDoc s) throws ParseException {
476
477         int idx = s.indexOf('\\');
478         if (idx == -1) {
479             return s;
480         }
481
482         int lidx = s.length() - 1;
483         int bidx = 0;
484         StringBuffer JavaDoc buf = new StringBuffer JavaDoc(lidx);
485         do {
486             buf.append(s.substring(bidx, idx));
487             if (idx >= lidx) {
488                 throw new ParseException("The last character of string literal is backslash", 0,0);
489             }
490             char c = s.charAt(idx + 1);
491             switch (c) {
492                 case '"':
493                     buf.append('"');
494                     bidx = idx + 2;
495                     break;
496                 case '\'':
497                     buf.append('\'');
498                     bidx = idx + 2;
499                     break;
500                 case '\\':
501                     buf.append('\\');
502                     bidx = idx + 2;
503                     break;
504                 case 'n':
505                     buf.append('\n');
506                     bidx = idx + 2;
507                     break;
508                 case 'r':
509                     buf.append('\r');
510                     bidx = idx + 2;
511                     break;
512                 case 't':
513                     buf.append('\t');
514                     bidx = idx + 2;
515                     break;
516                 case 'f':
517                     buf.append('\f');
518                     bidx = idx + 2;
519                     break;
520                 case 'b':
521                     buf.append('\b');
522                     bidx = idx + 2;
523                     break;
524                 case 'g':
525                     buf.append('>');
526                     bidx = idx + 2;
527                     break;
528                 case 'l':
529                     buf.append('<');
530                     bidx = idx + 2;
531                     break;
532                 case 'a':
533                     buf.append('&');
534                     bidx = idx + 2;
535                     break;
536                 case '{':
537                     buf.append('{');
538                     bidx = idx + 2;
539                     break;
540                 case 'x': {
541                     idx += 2;
542                     int x = idx;
543                     int y = 0;
544                     int z = lidx > idx + 3 ? idx + 3 : lidx;
545                     while (idx <= z) {
546                         char b = s.charAt(idx);
547                         if (b >= '0' && b <= '9') {
548                             y <<= 4;
549                             y += b - '0';
550                         } else if (b >= 'a' && b <= 'f') {
551                             y <<= 4;
552                             y += b - 'a' + 10;
553                         } else if (b >= 'A' && b <= 'F') {
554                             y <<= 4;
555                             y += b - 'A' + 10;
556                         } else {
557                             break;
558                         }
559                         idx++;
560                     }
561                     if (x < idx) {
562                         buf.append((char) y);
563                     } else {
564                         throw new ParseException("Invalid \\x escape in a string literal",0,0);
565                     }
566                     bidx = idx;
567                     break;
568                 }
569                 default:
570                     throw new ParseException("Invalid escape sequence (\\" + c + ") in a string literal",0,0);
571             }
572             idx = s.indexOf('\\', bidx);
573         } while (idx != -1);
574         buf.append(s.substring(bidx));
575
576         return buf.toString();
577     }
578
579     public static Locale deduceLocale(String JavaDoc input) {
580        Locale locale = Locale.getDefault();
581        if (input.charAt(0) == '"') input = input.substring(1, input.length() -1);
582        StringTokenizer st = new StringTokenizer(input, ",_ ");
583        String JavaDoc lang = "", country = "";
584        if (st.hasMoreTokens()) {
585           lang = st.nextToken();
586        }
587        if (st.hasMoreTokens()) {
588           country = st.nextToken();
589        }
590        if (!st.hasMoreTokens()) {
591           locale = new Locale(lang, country);
592        }
593        else {
594           locale = new Locale(lang, country, st.nextToken());
595        }
596        return locale;
597     }
598
599     public static String JavaDoc capitalize(String JavaDoc s) {
600         StringTokenizer st = new StringTokenizer(s, " \t\r\n", true);
601         StringBuffer JavaDoc buf = new StringBuffer JavaDoc(s.length());
602         while (st.hasMoreTokens()) {
603             String JavaDoc tok = st.nextToken();
604             buf.append(tok.substring(0, 1).toUpperCase());
605             buf.append(tok.substring(1).toLowerCase());
606         }
607         return buf.toString();
608     }
609
610     public static boolean getYesNo(String JavaDoc s) {
611         if (s.startsWith("\"")) {
612             s = s.substring(1, s.length() -1);
613
614         }
615         if (s.equalsIgnoreCase("n")
616                 || s.equalsIgnoreCase("no")
617                 || s.equalsIgnoreCase("f")
618                 || s.equalsIgnoreCase("false")) {
619             return false;
620         }
621         else if (s.equalsIgnoreCase("y")
622                 || s.equalsIgnoreCase("yes")
623                 || s.equalsIgnoreCase("t")
624                 || s.equalsIgnoreCase("true")) {
625             return true;
626         }
627         throw new IllegalArgumentException JavaDoc("Illegal boolean value: " + s);
628     }
629
630     /**
631      * Splits a string at the specified character.
632      */

633     public static String JavaDoc[] split(String JavaDoc s, char c) {
634         int i, b, e;
635         int cnt;
636         String JavaDoc res[];
637         int ln = s.length();
638
639         i = 0;
640         cnt = 1;
641         while ((i = s.indexOf(c, i)) != -1) {
642             cnt++;
643             i++;
644         }
645         res = new String JavaDoc[cnt];
646
647         i = 0;
648         b = 0;
649         while (b <= ln) {
650             e = s.indexOf(c, b);
651             if (e == -1) e = ln;
652             res[i++] = s.substring(b, e);
653             b = e + 1;
654         }
655         return res;
656     }
657
658     /**
659      * Splits a string at the specified string.
660      */

661     public static String JavaDoc[] split(String JavaDoc s, String JavaDoc sep, boolean caseInsensitive) {
662         String JavaDoc splitString = caseInsensitive ? sep.toLowerCase() : sep;
663         String