KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > gnu > xquery > util > StringUtils


// Copyright (c) 2001, 2003 Per M.A. Bothner and Brainfood Inc.
// This is free software; for terms and warranty disclaimer see ./COPYING.

4 package gnu.xquery.util;
5 import gnu.lists.*;
6 import gnu.math.*;
7 import gnu.mapping.*;
8 import gnu.xml.TextUtils;
9 import gnu.kawa.xml.KNode;
10 import gnu.kawa.xml.UntypedAtomic;
11 import java.util.regex.Pattern JavaDoc;
12 import java.util.regex.Matcher JavaDoc;
13 /* #ifdef use:java.text.Normalizer */
14 // import java.text.Normalizer;
15
/* #endif */
16
17 public class StringUtils
18 {
19   private static String JavaDoc ERROR_VALUE = "<error>";
20
21   static String JavaDoc coerceToString (Object JavaDoc arg, String JavaDoc functionName,
22                                 int iarg, String JavaDoc onEmpty)
23   {
24     if (arg instanceof KNode)
25       arg = KNode.atomicValue(arg);
26     if ((arg == Values.empty || arg == null) && onEmpty != ERROR_VALUE)
27       return onEmpty;
28     if (arg instanceof UntypedAtomic
29         /* #ifdef use:java.net.URI */
30         || arg instanceof java.net.URI JavaDoc
31         /* #endif */
32         || arg instanceof String JavaDoc)
33       return arg.toString();
34     throw new WrongType(functionName, iarg, arg,
35                         onEmpty == ERROR_VALUE ? "xs:string" : "xs:string?");
36   }
37
38   public static Object JavaDoc lowerCase (Object JavaDoc node)
39   {
40     return coerceToString(node, "lower-case", 1, "").toLowerCase();
41   }
42
43   public static Object JavaDoc upperCase (Object JavaDoc node)
44   {
45     return coerceToString(node, "upper-case", 1, "").toUpperCase();
46   }
47
48   static double asDouble (Object JavaDoc value)
49   {
50     if (! (value instanceof Number JavaDoc))
51       value = NumberValue.numberValue(value);
52     return ((Number JavaDoc) value).doubleValue();
53   }
54
55   public static Object JavaDoc substring (Object JavaDoc str, Object JavaDoc start)
56   {
57     double d1 = asDouble(start);
58     if (Double.isNaN(d1))
59       return "";
60     int i = (int) (d1 - 0.5);
61     if (i < 0)
62       i = 0;
63     String JavaDoc s = coerceToString(str, "substring", 1, "");
64     int len = s.length();
65     int offset = 0;
66     while (--i >= 0)
67       {
68         if (offset >= len)
69           return "";
70         char ch = s.charAt(offset++);
71         if (ch >= 0xD800 && ch < 0xDC00 && offset < len)
72           offset++;
73       }
74     return s.substring(offset);
75   }
76
77   public static Object JavaDoc substring (Object JavaDoc str, Object JavaDoc start, Object JavaDoc length)
78   {
79     String JavaDoc s = coerceToString(str, "substring", 1, "");
80     int len = s.length();
81     // Don't use Math.round because it returns 0 given NaN!
82
// We pre-subtract 1 before rounding.
83
double d1 = Math.floor(asDouble(start)-0.5);
84     double d2 = d1 + Math.floor(asDouble(length)+0.5);
85     if (d1 <= 0)
86       d1 = 0;
87     if (d2 > len)
88       d2 = len;
89     if (d2 <= d1) // Including the case where either is NaN.
90
return "";
91     int i1 = (int) d1;
92     int i2 = (int) d2 - i1;
93     int offset = 0;
94     while (--i1 >= 0)
95       {
96         if (offset >= len)
97           return "";
98         char ch = s.charAt(offset++);
99         if (ch >= 0xD800 && ch < 0xDC00 && offset < len)
100           offset++;
101       }
102     i1 = offset;
103     while (--i2 >= 0)
104       {
105         if (offset >= len)
106           return "";
107         char ch = s.charAt(offset++);
108         if (ch >= 0xD800 && ch < 0xDC00 && offset < len)
109           offset++;
110       }
111     i2 = offset;
112     return s.substring(i1, i2);
113   }
114
115   public static Object JavaDoc stringLength (Object JavaDoc str)
116   {
117     String JavaDoc s = coerceToString(str, "string-length", 1, "");
118     int slen = s.length();
119     int len = 0;
120     for (int i = 0; i < slen; )
121       {
122         char ch = s.charAt(i++);
123         if (ch >= 0xD800 && ch < 0xDC00 && i < slen)
124           i++;
125         len++;
126       }
127     return IntNum.make(len);
128   }
129
130   public static Object JavaDoc substringBefore (Object JavaDoc str, Object JavaDoc find)
131   {
132     String JavaDoc s = coerceToString(str, "substring-before", 1, "");
133     String JavaDoc f = coerceToString(find, "substring-before", 2, "");
134     int flen = f.length();
135
136     if (flen==0)
137       return "";
138     int start = s.indexOf(f);
139     return start >= 0 ? s.substring(0,start) : "";
140   }
141
142   public static Object JavaDoc substringAfter (Object JavaDoc str, Object JavaDoc find)
143   {
144     String JavaDoc s = coerceToString(str, "substring-after", 1, "");
145     String JavaDoc f = coerceToString(find, "substring-after", 2, "");
146     int flen = f.length();
147
148     if (flen==0)
149       return s;
150
151     int start = s.indexOf(f);
152     return start >= 0 ? s.substring(start+flen) : "";
153   }
154
155   public static Object JavaDoc translate (Object JavaDoc str, Object JavaDoc map, Object JavaDoc trans)
156   {
157     String JavaDoc sv = coerceToString(str, "translate", 1, "");
158     map = KNode.atomicValue(map);
159     if (! (map instanceof UntypedAtomic || map instanceof String JavaDoc))
160       throw new WrongType("translate", 2, str, "xs:string");
161     String JavaDoc m = map.toString();
162     int mlen = m.length();
163
164     trans = KNode.atomicValue(trans);
165     if (! (trans instanceof UntypedAtomic || trans instanceof String JavaDoc))
166       throw new WrongType("translate", 3, str, "xs:string");
167     String JavaDoc t = trans.toString();
168
169     if (mlen==0) return sv;
170
171     int slen = sv.length();
172     StringBuffer JavaDoc s = new StringBuffer JavaDoc(slen);
173     int tlen = t.length();
174
175   mainLoop:
176     for (int i=0; i < slen;)
177       {
178         char c1 = sv.charAt(i++);
179         char c2 = 0;
180         if (c1 >= 0xD800 && c1 < 0xDC00 && i < slen)
181           c2 = sv.charAt(i++);
182         int j = 0;
183         for (int mi = 0; mi < mlen; )
184           {
185             char m1 = m.charAt(mi++);
186             char m2 = 0;
187             if (m1 >= 0xD800 && m1 < 0xDC00 && mi < mlen)
188               m2 = m.charAt(mi++);
189             if (m1 == c1 && m2 == c2)
190               {
191                 for (int ti = 0; ; j--)
192                   {
193                     if (ti >= tlen)
194                       continue mainLoop;
195                     char t1 = t.charAt(ti++);
196                     char t2 = 0;
197                     if (t1 >= 0xD800 && t1 < 0xDC00 && ti < tlen)
198                       t2 = t.charAt(ti++);
199                     if (j == 0)
200                       {
201                         c1 = t1;
202                         c2 = t2;
203                         break;
204                       }
205                   }
206                 break;
207               }
208             j++;
209           }
210         s.append(c1);
211         if (c2 != 0)
212           s.append(c2);
213       }
214
215     return s.toString();
216   }
217
218   public static Object JavaDoc stringPad (Object JavaDoc str, Object JavaDoc padcount)
219   {
220     int count = ((Number JavaDoc) NumberValue.numberValue(padcount)).intValue();
221     if (count <= 0)
222       {
223         if (count == 0)
224           return "";
225     throw new IndexOutOfBoundsException JavaDoc("Invalid string-pad count");
226       }
227
228     String JavaDoc sv = coerceToString(str, "string-pad", 1, "");
229     int slen = sv.length();
230     StringBuffer JavaDoc s = new StringBuffer JavaDoc(count*slen);
231     for (int i=0; i<count; i++) s.append(sv);
232
233     return s.toString();
234   }
235
236   public static Object JavaDoc contains (Object JavaDoc str, Object JavaDoc contain)
237   {
238     String JavaDoc s = coerceToString(str, "contains", 1, "");
239     String JavaDoc c = coerceToString(contain, "contains", 2, "");
240
241     return s.indexOf(c) <0 ? Boolean.FALSE : Boolean.TRUE;
242   }
243
244   public static Object JavaDoc startsWith (Object JavaDoc str, Object JavaDoc with)
245   {
246     String JavaDoc s = coerceToString(str, "starts-with", 1, "");
247     String JavaDoc w = coerceToString(with, "starts-with", 2, "");
248
249     return s.startsWith(w) ? Boolean.TRUE : Boolean.FALSE;
250   }
251
252   public static Object JavaDoc endsWith (Object JavaDoc str, Object JavaDoc with)
253   {
254     String JavaDoc s = coerceToString(str, "ends-with", 1, "");
255     String JavaDoc w = coerceToString(with, "ends-with", 2, "");
256     return s.endsWith(w) ? Boolean.TRUE : Boolean.FALSE;
257   }
258
259   public static Object JavaDoc stringJoin (Object JavaDoc strseq, Object JavaDoc join)
260   {
261     StringBuffer JavaDoc s = new StringBuffer JavaDoc();
262     String JavaDoc glue = coerceToString(join, "string-join", 2, ERROR_VALUE);
263     int glen = glue.length();
264     int index=0;
265     boolean started = false;
266
267     while((index=Values.nextIndex(strseq, index)) >= 0)
268       {
269     Object JavaDoc obj = Values.nextValue(strseq, index-1);
270     if (obj == Values.empty) continue;
271
272     if (started && glen > 0)
273           s.append(glue);
274         s.append(TextUtils.stringValue(obj));
275     started=true;
276       }
277
278     return s.toString();
279   }
280
281   public static String JavaDoc concat$V (Object JavaDoc arg1, Object JavaDoc arg2, Object JavaDoc[] args)
282   {
283     String JavaDoc str1 = TextUtils.stringValue(arg1);
284     String JavaDoc str2 = TextUtils.stringValue(arg2);
285     /* #ifdef JAVA5 */
286     // StringBuilder result = new StringBuilder(str1);
287
/* #else */
288     StringBuffer JavaDoc result = new StringBuffer JavaDoc(str1);
289     /* #endif */
290     result.append(str2);
291     int count = args.length;
292     for (int i = 0; i < count; i++)
293       result.append(TextUtils.stringValue(args[i]));
294     return result.toString();
295   }
296
297   /** This implements the XQuery <code>fn:compare</code> function. */
298   public static Object JavaDoc compare (Object JavaDoc val1, Object JavaDoc val2, NamedCollator coll)
299   {
300     if (val1 == Values.empty || val1 == null
301         || val2 == Values.empty || val2 == null)
302       return Values.empty;
303     if (coll == null)
304       coll = NamedCollator.codepointCollation;
305     int ret = coll.compare(val1.toString(), val2.toString());
306     return ret < 0 ? IntNum.minusOne() : ret > 0 ? IntNum.one() : IntNum.zero();
307   }
308
309   public static void stringToCodepoints$X (Object JavaDoc arg, CallContext ctx)
310   {
311     String JavaDoc str = coerceToString(arg, "string-to-codepoints", 1, "");
312     int len = str.length();
313     Consumer out = ctx.consumer;
314     for (int i = 0; i < len; )
315       {
316         int ch = str.charAt(i++);
317         if (ch >= 0xD800 && ch < 0xDC00 && i < len)
318           ch = (ch - 0xD800) * 0x400 + (str.charAt(i++) - 0xDC00) + 0x10000;
319         out.writeInt(ch);
320       }
321   }
322
323   private static void appendCodepoint (Object JavaDoc code, StringBuffer JavaDoc sbuf)
324   {
325     IntNum I = (IntNum) gnu.kawa.xml.XIntegerType.integerType.cast(code);
326     int i = I.intValue();
327     if (i <= 0
328         || (i > 0xD7FF
329             && (i < 0xE000 || (i > 0xFFFD && i < 0x10000) || i > 0x10FFFF)))
330       throw new IllegalArgumentException JavaDoc("codepoints-to-string: "+i+" is not a valid XML character [FOCH0001]");
331     if (i >= 0x10000)
332       {
333         sbuf.append((char) (((i - 0x10000) >> 10) + 0xD800));
334         i = (i & 0x3FF) + 0xDC00;
335       }
336     sbuf.append((char) i);
337   }
338
339   public static String JavaDoc codepointsToString (Object JavaDoc arg)
340   {
341     if (arg == null)
342       return "";
343     StringBuffer JavaDoc sbuf = new StringBuffer JavaDoc();
344     if (arg instanceof Values)
345       {
346         Values vals = (Values) arg;
347         int ipos = vals.startPos();
348         while ((ipos = vals.nextPos(ipos)) != 0)
349           appendCodepoint(vals.getPosPrevious(ipos), sbuf);
350       }
351     else
352       appendCodepoint(arg, sbuf);
353     return sbuf.toString();
354   }
355
356   public static String JavaDoc encodeForUri (Object JavaDoc arg)
357   {
358     return encodeForUri(arg, 'U');
359   }
360
361   public static String JavaDoc iriToUri (Object JavaDoc arg)
362   {
363     return encodeForUri(arg, 'I');
364   }
365
366   public static String JavaDoc escapeHtmlUri (Object JavaDoc arg)
367   {
368     return encodeForUri(arg, 'H');
369   }
370
371   static String JavaDoc encodeForUri (Object JavaDoc arg, char mode)
372   {
373     StringBuffer JavaDoc sbuf = new StringBuffer JavaDoc();
374     String JavaDoc str;
375     if (arg instanceof String JavaDoc || arg instanceof UntypedAtomic)
376       str = arg.toString();
377     else if (arg == null || arg == Values.empty)
378       str = "";
379     else
380       throw new ClassCastException JavaDoc();
381     int len = str.length();
382     for (int i = 0; i <len; i++)
383       {
384         int ch = str.charAt(i);
385         // FIXME: Check for surrogate.
386
if (mode == 'H' ? ch >= 32 && ch <= 126
387             : ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
388                || (ch >= '0' && ch <= '9')
389                || ch == '-' || ch == '_' || ch == '.' || ch == '~'
390                || (mode == 'I'
391                    && (ch == ';' || ch == '/' || ch == '?' || ch == ':'
392                        || ch == '*' || ch == '\'' || ch == '(' || ch == ')'
393                        || ch == '@' || ch == '&' || ch == '=' || ch == '+'
394                        || ch == '$' || ch == ',' || ch == '[' || ch == ']'
395                        || ch == '#' || ch == '!' || ch == '%'))))
396           sbuf.append((char) ch);
397         else
398           {
399             int pos = sbuf.length();
400             int nbytes = 0;
401             int needed = ch < (1 << 7) ? 1
402               : ch < (1 << 11) ? 2
403               : ch < (1 << 16) ? 3
404               : 4;
405             do
406               {
407                 // We insert encodings for the bytes in right-to-left order.
408
int availbits = nbytes == 0 ? 7 : 6 - nbytes;
409                 int b;
410                 if (ch < (1 << availbits))
411                   {
412                     // The rest fits: handling first bytes.
413
b = ch;
414                     if (nbytes > 0)
415                       b |= (0xff80 >> nbytes) & 0xff;
416                     ch = 0;
417                   }
418                 else
419                   {
420                     b = 0x80 | (ch & 0x3f);
421                     ch >>= 6;
422                   }
423                 nbytes++;
424                 for (int j = 0; j <= 1; j++)
425                   {
426                     int hex = b & 15;
427                     sbuf.insert(pos,
428                                 (char) (hex <= 9 ? hex + '0' : hex - 10 + 'A'));
429                     b >>= 4;
430                   }
431                 sbuf.insert(pos, '%');
432               }
433             while (ch != 0);
434           }
435       }
436     return sbuf.toString();
437   }
438
439   public static String JavaDoc normalizeSpace (Object JavaDoc arg)
440   {
441     String JavaDoc str = coerceToString(arg, "normalize-space", 1, "");
442     int len = str.length();
443     StringBuffer JavaDoc sbuf = null;
444     int skipped = 0;
445     for (int i = 0; i < len; i++)
446       {
447         char ch = str.charAt(i);
448         if (Character.isWhitespace(ch))
449           {
450             if (sbuf == null && skipped == 0 && i > 0)
451               sbuf = new StringBuffer JavaDoc(str.substring(0, i));
452             skipped++;
453           }
454         else
455           {
456             if (skipped > 0)
457               {
458                 if (sbuf != null)
459                   sbuf.append(' ');
460                 else if (skipped > 1 || i == 1 || str.charAt(i-1) != ' ')
461                   sbuf = new StringBuffer JavaDoc();
462                 skipped = 0;
463               }
464             if (sbuf != null)
465               sbuf.append(ch);
466           }
467       }
468     return sbuf != null ? sbuf.toString() : skipped > 0 ? "" : str;
469   }
470
471   /* #ifdef use:java.util.regex */
472   public static Pattern JavaDoc makePattern (String JavaDoc pattern, String JavaDoc flags)
473   {
474     int fl = 0;
475     for (int i = flags.length(); --i >= 0; )
476       {
477         char ch = flags.charAt(i);
478         switch (ch)
479           {
480           case 'i':
481             fl |= Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE;
482             break;
483           case 's':
484             fl |= Pattern.DOTALL;
485             break;
486           case 'x':
487             StringBuffer JavaDoc sbuf = new StringBuffer JavaDoc();
488             int plen = pattern.length();
489             for (int j = 0; j < plen; j++)
490               {
491                 char pch = pattern.charAt(j);
492                 if (! Character.isWhitespace(pch))
493                   sbuf.append(pch);
494               }
495             pattern = sbuf.toString();
496             break;
497           case 'm':
498             fl |= Pattern.MULTILINE;
499             break;
500           default:
501             throw new IllegalArgumentException JavaDoc("unknown 'replace' flag");
502           }
503       }
504     return Pattern.compile(pattern, fl);
505   }
506   /* #endif */
507
508   public static boolean matches (Object JavaDoc input, String JavaDoc pattern)
509   {
510     return matches(input, pattern, "");
511   }
512
513   public static boolean matches (Object JavaDoc arg, String JavaDoc pattern, String JavaDoc flags)
514   {
515     /* #ifdef use:java.util.regex */
516     String JavaDoc str;
517     if (arg instanceof String JavaDoc || arg instanceof UntypedAtomic)
518       str = arg.toString();
519     else if (arg == null || arg == Values.empty)
520       str = "";
521     else
522       throw new ClassCastException JavaDoc();
523     return makePattern(pattern, flags).matcher(str).find();
524     /* #else */
525     // throw new Error("fn:matches requires java.util.regex (JDK 1.4 or equivalent)");
526
/* #endif */
527   }
528
529   public static String JavaDoc replace (Object JavaDoc input, String JavaDoc pattern,
530                                  String JavaDoc replacement)
531   {
532     return replace(input, pattern, replacement, "");
533   }
534
535   public static String JavaDoc replace (Object JavaDoc arg, String JavaDoc pattern,
536                                  String JavaDoc replacement, String JavaDoc flags)
537   {
538     /* #ifdef use:java.util.regex */
539     String JavaDoc str;
540     if (arg instanceof String JavaDoc || arg instanceof UntypedAtomic)
541       str = arg.toString();
542     else if (arg == null || arg == Values.empty)
543       str = "";
544     else
545       throw new ClassCastException JavaDoc();
546     return makePattern(pattern, flags).matcher(str).replaceAll(replacement);
547     /* #else */
548     // throw new Error("fn:replace requires java.util.regex (JDK 1.4 or equivalent)");
549
/* #endif */
550   }
551
552   public static void tokenize$X (Object JavaDoc arg, String JavaDoc pattern, CallContext ctx)
553   {
554     tokenize$X(arg, pattern, "", ctx);
555   }
556
557   public static void tokenize$X (Object JavaDoc arg, String JavaDoc pattern,
558                                  String JavaDoc flags, CallContext ctx)
559   {
560     /* #ifdef use:java.util.regex */
561     String JavaDoc str;
562     if (arg instanceof String JavaDoc || arg instanceof UntypedAtomic)
563       str = arg.toString();
564     else if (arg == null || arg == Values.empty)
565       str = "";
566     else
567       throw new ClassCastException JavaDoc();
568     Consumer out = ctx.consumer;
569     Matcher JavaDoc matcher = makePattern(pattern, flags).matcher(str);
570     int len = str.length();
571     if (len == 0)
572       return;
573     int start = 0;
574     for (;;)
575       {
576         boolean matched = matcher.find();
577         if (! matched)
578           {
579             out.writeObject(str.substring(start));
580             break;
581           }
582         int end = matcher.start();
583         out.writeObject(str.substring(start, end));
584         start = matcher.end();
585         if (start == end)
586           throw new IllegalArgumentException JavaDoc("pattern matches empty string");
587       }
588     /* #else */
589     // throw new Error("fn:tokenize requires java.util.regex (JDK 1.4 or equivalent)");
590
/* #endif */
591   }
592
593   public static Object JavaDoc codepointEqual (Object JavaDoc arg1, Object JavaDoc arg2)
594   {
595     String JavaDoc str1 = coerceToString(arg1, "codepoint-equal", 1, null);
596     String JavaDoc str2 = coerceToString(arg2, "codepoint-equal", 2, null);
597     if (str1 == null || str2 == null)
598       return Values.empty;
599     return str1.equals(str2) ? Boolean.TRUE : Boolean.FALSE;
600   }
601
602   public static Object JavaDoc normalizeUnicode (Object JavaDoc arg)
603   {
604     return normalizeUnicode(arg, "NFC");
605   }
606
607   public static Object JavaDoc normalizeUnicode (Object JavaDoc arg, String JavaDoc form)
608   {
609     String JavaDoc str = coerceToString(arg, "normalize-unicode", 1, "");
610     form = form.trim().toUpperCase();
611     if ("".equals(form))
612       return str;
613     /* #ifdef use:java.text.Normalizer */
614     // Normalizer.Form nform;
615
// if ("NFC".equals(form))
616
// nform = Normalizer.Form.NFC;
617
// else if ("NFD".equals(form))
618
// nform = Normalizer.Form.NFD;
619
// else if ("NFKC".equals(form))
620
// nform = Normalizer.Form.NFKC;
621
// else if ("NFKD".equals(form))
622
// nform = Normalizer.Form.NFKD;
623
// else
624
// throw new RuntimeException("normalize-unicode: unknown normalization form '"+form+'\'');
625
// return Normalizer.normalize(str, nform);
626
/* #else */
627     throw AbstractSequence.unsupportedException("normalize-unicode form "+form);
628     /* #endif */
629   }
630 }
631
Popular Tags