1 4 package gnu.xquery.util; 5 import gnu.lists.*; 6 import gnu.math.*; 7 import gnu.mapping.*; 8 import gnu.xml.TextUtils; 9 import gnu.kawa.xml.KNode; 10 import gnu.kawa.xml.UntypedAtomic; 11 import java.util.regex.Pattern ; 12 import java.util.regex.Matcher ; 13 14 16 17 public class StringUtils 18 { 19 private static String ERROR_VALUE = "<error>"; 20 21 static String coerceToString (Object arg, String functionName, 22 int iarg, String onEmpty) 23 { 24 if (arg instanceof KNode) 25 arg = KNode.atomicValue(arg); 26 if ((arg == Values.empty || arg == null) && onEmpty != ERROR_VALUE) 27 return onEmpty; 28 if (arg instanceof UntypedAtomic 29 30 || arg instanceof java.net.URI 31 32 || arg instanceof String ) 33 return arg.toString(); 34 throw new WrongType(functionName, iarg, arg, 35 onEmpty == ERROR_VALUE ? "xs:string" : "xs:string?"); 36 } 37 38 public static Object lowerCase (Object node) 39 { 40 return coerceToString(node, "lower-case", 1, "").toLowerCase(); 41 } 42 43 public static Object upperCase (Object node) 44 { 45 return coerceToString(node, "upper-case", 1, "").toUpperCase(); 46 } 47 48 static double asDouble (Object value) 49 { 50 if (! (value instanceof Number )) 51 value = NumberValue.numberValue(value); 52 return ((Number ) value).doubleValue(); 53 } 54 55 public static Object substring (Object str, Object start) 56 { 57 double d1 = asDouble(start); 58 if (Double.isNaN(d1)) 59 return ""; 60 int i = (int) (d1 - 0.5); 61 if (i < 0) 62 i = 0; 63 String s = coerceToString(str, "substring", 1, ""); 64 int len = s.length(); 65 int offset = 0; 66 while (--i >= 0) 67 { 68 if (offset >= len) 69 return ""; 70 char ch = s.charAt(offset++); 71 if (ch >= 0xD800 && ch < 0xDC00 && offset < len) 72 offset++; 73 } 74 return s.substring(offset); 75 } 76 77 public static Object substring (Object str, Object start, Object length) 78 { 79 String s = coerceToString(str, "substring", 1, ""); 80 int len = s.length(); 81 double d1 = Math.floor(asDouble(start)-0.5); 84 double d2 = d1 + Math.floor(asDouble(length)+0.5); 85 if (d1 <= 0) 86 d1 = 0; 87 if (d2 > len) 88 d2 = len; 89 if (d2 <= d1) return ""; 91 int i1 = (int) d1; 92 int i2 = (int) d2 - i1; 93 int offset = 0; 94 while (--i1 >= 0) 95 { 96 if (offset >= len) 97 return ""; 98 char ch = s.charAt(offset++); 99 if (ch >= 0xD800 && ch < 0xDC00 && offset < len) 100 offset++; 101 } 102 i1 = offset; 103 while (--i2 >= 0) 104 { 105 if (offset >= len) 106 return ""; 107 char ch = s.charAt(offset++); 108 if (ch >= 0xD800 && ch < 0xDC00 && offset < len) 109 offset++; 110 } 111 i2 = offset; 112 return s.substring(i1, i2); 113 } 114 115 public static Object stringLength (Object str) 116 { 117 String s = coerceToString(str, "string-length", 1, ""); 118 int slen = s.length(); 119 int len = 0; 120 for (int i = 0; i < slen; ) 121 { 122 char ch = s.charAt(i++); 123 if (ch >= 0xD800 && ch < 0xDC00 && i < slen) 124 i++; 125 len++; 126 } 127 return IntNum.make(len); 128 } 129 130 public static Object substringBefore (Object str, Object find) 131 { 132 String s = coerceToString(str, "substring-before", 1, ""); 133 String f = coerceToString(find, "substring-before", 2, ""); 134 int flen = f.length(); 135 136 if (flen==0) 137 return ""; 138 int start = s.indexOf(f); 139 return start >= 0 ? s.substring(0,start) : ""; 140 } 141 142 public static Object substringAfter (Object str, Object find) 143 { 144 String s = coerceToString(str, "substring-after", 1, ""); 145 String f = coerceToString(find, "substring-after", 2, ""); 146 int flen = f.length(); 147 148 if (flen==0) 149 return s; 150 151 int start = s.indexOf(f); 152 return start >= 0 ? s.substring(start+flen) : ""; 153 } 154 155 public static Object translate (Object str, Object map, Object trans) 156 { 157 String sv = coerceToString(str, "translate", 1, ""); 158 map = KNode.atomicValue(map); 159 if (! (map instanceof UntypedAtomic || map instanceof String )) 160 throw new WrongType("translate", 2, str, "xs:string"); 161 String m = map.toString(); 162 int mlen = m.length(); 163 164 trans = KNode.atomicValue(trans); 165 if (! (trans instanceof UntypedAtomic || trans instanceof String )) 166 throw new WrongType("translate", 3, str, "xs:string"); 167 String t = trans.toString(); 168 169 if (mlen==0) return sv; 170 171 int slen = sv.length(); 172 StringBuffer s = new StringBuffer (slen); 173 int tlen = t.length(); 174 175 mainLoop: 176 for (int i=0; i < slen;) 177 { 178 char c1 = sv.charAt(i++); 179 char c2 = 0; 180 if (c1 >= 0xD800 && c1 < 0xDC00 && i < slen) 181 c2 = sv.charAt(i++); 182 int j = 0; 183 for (int mi = 0; mi < mlen; ) 184 { 185 char m1 = m.charAt(mi++); 186 char m2 = 0; 187 if (m1 >= 0xD800 && m1 < 0xDC00 && mi < mlen) 188 m2 = m.charAt(mi++); 189 if (m1 == c1 && m2 == c2) 190 { 191 for (int ti = 0; ; j--) 192 { 193 if (ti >= tlen) 194 continue mainLoop; 195 char t1 = t.charAt(ti++); 196 char t2 = 0; 197 if (t1 >= 0xD800 && t1 < 0xDC00 && ti < tlen) 198 t2 = t.charAt(ti++); 199 if (j == 0) 200 { 201 c1 = t1; 202 c2 = t2; 203 break; 204 } 205 } 206 break; 207 } 208 j++; 209 } 210 s.append(c1); 211 if (c2 != 0) 212 s.append(c2); 213 } 214 215 return s.toString(); 216 } 217 218 public static Object stringPad (Object str, Object padcount) 219 { 220 int count = ((Number ) NumberValue.numberValue(padcount)).intValue(); 221 if (count <= 0) 222 { 223 if (count == 0) 224 return ""; 225 throw new IndexOutOfBoundsException ("Invalid string-pad count"); 226 } 227 228 String sv = coerceToString(str, "string-pad", 1, ""); 229 int slen = sv.length(); 230 StringBuffer s = new StringBuffer (count*slen); 231 for (int i=0; i<count; i++) s.append(sv); 232 233 return s.toString(); 234 } 235 236 public static Object contains (Object str, Object contain) 237 { 238 String s = coerceToString(str, "contains", 1, ""); 239 String c = coerceToString(contain, "contains", 2, ""); 240 241 return s.indexOf(c) <0 ? Boolean.FALSE : Boolean.TRUE; 242 } 243 244 public static Object startsWith (Object str, Object with) 245 { 246 String s = coerceToString(str, "starts-with", 1, ""); 247 String w = coerceToString(with, "starts-with", 2, ""); 248 249 return s.startsWith(w) ? Boolean.TRUE : Boolean.FALSE; 250 } 251 252 public static Object endsWith (Object str, Object with) 253 { 254 String s = coerceToString(str, "ends-with", 1, ""); 255 String w = coerceToString(with, "ends-with", 2, ""); 256 return s.endsWith(w) ? Boolean.TRUE : Boolean.FALSE; 257 } 258 259 public static Object stringJoin (Object strseq, Object join) 260 { 261 StringBuffer s = new StringBuffer (); 262 String glue = coerceToString(join, "string-join", 2, ERROR_VALUE); 263 int glen = glue.length(); 264 int index=0; 265 boolean started = false; 266 267 while((index=Values.nextIndex(strseq, index)) >= 0) 268 { 269 Object obj = Values.nextValue(strseq, index-1); 270 if (obj == Values.empty) continue; 271 272 if (started && glen > 0) 273 s.append(glue); 274 s.append(TextUtils.stringValue(obj)); 275 started=true; 276 } 277 278 return s.toString(); 279 } 280 281 public static String concat$V (Object arg1, Object arg2, Object [] args) 282 { 283 String str1 = TextUtils.stringValue(arg1); 284 String str2 = TextUtils.stringValue(arg2); 285 286 288 StringBuffer result = new StringBuffer (str1); 289 290 result.append(str2); 291 int count = args.length; 292 for (int i = 0; i < count; i++) 293 result.append(TextUtils.stringValue(args[i])); 294 return result.toString(); 295 } 296 297 298 public static Object compare (Object val1, Object val2, NamedCollator coll) 299 { 300 if (val1 == Values.empty || val1 == null 301 || val2 == Values.empty || val2 == null) 302 return Values.empty; 303 if (coll == null) 304 coll = NamedCollator.codepointCollation; 305 int ret = coll.compare(val1.toString(), val2.toString()); 306 return ret < 0 ? IntNum.minusOne() : ret > 0 ? IntNum.one() : IntNum.zero(); 307 } 308 309 public static void stringToCodepoints$X (Object arg, CallContext ctx) 310 { 311 String str = coerceToString(arg, "string-to-codepoints", 1, ""); 312 int len = str.length(); 313 Consumer out = ctx.consumer; 314 for (int i = 0; i < len; ) 315 { 316 int ch = str.charAt(i++); 317 if (ch >= 0xD800 && ch < 0xDC00 && i < len) 318 ch = (ch - 0xD800) * 0x400 + (str.charAt(i++) - 0xDC00) + 0x10000; 319 out.writeInt(ch); 320 } 321 } 322 323 private static void appendCodepoint (Object code, StringBuffer sbuf) 324 { 325 IntNum I = (IntNum) gnu.kawa.xml.XIntegerType.integerType.cast(code); 326 int i = I.intValue(); 327 if (i <= 0 328 || (i > 0xD7FF 329 && (i < 0xE000 || (i > 0xFFFD && i < 0x10000) || i > 0x10FFFF))) 330 throw new IllegalArgumentException ("codepoints-to-string: "+i+" is not a valid XML character [FOCH0001]"); 331 if (i >= 0x10000) 332 { 333 sbuf.append((char) (((i - 0x10000) >> 10) + 0xD800)); 334 i = (i & 0x3FF) + 0xDC00; 335 } 336 sbuf.append((char) i); 337 } 338 339 public static String codepointsToString (Object arg) 340 { 341 if (arg == null) 342 return ""; 343 StringBuffer sbuf = new StringBuffer (); 344 if (arg instanceof Values) 345 { 346 Values vals = (Values) arg; 347 int ipos = vals.startPos(); 348 while ((ipos = vals.nextPos(ipos)) != 0) 349 appendCodepoint(vals.getPosPrevious(ipos), sbuf); 350 } 351 else 352 appendCodepoint(arg, sbuf); 353 return sbuf.toString(); 354 } 355 356 public static String encodeForUri (Object arg) 357 { 358 return encodeForUri(arg, 'U'); 359 } 360 361 public static String iriToUri (Object arg) 362 { 363 return encodeForUri(arg, 'I'); 364 } 365 366 public static String escapeHtmlUri (Object arg) 367 { 368 return encodeForUri(arg, 'H'); 369 } 370 371 static String encodeForUri (Object arg, char mode) 372 { 373 StringBuffer sbuf = new StringBuffer (); 374 String str; 375 if (arg instanceof String || arg instanceof UntypedAtomic) 376 str = arg.toString(); 377 else if (arg == null || arg == Values.empty) 378 str = ""; 379 else 380 throw new ClassCastException (); 381 int len = str.length(); 382 for (int i = 0; i <len; i++) 383 { 384 int ch = str.charAt(i); 385 if (mode == 'H' ? ch >= 32 && ch <= 126 387 : ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') 388 || (ch >= '0' && ch <= '9') 389 || ch == '-' || ch == '_' || ch == '.' || ch == '~' 390 || (mode == 'I' 391 && (ch == ';' || ch == '/' || ch == '?' || ch == ':' 392 || ch == '*' || ch == '\'' || ch == '(' || ch == ')' 393 || ch == '@' || ch == '&' || ch == '=' || ch == '+' 394 || ch == '$' || ch == ',' || ch == '[' || ch == ']' 395 || ch == '#' || ch == '!' || ch == '%')))) 396 sbuf.append((char) ch); 397 else 398 { 399 int pos = sbuf.length(); 400 int nbytes = 0; 401 int needed = ch < (1 << 7) ? 1 402 : ch < (1 << 11) ? 2 403 : ch < (1 << 16) ? 3 404 : 4; 405 do 406 { 407 int availbits = nbytes == 0 ? 7 : 6 - nbytes; 409 int b; 410 if (ch < (1 << availbits)) 411 { 412 b = ch; 414 if (nbytes > 0) 415 b |= (0xff80 >> nbytes) & 0xff; 416 ch = 0; 417 } 418 else 419 { 420 b = 0x80 | (ch & 0x3f); 421 ch >>= 6; 422 } 423 nbytes++; 424 for (int j = 0; j <= 1; j++) 425 { 426 int hex = b & 15; 427 sbuf.insert(pos, 428 (char) (hex <= 9 ? hex + '0' : hex - 10 + 'A')); 429 b >>= 4; 430 } 431 sbuf.insert(pos, '%'); 432 } 433 while (ch != 0); 434 } 435 } 436 return sbuf.toString(); 437 } 438 439 public static String normalizeSpace (Object arg) 440 { 441 String str = coerceToString(arg, "normalize-space", 1, ""); 442 int len = str.length(); 443 StringBuffer sbuf = null; 444 int skipped = 0; 445 for (int i = 0; i < len; i++) 446 { 447 char ch = str.charAt(i); 448 if (Character.isWhitespace(ch)) 449 { 450 if (sbuf == null && skipped == 0 && i > 0) 451 sbuf = new StringBuffer (str.substring(0, i)); 452 skipped++; 453 } 454 else 455 { 456 if (skipped > 0) 457 { 458 if (sbuf != null) 459 sbuf.append(' '); 460 else if (skipped > 1 || i == 1 || str.charAt(i-1) != ' ') 461 sbuf = new StringBuffer (); 462 skipped = 0; 463 } 464 if (sbuf != null) 465 sbuf.append(ch); 466 } 467 } 468 return sbuf != null ? sbuf.toString() : skipped > 0 ? "" : str; 469 } 470 471 472 public static Pattern makePattern (String pattern, String flags) 473 { 474 int fl = 0; 475 for (int i = flags.length(); --i >= 0; ) 476 { 477 char ch = flags.charAt(i); 478 switch (ch) 479 { 480 case 'i': 481 fl |= Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE; 482 break; 483 case 's': 484 fl |= Pattern.DOTALL; 485 break; 486 case 'x': 487 StringBuffer sbuf = new StringBuffer (); 488 int plen = pattern.length(); 489 for (int j = 0; j < plen; j++) 490 { 491 char pch = pattern.charAt(j); 492 if (! Character.isWhitespace(pch)) 493 sbuf.append(pch); 494 } 495 pattern = sbuf.toString(); 496 break; 497 case 'm': 498 fl |= Pattern.MULTILINE; 499 break; 500 default: 501 throw new IllegalArgumentException ("unknown 'replace' flag"); 502 } 503 } 504 return Pattern.compile(pattern, fl); 505 } 506 507 508 public static boolean matches (Object input, String pattern) 509 { 510 return matches(input, pattern, ""); 511 } 512 513 public static boolean matches (Object arg, String pattern, String flags) 514 { 515 516 String str; 517 if (arg instanceof String || arg instanceof UntypedAtomic) 518 str = arg.toString(); 519 else if (arg == null || arg == Values.empty) 520 str = ""; 521 else 522 throw new ClassCastException (); 523 return makePattern(pattern, flags).matcher(str).find(); 524 525 527 } 528 529 public static String replace (Object input, String pattern, 530 String replacement) 531 { 532 return replace(input, pattern, replacement, ""); 533 } 534 535 public static String replace (Object arg, String pattern, 536 String replacement, String flags) 537 { 538 539 String str; 540 if (arg instanceof String || arg instanceof UntypedAtomic) 541 str = arg.toString(); 542 else if (arg == null || arg == Values.empty) 543 str = ""; 544 else 545 throw new ClassCastException (); 546 return makePattern(pattern, flags).matcher(str).replaceAll(replacement); 547 548 550 } 551 552 public static void tokenize$X (Object arg, String pattern, CallContext ctx) 553 { 554 tokenize$X(arg, pattern, "", ctx); 555 } 556 557 public static void tokenize$X (Object arg, String pattern, 558 String flags, CallContext ctx) 559 { 560 561 String str; 562 if (arg instanceof String || arg instanceof UntypedAtomic) 563 str = arg.toString(); 564 else if (arg == null || arg == Values.empty) 565 str = ""; 566 else 567 throw new ClassCastException (); 568 Consumer out = ctx.consumer; 569 Matcher matcher = makePattern(pattern, flags).matcher(str); 570 int len = str.length(); 571 if (len == 0) 572 return; 573 int start = 0; 574 for (;;) 575 { 576 boolean matched = matcher.find(); 577 if (! matched) 578 { 579 out.writeObject(str.substring(start)); 580 break; 581 } 582 int end = matcher.start(); 583 out.writeObject(str.substring(start, end)); 584 start = matcher.end(); 585 if (start == end) 586 throw new IllegalArgumentException ("pattern matches empty string"); 587 } 588 589 591 } 592 593 public static Object codepointEqual (Object arg1, Object arg2) 594 { 595 String str1 = coerceToString(arg1, "codepoint-equal", 1, null); 596 String str2 = coerceToString(arg2, "codepoint-equal", 2, null); 597 if (str1 == null || str2 == null) 598 return Values.empty; 599 return str1.equals(str2) ? Boolean.TRUE : Boolean.FALSE; 600 } 601 602 public static Object normalizeUnicode (Object arg) 603 { 604 return normalizeUnicode(arg, "NFC"); 605 } 606 607 public static Object normalizeUnicode (Object arg, String form) 608 { 609 String str = coerceToString(arg, "normalize-unicode", 1, ""); 610 form = form.trim().toUpperCase(); 611 if ("".equals(form)) 612 return str; 613 614 627 throw AbstractSequence.unsupportedException("normalize-unicode form "+form); 628 629 } 630 } 631 | Popular Tags |