1 31 32 package org.opencms.i18n; 33 34 import org.opencms.main.CmsLog; 35 import org.opencms.main.OpenCms; 36 import org.opencms.util.CmsStringUtil; 37 38 import java.io.UnsupportedEncodingException ; 39 import java.net.URLDecoder ; 40 import java.net.URLEncoder ; 41 import java.nio.CharBuffer ; 42 import java.nio.charset.Charset ; 43 import java.nio.charset.CharsetEncoder ; 44 import java.util.HashMap ; 45 import java.util.Map ; 46 import java.util.regex.Matcher ; 47 import java.util.regex.Pattern ; 48 49 import org.apache.commons.logging.Log; 50 51 72 public final class CmsEncoder { 73 74 75 public static final String ENCODING_ISO_8859_1 = "ISO-8859-1"; 76 77 78 public static final String ENCODING_US_ASCII = "US-ASCII"; 79 80 85 public static final String ENCODING_UTF_8 = "UTF-8"; 86 87 88 private static final Pattern ENTITIY_PATTERN = Pattern.compile("\\&#\\d+;"); 89 90 91 private static final String ENTITY_PREFIX = "&#"; 92 93 94 private static final String ENTITY_REPLACEMENT = "$$"; 95 96 97 private static final Log LOG = CmsLog.getLog(CmsEncoder.class); 98 99 100 private static Map m_encodingCache = new HashMap (16); 101 102 103 private static final String PLUS_ENTITY = ENTITY_PREFIX + "043;"; 104 105 108 private CmsEncoder() { 109 110 } 112 113 125 public static String adjustHtmlEncoding(String input, String encoding) { 126 127 return encodeHtmlEntities(decodeHtmlEntities(input, encoding), encoding); 128 } 129 130 138 public static byte[] changeEncoding(byte[] input, String oldEncoding, String newEncoding) { 139 140 if ((oldEncoding == null) || (newEncoding == null)) { 141 return input; 142 } 143 if (oldEncoding.trim().equalsIgnoreCase(newEncoding.trim())) { 144 return input; 145 } 146 byte[] result = input; 147 try { 148 result = (new String (input, oldEncoding)).getBytes(newEncoding); 149 } catch (UnsupportedEncodingException e) { 150 } 152 return result; 153 } 154 155 166 public static String createString(byte[] bytes, String encoding) { 167 168 if (encoding.intern() != OpenCms.getSystemInfo().getDefaultEncoding()) { 169 encoding = lookupEncoding(encoding, null); 170 } 171 if (encoding != null) { 172 try { 173 return new String (bytes, encoding); 174 } catch (UnsupportedEncodingException e) { 175 } 177 } else { 178 if (LOG.isWarnEnabled()) { 179 LOG.warn(Messages.get().getBundle().key(Messages.ERR_UNSUPPORTED_VM_ENCODING_1, encoding)); 180 } 181 encoding = OpenCms.getSystemInfo().getDefaultEncoding(); 182 try { 183 return new String (bytes, encoding); 184 } catch (UnsupportedEncodingException e) { 185 } 187 } 188 LOG.error(Messages.get().getBundle().key(Messages.ERR_ENCODING_ISSUES_1, encoding)); 190 return null; 191 } 192 193 200 public static String decode(String source) { 201 202 return decode(source, ENCODING_UTF_8); 203 } 204 205 218 public static String decode(String source, String encoding) { 219 220 if (source == null) { 221 return null; 222 } 223 if (encoding != null) { 224 try { 225 return URLDecoder.decode(source, encoding); 226 } catch (java.io.UnsupportedEncodingException e) { 227 } 229 } 230 try { 232 return URLDecoder.decode(source, ENCODING_UTF_8); 233 } catch (java.io.UnsupportedEncodingException e) { 234 } 236 return source; 237 } 238 239 249 public static String decodeHtmlEntities(String input, String encoding) { 250 251 Matcher matcher = ENTITIY_PATTERN.matcher(input); 252 StringBuffer result = new StringBuffer (input.length()); 253 Charset charset = Charset.forName(encoding); 254 CharsetEncoder encoder = charset.newEncoder(); 255 256 while (matcher.find()) { 257 String entity = matcher.group(); 258 String value = entity.substring(2, entity.length() - 1); 259 int c = Integer.valueOf(value).intValue(); 260 if (c < 128) { 261 entity = new String (new char[] {(char)c}); 263 } else if (encoder.canEncode((char)c)) { 266 entity = new String (new char[] {(char)c}); 268 } 269 matcher.appendReplacement(result, entity); 270 } 271 matcher.appendTail(result); 272 return result.toString(); 273 } 274 275 282 public static String decodeParameter(String input) { 283 284 String result = CmsStringUtil.substitute(input, ENTITY_REPLACEMENT, ENTITY_PREFIX); 285 return CmsEncoder.decodeHtmlEntities(result, OpenCms.getSystemInfo().getDefaultEncoding()); 286 } 287 288 295 public static String encode(String source) { 296 297 return encode(source, ENCODING_UTF_8); 298 } 299 300 313 public static String encode(String source, String encoding) { 314 315 if (source == null) { 316 return null; 317 } 318 if (encoding != null) { 319 try { 320 return URLEncoder.encode(source, encoding); 321 } catch (java.io.UnsupportedEncodingException e) { 322 } 324 } 325 try { 327 return URLEncoder.encode(source, ENCODING_UTF_8); 328 } catch (java.io.UnsupportedEncodingException e) { 329 } 331 return source; 332 } 333 334 348 public static String encodeHtmlEntities(String input, String encoding) { 349 350 StringBuffer result = new StringBuffer (input.length() * 2); 351 CharBuffer buffer = CharBuffer.wrap(input.toCharArray()); 352 Charset charset = Charset.forName(encoding); 353 CharsetEncoder encoder = charset.newEncoder(); 354 for (int i = 0; i < buffer.length(); i++) { 355 int c = buffer.get(i); 356 if (c < 128) { 357 result.append((char)c); 359 } else if (encoder.canEncode((char)c)) { 362 result.append((char)c); 364 } else { 365 result.append(ENTITY_PREFIX); 367 result.append(c); 368 result.append(";"); 369 } 370 } 371 return result.toString(); 372 } 373 374 385 public static String encodeParameter(String input) { 386 387 String result = CmsEncoder.encodeHtmlEntities(input, CmsEncoder.ENCODING_US_ASCII); 388 result = CmsStringUtil.substitute(result, "+", PLUS_ENTITY); 389 return CmsStringUtil.substitute(result, ENTITY_PREFIX, ENTITY_REPLACEMENT); 390 } 391 392 399 public static String escape(String source, String encoding) { 400 401 return CmsStringUtil.substitute(encode(source, encoding), "+", "%20"); 403 } 404 405 417 public static String escapeHtml(String source) { 418 419 int terminatorIndex; 420 if (source == null) { 421 return null; 422 } 423 StringBuffer result = new StringBuffer (source.length() * 2); 424 for (int i = 0; i < source.length(); i++) { 425 int ch = source.charAt(i); 426 if (ch == 38) { 428 terminatorIndex = source.indexOf(";", i); 429 if (terminatorIndex > 0) { 430 if (source.substring(i + 1, terminatorIndex).matches("#[0-9]+|lt|gt|amp|quote")) { 431 result.append(source.substring(i, terminatorIndex + 1)); 432 i = terminatorIndex; 434 continue; 435 } 436 } 437 } 438 if ((ch != 32) && ((ch > 122) || (ch < 48) || (ch == 60) || (ch == 62))) { 439 result.append(ENTITY_PREFIX); 440 result.append(ch); 441 result.append(";"); 442 } else { 443 result.append((char)ch); 444 } 445 } 446 return new String (result); 447 } 448 449 461 public static String escapeNonAscii(String source) { 462 463 if (source == null) { 464 return null; 465 } 466 StringBuffer result = new StringBuffer (source.length() * 2); 467 for (int i = 0; i < source.length(); i++) { 468 int ch = source.charAt(i); 469 if (ch > 255) { 470 result.append(ENTITY_PREFIX); 471 result.append(ch); 472 result.append(";"); 473 } else { 474 result.append((char)ch); 475 } 476 } 477 return new String (result); 478 } 479 480 488 public static String escapeWBlanks(String source, String encoding) { 489 490 if (CmsStringUtil.isEmpty(source)) { 491 return source; 492 } 493 StringBuffer ret = new StringBuffer (source.length() * 2); 494 495 499 String enc = encode(source, encoding); 500 for (int z = 0; z < enc.length(); z++) { 501 char c = enc.charAt(z); 502 if (c == '+') { 503 ret.append("%20"); 504 } else { 505 ret.append(c); 506 } 507 } 508 return ret.toString(); 509 } 510 511 528 public static String escapeXml(String source) { 529 530 return escapeXml(source, false); 531 } 532 533 552 public static String escapeXml(String source, boolean doubleEscape) { 553 554 if (source == null) { 555 return null; 556 } 557 StringBuffer result = new StringBuffer (source.length() * 2); 558 559 for (int i = 0; i < source.length(); ++i) { 560 char ch = source.charAt(i); 561 switch (ch) { 562 case '<': 563 result.append("<"); 564 break; 565 case '>': 566 result.append(">"); 567 break; 568 case '&': 569 if (!doubleEscape) { 571 int terminatorIndex = source.indexOf(";", i); 572 if (terminatorIndex > 0) { 573 if (source.substring(i + 1, terminatorIndex).matches("#[0-9]+")) { 574 result.append(ch); 575 break; 576 } 577 } 578 } 579 result.append("&"); 581 break; 582 case '"': 583 result.append("""); 584 break; 585 default: 586 result.append(ch); 587 } 588 } 589 return new String (result); 590 } 591 592 609 public static String lookupEncoding(String encoding, String fallback) { 610 611 String result = (String )m_encodingCache.get(encoding); 612 if (result != null) { 613 return result; 614 } 615 616 try { 617 result = Charset.forName(encoding).name(); 618 m_encodingCache.put(encoding, result); 619 return result; 620 } catch (Throwable t) { 621 } 623 624 return fallback; 625 } 626 627 637 public static String redecodeUriComponent(String input) { 638 639 if (input == null) { 640 return input; 641 } 642 return new String ( 643 changeEncoding(input.getBytes(), ENCODING_UTF_8, OpenCms.getSystemInfo().getDefaultEncoding())); 644 } 645 646 654 public static String unescape(String source, String encoding) { 655 656 if (source == null) { 657 return null; 658 } 659 int len = source.length(); 660 StringBuffer preparedSource = new StringBuffer (len); 662 for (int i = 0; i < len; i++) { 663 char c = source.charAt(i); 664 if (c == '+') { 665 preparedSource.append("%20"); 666 } else { 667 preparedSource.append(c); 668 } 669 } 670 return decode(preparedSource.toString(), encoding); 671 } 672 } | Popular Tags |