package com.caucho.vfs;

import com.caucho.util.CharBuffer;
import com.caucho.vfs.i18n.EncodingReader;
import com.caucho.vfs.i18n.EncodingWriter;
import com.caucho.vfs.i18n.ISO8859_1Writer;
import com.caucho.vfs.i18n.JDKReader;
import com.caucho.vfs.i18n.JDKWriter;

import java.io.InputStream;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Locale;

/**
 * Static registry that converts between charset spellings (aliases,
 * canonical "mime" names and the names used for the Java converters) and
 * hands out reader/writer factories for a named encoding.
 *
 * <p>Names are matched after normalization: lower-case letters are
 * upper-cased and '_' is replaced by '-'.
 */
public class Encoding {
  /** Canonical mime name -> Java encoding name. */
  static HashMap<String,String> _javaName;
  /** Alias (literal and normalized spelling) -> canonical mime name. */
  static Hashtable<String,String> _mimeName;
  /** Locale string or language code -> canonical mime name. */
  static HashMap<String,String> _localeName;

  /** Reader factories keyed by the encoding name as passed by the caller. */
  static final HashMap<String,EncodingReader> _readEncodingFactories =
    new HashMap<String,EncodingReader>();

  /** Writer factories keyed by the encoding name as passed by the caller. */
  static final HashMap<String,EncodingWriter> _writeEncodingFactories =
    new HashMap<String,EncodingWriter>();

  static final EncodingWriter _latin1Writer = new ISO8859_1Writer();

  /** Java encoding name used whenever no name can be resolved. */
  private static final String DEFAULT_JAVA_ENCODING = "ISO8859_1";

  /** Package prefix of the hand-written converter classes. */
  private static final String CONVERTER_PACKAGE = "com.caucho.vfs.i18n.";

  /**
   * Utility class; not instantiable.
   */
  private Encoding() {}

  /**
   * Returns the canonical mime name for an encoding alias.
   *
   * @param encoding alias to look up, may be null
   * @return null when {@code encoding} is null; the registered canonical
   *         name when the alias (as given, or normalized) is known;
   *         otherwise the normalized spelling of {@code encoding}
   */
  public static String getMimeName(String encoding)
  {
    if (encoding == null)
      return null;

    String value = _mimeName.get(encoding);
    if (value != null)
      return value;

    String upper = normalize(encoding);
    String lookup = _mimeName.get(upper);

    value = lookup == null ? upper : lookup;

    // Remember the caller's exact spelling so the next lookup is a single get.
    // NOTE(review): entries are never evicted; if the name can come from
    // untrusted input this table grows without bound -- confirm callers.
    _mimeName.put(encoding, value);

    return value;
  }

  /**
   * Returns the canonical mime name registered for a locale.
   *
   * @param locale locale to look up, may be null
   * @return the name registered for {@code locale.toString()}, else for
   *         {@code locale.getLanguage()}, else "ISO-8859-1" (also returned
   *         for a null locale)
   */
  public static String getMimeName(Locale locale)
  {
    if (locale == null)
      return "ISO-8859-1";

    String mimeName = _localeName.get(locale.toString());
    if (mimeName == null)
      mimeName = _localeName.get(locale.getLanguage());

    return mimeName == null ? "ISO-8859-1" : mimeName;
  }

  /**
   * Returns a reader that decodes {@code is} using the named encoding.
   *
   * <p>The factory is created on first use and cached per encoding name.
   * A converter class named {@code com.caucho.vfs.i18n.<javaName>Reader} is
   * tried first; when it cannot be loaded or instantiated a
   * {@link JDKReader} is used instead.
   *
   * @param is the byte stream to decode
   * @param encoding the encoding name; null resolves to ISO8859_1
   * @return the reader produced by the factory's {@code create}
   * @throws UnsupportedEncodingException declared by the factory's
   *         {@code create} call
   */
  public static Reader getReadEncoding(InputStream is, String encoding)
    throws UnsupportedEncodingException
  {
    EncodingReader factory;

    synchronized (_readEncodingFactories) {
      factory = _readEncodingFactories.get(encoding);

      if (factory == null) {
        factory = newReaderFactory(encoding);
        _readEncodingFactories.put(encoding, factory);
      }
    }

    return factory.create(is, factory.getJavaEncoding());
  }

  /**
   * Returns a writer for the named encoding.
   *
   * <p>The factory is created on first use and cached per encoding name.
   * A converter class named {@code com.caucho.vfs.i18n.<javaName>Writer} is
   * tried first; when it cannot be loaded or instantiated a
   * {@link JDKWriter} is used instead.
   *
   * @param encoding the encoding name; null resolves to ISO8859_1
   * @return a writer created by the cached or newly created factory
   */
  public static EncodingWriter getWriteEncoding(String encoding)
  {
    EncodingWriter factory;
    boolean isCached;

    // The cache is a plain HashMap, so reads must hold the same lock as the
    // writes; an unlocked get() may observe the map in the middle of a put().
    synchronized (_writeEncodingFactories) {
      factory = _writeEncodingFactories.get(encoding);
      isCached = factory != null;

      if (! isCached) {
        factory = newWriterFactory(encoding);
        _writeEncodingFactories.put(encoding, factory);
      }
    }

    // NOTE(review): a cache hit calls create() while the first request for
    // an encoding calls create(encoding). This split is kept exactly as it
    // was; confirm against EncodingWriter whether the two are equivalent.
    if (isCached)
      return factory.create();

    return factory.create(encoding);
  }

  /**
   * Returns the shared ISO-8859-1 writer instance.
   */
  public static EncodingWriter getLatin1Writer()
  {
    return _latin1Writer;
  }

  /**
   * Returns the Java encoding name for an encoding alias.
   *
   * @param encoding alias to look up, may be null
   * @return null when {@code encoding} is null; the registered Java name
   *         when the normalized alias, or its canonical mime name, has one;
   *         otherwise the normalized spelling of {@code encoding}
   */
  public static String getJavaName(String encoding)
  {
    if (encoding == null)
      return null;

    String upper = normalize(encoding);

    String javaName = _javaName.get(upper);
    if (javaName != null)
      return javaName;

    String lookup = _mimeName.get(upper);
    if (lookup != null)
      javaName = _javaName.get(lookup);

    return javaName == null ? upper : javaName;
  }

  /**
   * Returns the Java encoding name for a locale.
   *
   * @param locale locale to look up, may be null
   * @return null for a null locale, otherwise the Java name of
   *         {@link #getMimeName(Locale)}
   */
  public static String getJavaName(Locale locale)
  {
    if (locale == null)
      return null;

    return getJavaName(getMimeName(locale));
  }

  /**
   * Resolves the Java encoding name, defaulting to ISO8859_1 when
   * {@link #getJavaName(String)} yields null.
   */
  private static String resolveJavaEncoding(String encoding)
  {
    String javaEncoding = getJavaName(encoding);

    return javaEncoding == null ? DEFAULT_JAVA_ENCODING : javaEncoding;
  }

  /**
   * Builds an uncached reader factory: the dedicated converter class when
   * it can be instantiated, otherwise a JDKReader.
   */
  private static EncodingReader newReaderFactory(String encoding)
  {
    String javaEncoding = resolveJavaEncoding(encoding);
    EncodingReader factory = null;

    try {
      Class<?> cl = Class.forName(CONVERTER_PACKAGE + javaEncoding + "Reader");

      factory = (EncodingReader) cl.newInstance();
      factory.setJavaEncoding(javaEncoding);
    } catch (Throwable e) {
      // Deliberately ignored: a missing or unusable converter class only
      // means the JDK-backed reader below is used.
    }

    if (factory == null) {
      factory = new JDKReader();
      factory.setJavaEncoding(javaEncoding);
    }

    return factory;
  }

  /**
   * Builds an uncached writer factory: the dedicated converter class when
   * it can be instantiated, otherwise a JDKWriter.
   */
  private static EncodingWriter newWriterFactory(String encoding)
  {
    String javaEncoding = resolveJavaEncoding(encoding);
    EncodingWriter factory = null;

    try {
      Class<?> cl = Class.forName(CONVERTER_PACKAGE + javaEncoding + "Writer");

      factory = (EncodingWriter) cl.newInstance();
      factory.setJavaEncoding(javaEncoding);
    } catch (Throwable e) {
      // Deliberately ignored: a missing or unusable converter class only
      // means the JDK-backed writer below is used.
    }

    if (factory == null) {
      factory = new JDKWriter();
      factory.setJavaEncoding(javaEncoding);
    }

    return factory;
  }

  /**
   * Normalizes an encoding name: lower-case letters are upper-cased
   * character by character and '_' becomes '-'; everything else is kept.
   */
  private static String normalize(String name)
  {
    int len = name.length();
    StringBuilder sb = new StringBuilder(len);

    for (int i = 0; i < len; i++) {
      char ch = name.charAt(i);

      if (Character.isLowerCase(ch))
        sb.append(Character.toUpperCase(ch));
      else if (ch == '_')
        sb.append('-');
      else
        sb.append(ch);
    }

    return sb.toString();
  }

  /**
   * Registers an alias under its literal spelling and under its normalized
   * spelling. The normalized key is the one getJavaName() and the fallback
   * path of getMimeName() actually look up, so an alias written in lower
   * case or with '_' would otherwise never match there.
   */
  private static void addMimeAlias(String alias, String mimeName)
  {
    _mimeName.put(alias, mimeName);
    _mimeName.put(normalize(alias), mimeName);
  }

  /**
   * Registers a canonical mime name with its Java encoding name and all of
   * its aliases.
   */
  private static void register(String mimeName, String javaName,
                               String... aliases)
  {
    _javaName.put(mimeName, javaName);

    for (int i = 0; i < aliases.length; i++)
      addMimeAlias(aliases[i], mimeName);
  }

  /**
   * Maps each locale key (language code or locale string) to a mime name.
   */
  private static void mapLocales(String mimeName, String... locales)
  {
    for (int i = 0; i < locales.length; i++)
      _localeName.put(locales[i], mimeName);
  }

  static {
    _javaName = new HashMap<String,String>();
    _mimeName = new Hashtable<String,String>();
    _localeName = new HashMap<String,String>();

    register("US-ASCII", "ISO8859_1",
             "ANSI-X3.4-1968", "ISO-IR-6", "ISO-646.IRV:1991", "ASCII",
             "ISO646-US", "US-ASCII", "us", "IBM367", "CP367", "CSASCII");

    register("ISO-2022-KR", "ISO2022_KR",
             "ISO-2022-KR", "CSISO2022KR", "ISO2022-KR");

    register("EUC-KR", "EUC_KR",
             "EUC-KR", "CSEUCKR");

    register("ISO-2022-JP", "ISO2022JP",
             "ISO-2022-JP", "CSISO2022JP", "ISO2022-JP");

    register("ISO-2022-JP-2", "ISO2022_JP2",
             "ISO-2022-JP-2", "CSISO2022JP2", "ISO2022-JP2");

    register("ISO-8859-1", "ISO8859_1",
             "ISO_8859-1:1987", "ISO-IR-100", "ISO-8859-1", "LATIN1",
             "LATIN-1", "L1", "IBM819", "CP819", "CSISOLATIN1", "ISO8859-1",
             "8859-1", "8859_1");

    register("ISO-8859-2", "ISO8859_2",
             "ISO-8859-2:1987", "ISO-IR-101", "ISO-8859-2", "LATIN2",
             "LATIN-2", "L2", "CSISOLATIN2", "ISO8859-2");

    register("ISO-8859-3", "ISO8859_3",
             "ISO-8859-3:1988", "ISO-IR-109", "ISO-8859-3", "LATIN3",
             "LATIN-3", "L3", "CSISOLATIN3", "ISO8859-3");

    register("ISO-8859-4", "ISO8859_4",
             "ISO-8859-4:1988", "ISO-IR-110", "ISO-8859-4", "LATIN4",
             "LATIN-4", "L4", "CSISOLATIN4", "ISO8859-4");

    register("ISO-8859-5", "ISO8859_5",
             "ISO-8859-5:1988", "ISO-IR-144", "ISO-8859-5", "CYRILLIC",
             "CSISOLATINCYRILLIC", "ISO8859-5");

    register("ISO-8859-6", "ISO8859_6",
             "ISO-8859-6:1987", "ISO-IR-127", "ISO-8859-6", "ECMA-114",
             "ASMO-708", "ARABIC", "CSISOLATINARABIC", "ISO8859-6");

    // "CSISOLATINGREEN" is a historical misspelling of csISOLatinGreek; it
    // is kept so existing callers still resolve, with the correct name added.
    register("ISO-8859-7", "ISO8859_7",
             "ISO-8859-7:1987", "ISO-IR-126", "ISO-8859-7", "ELOT-928",
             "ECMA-118", "GREEK", "GREEK8", "CSISOLATINGREEN",
             "CSISOLATINGREEK", "ISO8859-7");

    register("ISO-8859-8", "ISO8859_8",
             "ISO-8859-8:1988", "ISO-IR-138", "ISO-8859-8", "HEBREW",
             "CSISOLATINHEBREW", "ISO8859-8");

    register("ISO-8859-9", "ISO8859_9",
             "ISO-8859-9:1989", "ISO-IR-148", "ISO-8859-9", "LATIN5",
             "LATIN-5", "L5", "CSISOLATIN5", "ISO8859-9");

    // "I6" and "cslSOLatin6" are historical misspellings of l6 and
    // csISOLatin6; they are kept, with the correct names added.
    register("ISO-8859-10", "ISO8859_10",
             "ISO_8859-10:1992", "iso-ir-157", "I6", "L6", "cslSOLatin6",
             "CSISOLATIN6", "latin6");

    register("UTF-7", "UTF7",
             "UTF-7", "UTF7");

    register("UTF-8", "UTF8",
             "UTF-8", "UTF8");

    register("UTF-16", "UTF16",
             "UTF-16", "UTF16");

    register("UTF-16-REV", "UTF16_REV",
             "UTF-16-REV", "UTF16-REV");

    register("JIS_Encoding", "JIS_ENCODING",
             "JIS-ENCODING", "CSJISENCODING");

    register("Shift_JIS", "SJIS",
             "SHIFT-JIS", "SHIFT_JIS", "CSSHIFTJIS", "SJIS");

    register("EUC-JP", "EUC_JP",
             "EUC-JP", "EUCJP", "EUC-JP-LINUX");

    register("GB2312", "GB2312",
             "GB2312", "CSGB2312");

    register("GBK", "GBK",
             "GBK");

    register("Big5", "BIG5",
             "BIG5", "BIG-5", "CSBIG5");

    register("KOI8-R", "KOI8-R",
             "KOI8-R", "KOI-8-R");

    register("ms950", "MS950",
             "MS950");

    _javaName.put("JAVA", "JAVA");

    // The mime name is plain ISO-8859-1, but the Java name is keyed on the
    // alias itself, so getJavaName("windows-hack") yields "WindowsHack".
    addMimeAlias("windows-hack", "ISO-8859-1");
    _javaName.put("WINDOWS-HACK", "WindowsHack");

    register("MacRoman", "MacRoman",
             "MACROMAN");

    register("ks_c_5601-1987", "Cp949",
             "KS_C_5601-1987");

    _javaName.put("IBM500", "Cp500");

    String []cp = new String[] {
      "037", "1006", "1025", "1026", "1046", "1097",
      "1098", "1112", "1122", "1123", "1124", "1250",
      "1251", "1252", "1253", "1254", "1255", "1256",
      "1257", "1258", "1381", "273", "277", "278", "280", "284",
      "285", "297", "33722", "420", "424", "437", "500", "737",
      "775", "838", "850", "852", "855", "857", "860", "861", "862",
      "863", "864", "865", "866", "868", "869", "870", "871", "874",
      "875", "918", "921", "922", "930", "933", "935", "937", "939",
      "942", "948", "949", "964", "970"
    };

    for (int i = 0; i < cp.length; i++) {
      register("windows-" + cp[i], "Cp" + cp[i],
               "CP" + cp[i], "WINDOWS-" + cp[i]);
    }

    mapLocales("ISO-8859-1",
               "af", "sq", "eu", "ca", "da", "nl", "en", "fo", "fi", "fr",
               "gl", "de", "is", "ga", "it", "no", "pt", "gd", "es", "sv");
    mapLocales("ISO-8859-2",
               "hr", "cs", "hu", "pl", "ro", "sk", "sl");
    mapLocales("ISO-8859-3", "eo", "mt");
    mapLocales("ISO-8859-5",
               "bg", "be", "mk", "ru", "sr", "uk");
    mapLocales("ISO-8859-6", "ar");
    mapLocales("ISO-8859-7", "el");
    // "iw" is the legacy code for Hebrew; newer JDKs report "he" from
    // Locale.getLanguage(), so both spellings are mapped.
    mapLocales("ISO-8859-8", "iw", "he");
    mapLocales("ISO-8859-9", "tr");
    mapLocales("ISO-8859-10", "et", "lv", "lt");

    mapLocales("Shift_JIS", "ja");
    mapLocales("EUC-KR", "ko");
    mapLocales("GB2312", "zh");
    mapLocales("Big5", "zh_TW");
  }
}