import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

/**
 * Translates character-encoding names between their IANA spelling and the historical Java (java.io) spelling.
 * <p>
 * Lookups are case-insensitive. A name is first looked up as given (upper-cased); if that fails, common vendor
 * prefixes such as <code>IBM</code>, <code>CP</code>, <code>WINDOWS-</code> and <code>ISO_</code> are normalized
 * by {@link #handlecommonAlias(String)} and the lookup is retried.
 * </p>
 * The class only exposes static methods; the alias table is filled once in the static initializer and never
 * modified afterwards.
 */
public abstract class EncodingNameMapper
{

    /**
     * Alias table. Key: upper-case alias (String). Value: <code>String[2]</code> holding
     * <code>{ianaName, javaName}</code>.
     */
    private static final Map encodingNameMap = new HashMap();

    static
    {
        mapAll("ISO-8859-1", "ISO8859_1", new String[]{
            "ISO-8859-1", "ISO8859_1", "ISO-IR-100", "LATIN1", "CSISOLATIN1", "L1", "819"});

        mapAll("US-ASCII", "ASCII", new String[]{
            "US-ASCII", "ASCII", "ISO-IR-6", "CSASCII", "ISO646-US", "US", "367"});

        mapAll("UTF-8", "UTF8", new String[]{"UTF-8", "UTF8"});
        mapAll("UTF-16", "Unicode", new String[]{"UTF-16", "UNICODE", "UTF16"});
        mapAll("UTF-16BE", "UnicodeBig", new String[]{"UTF-16BE", "UNICODEBIG", "UTF16-BE", "UTF16BE"});
        mapAll("UTF-16LE", "UnicodeLittle", new String[]{"UTF-16LE", "UNICODELITTLE", "UTF16-LE", "UTF16LE"});

        mapAll("BIG5", "Big5", new String[]{"BIG5", "CSBIG5"});

        // Some "CS..." aliases historically report themselves as the IANA name; that behavior is kept as-is.
        mapAll("SHIFT_JIS", "SJIS", new String[]{"SJIS", "SHIFT_JIS", "SHIFTJIS"});
        map("CSSHIFTJIS", "CSSHIFTJIS", "SJIS");
        map("MS_KANJI", "MS_KANJI", "SJIS");

        mapAll("ISO-2022-JP", "JIS", new String[]{"JIS", "ISO-2022-JP", "ISO2022"});
        map("CSISO2022JP", "CSISO2022JP", "JIS");

        mapAll("ISO-2022-KR", "ISO2022KR", new String[]{"ISO2022KR", "ISO-2022-KR"});
        map("CSISO2022KR", "CSISO2022KR", "ISO2022KR");
        mapAll("ISO-2022-CN", "ISO2022CN", new String[]{"ISO-2022-CN", "ISO2022CN"});

        mapAll("macintosh", "MacRoman", new String[]{"MACROMAN", "MACINTOSH", "MACINTOSH ROMAN"});

        // IBM code pages. Keys are the bare numbers because handlecommonAlias() strips IBM/CP style prefixes.
        map("37", "IBM037", "CP037");
        String[] plainIbmPages = {
            "273", "277", "278", "280", "284", "285", "290", "297", "420", "424", "437", "500", "775", "850",
            "852", "855", "857", "860", "861", "862", "863", "864", "865", "866", "868", "869", "870", "871",
            "1026"};
        for (int i = 0; i < plainIbmPages.length; i++)
        {
            map(plainIbmPages[i], "IBM" + plainIbmPages[i], "CP" + plainIbmPages[i]);
        }
        map("CSPCP852", "IBM852", "CP852");
        mapAll("IBM00858", "Cp858", new String[]{"858", "0858"});
        map("IS", "IBM861", "CP861");
        map("AR", "IBM868", "CP868");
        map("GR", "IBM869", "CP869");
        map("EBCDIC-CP-IS", "IBM871", "CP871");
        map("918", "CP918", "CP918");
        mapAll("IBM00924", "CP924", new String[]{"924", "0924"});
        map("1047", "IBM1047", "Cp1047");
        for (int page = 1140; page <= 1149; page++)
        {
            map(String.valueOf(page), "IBM0" + page, "Cp" + page);
        }

        // Windows code pages 1250-1258.
        for (int page = 1250; page <= 1258; page++)
        {
            map(String.valueOf(page), "WINDOWS-" + page, "Cp" + page);
        }
        map("WIN1252", "WINDOWS-1252", "Cp1252");

        mapAll("EUC-JP", "EUCJIS", new String[]{"EUC-JP", "EUCJIS"});
        mapAll("EUC-KR", "KSC5601", new String[]{"EUC-KR", "KSC5601"});
        mapAll("GB2312", "GB2312", new String[]{"GB2312", "CSGB2312"});
        mapAll("X0201", "JIS0201", new String[]{"X0201", "JIS0201"});
        map("X0208", "X0208", "JIS0208");
        // "JIS0208" was registered twice in the past; the later registration (ISO-IR-87) always won, so only that
        // effective mapping is kept here.
        mapAll("ISO-IR-87", "JIS0208", new String[]{"ISO-IR-87", "JIS0208"});
        mapAll("X0212", "JIS0212", new String[]{"X0212", "JIS0212", "ISO-IR-159"});
        map("GB18030", "GB18030", "GB18030");

        mapAll("GBK", "GBK", new String[]{"936", "MS936"});

        mapAll("WINDOWS-31J", "MS932", new String[]{"MS932", "WINDOWS-31J", "CSWINDOWS31J"});
        mapAll("TIS-620", "TIS620", new String[]{"TIS-620", "TIS620"});

        mapAll("ISO-8859-2", "ISO8859_2", new String[]{"ISO-8859-2", "ISO8859_2", "ISO-IR-101", "LATIN2", "L2"});
        mapAll("ISO-8859-3", "ISO8859_3", new String[]{"ISO-8859-3", "ISO8859_3", "ISO-IR-109", "LATIN3", "L3"});
        // LATIN4 was missing (ISO-IR-110 had been listed twice instead).
        mapAll("ISO-8859-4", "ISO8859_4", new String[]{"ISO-8859-4", "ISO8859_4", "ISO-IR-110", "LATIN4", "L4"});
        mapAll("ISO-8859-5", "ISO8859_5", new String[]{"ISO-8859-5", "ISO8859_5", "ISO-IR-144", "CYRILLIC"});
        mapAll("ISO-8859-6", "ISO8859_6", new String[]{"ISO-8859-6", "ISO8859_6", "ISO-IR-127", "ARABIC"});
        mapAll("ISO-8859-7", "ISO8859_7", new String[]{"ISO-8859-7", "ISO8859_7", "ISO-IR-126", "GREEK"});
        // csISOLatinHebrew is an IANA alias of ISO-8859-8 (it used to be filed under ISO-8859-9 by mistake).
        mapAll("ISO-8859-8", "ISO8859_8", new String[]{
            "ISO-8859-8", "ISO8859_8", "ISO-8859-8-I", "ISO-IR-138", "HEBREW", "CSISOLATINHEBREW"});
        // The Java name is ISO8859_9; the former "ISO8859_8" typo also overwrote the Hebrew entry above.
        mapAll("ISO-8859-9", "ISO8859_9", new String[]{
            "ISO-8859-9", "ISO8859_9", "ISO-IR-148", "LATIN5", "CSISOLATIN5", "L5"});
        mapAll("ISO-8859-15", "ISO8859_15", new String[]{"ISO-8859-15", "ISO8859_15"});

        map("KOI8-R", "KOI8-R", "KOI8_R");
        mapAll("CSKOI8R", "KOI8_R", new String[]{"KOI8_R", "CSKOI8R"});
    }

    /**
     * Registers a single alias.
     * @param alias upper-case lookup key
     * @param ianaName name returned by {@link #toIana(String)}
     * @param javaName name returned by {@link #toJava(String)}
     */
    private static void map(String alias, String ianaName, String javaName)
    {
        encodingNameMap.put(alias, new String[]{ianaName, javaName});
    }

    /**
     * Registers several aliases that all resolve to the same pair of names.
     * @param ianaName name returned by {@link #toIana(String)}
     * @param javaName name returned by {@link #toJava(String)}
     * @param aliases upper-case lookup keys
     */
    private static void mapAll(String ianaName, String javaName, String[] aliases)
    {
        for (int i = 0; i < aliases.length; i++)
        {
            map(aliases[i], ianaName, javaName);
        }
    }

    /**
     * Returns the IANA name for the given encoding name or alias.
     * @param encoding encoding name in any case, may be <code>null</code>
     * @return the IANA name, or <code>null</code> if <code>encoding</code> is <code>null</code> or unknown
     */
    public static String toIana(String encoding)
    {
        String[] names = lookup(encoding);
        return names == null ? null : names[0];
    }

    /**
     * Normalizes well-known vendor prefixes so that e.g. <code>IBM850</code>, <code>cp850</code>,
     * <code>CP-850</code> and <code>csIBM850</code> all produce the key <code>850</code>, and
     * <code>ISO_8859-1</code> produces <code>ISO-8859-1</code>.
     * @param encoding encoding name, must not be <code>null</code>
     * @return upper-case key with the recognized prefix removed or rewritten
     */
    private static String handlecommonAlias(String encoding)
    {
        // Fixed locale: with the default locale a Turkish environment would upper-case 'i' to a dotted capital I.
        String key = encoding.toUpperCase(Locale.ENGLISH);

        // Longer prefixes are tested first so that e.g. "IBM0" wins over "IBM".
        if (key.startsWith("CSIBM") || key.startsWith("CCSID"))
        {
            key = key.substring(5);
        }
        else if (key.startsWith("IBM-") || key.startsWith("IBM0") || key.startsWith("CP-0"))
        {
            key = key.substring(4);
        }
        else if (key.startsWith("IBM") || key.startsWith("CP0") || key.startsWith("CP-"))
        {
            key = key.substring(3);
        }
        else if (key.startsWith("CP"))
        {
            key = key.substring(2);
        }
        else if (key.startsWith("WINDOWS-"))
        {
            key = key.substring(8);
        }
        else if (key.startsWith("ISO_"))
        {
            key = "ISO-" + key.substring(4);
        }

        return key;
    }

    /**
     * Finds the table entry for an encoding name.
     * @param encoding encoding name in any case, may be <code>null</code>
     * @return <code>{ianaName, javaName}</code>, or <code>null</code> if there is no entry
     */
    private static String[] lookup(String encoding)
    {
        if (encoding == null)
        {
            return null;
        }

        // Exact (case-insensitive) match first: keys such as "WINDOWS-31J" would otherwise lose their prefix
        // in handlecommonAlias() and never be found.
        String[] names = (String[]) encodingNameMap.get(encoding.toUpperCase(Locale.ENGLISH));
        if (names == null)
        {
            names = (String[]) encodingNameMap.get(handlecommonAlias(encoding));
        }
        return names;
    }

    /**
     * Returns the Java (java.io style) name for the given encoding name or alias.
     * @param encoding encoding name in any case, may be <code>null</code>
     * @return the Java name, or <code>null</code> if <code>encoding</code> is <code>null</code> or unknown
     */
    public static String toJava(String encoding)
    {
        String[] names = lookup(encoding);
        return names == null ? null : names[1];
    }
}