1 23 package org.enhydra.xml.io; 24 25 import java.io.BufferedReader ; 26 import java.io.BufferedWriter ; 27 import java.io.FileReader ; 28 import java.io.FileWriter ; 29 import java.io.IOException ; 30 import java.io.PrintWriter ; 31 import java.util.ArrayList ; 32 import java.util.HashSet ; 33 34 36 52 public final class MkEncodingsTable { 53 56 private final String CHAR_SET_REGISTRY = "character-sets"; 57 58 61 private final String CHAR_SET_TABLE = "character-sets.tbl"; 62 63 66 private final String REG_NAME_FIELD = "Name:"; 67 private final String REG_ALIAS_FIELD = "Alias:"; 68 69 73 private static final String [] ENCODINGS_7BIT = { 74 "ANSI_X3.4-1968", 75 "T.61-7bit" 76 }; 77 78 81 private static final String [] ENCODINGS_8BIT = { 82 "T.61-8bit", 83 "UNKNOWN-8BIT", 84 "PC8-Danish-Norwegian", 85 "PC8-Turkish", 86 "ISO_8859-1:1987", 87 "ISO_8859-2:1987", 88 "ISO_8859-3:1988", 89 "ISO_8859-4:1988", 90 "ISO_8859-6:1987", 91 "ISO_8859-6-E", 92 "ISO_8859-6-I", 93 "ISO_8859-7:1987", 94 "ISO_8859-8:1988", 95 "ISO_8859-8-E", 96 "ISO_8859-8-I", 97 "ISO_8859-5:1988", 98 "ISO_8859-9:1989", 99 "ISO_8859-supp", 100 "ISO-8859-10", 101 "ISO-8859-15", 102 "ISO-8859-1-Windows-3.0-Latin-1", 103 "ISO-8859-1-Windows-3.1-Latin-1", 104 "ISO-8859-2-Windows-Latin-2", 105 "ISO-8859-9-Windows-Latin-5", 106 "latin-greek", 107 "Latin-greek-1" 108 }; 109 110 113 private static final HashSet f7BitEncodings = new HashSet (); 114 private static final HashSet f8BitEncodings = new HashSet (); 115 116 119 private static final String REG_MIME_PREFERRED = "preferred MIME name"; 120 121 124 private static final String [][] HACKED_ALIASES = { 125 {"UTF-8", "UTF8"}, 126 {"ANSI_X3.4-1968", "646"}, 127 }; 128 129 133 private static final String [][] HACKED_PREFIXES = { 134 {"windows-", "Cp"}, 135 {"ISO_8859-", "ISO8859-"}, 136 {"ISO-8859-", "ISO8859_"} 137 }; 138 139 142 private static String [] EXTRA_ENTRIES = { 143 "UnicodeBig 16 null", 144 "UnicodeBigUnmarked 16 null", 145 "UnicodeLittle 16 null", 146 "UnicodeLittleUnmarked 16 null", 147 "UTF-16 16 null UTF16" 148 }; 149 150 153 static { 154 for (int idx = 0; idx < ENCODINGS_7BIT.length; idx++) { 155 f7BitEncodings.add(ENCODINGS_7BIT[idx]); 156 } 157 for (int idx = 0; idx < ENCODINGS_8BIT.length; idx++) { 158 f8BitEncodings.add(ENCODINGS_8BIT[idx]); 159 } 160 } 161 162 165 private void ianaParseError(String msg, 166 String line) { 167 throw new XMLIOError(msg + "; parsing line in " + CHAR_SET_REGISTRY 168 + "\"" + line + "\""); 169 } 170 171 172 176 private int getCharSize(String encoding) { 177 if (f7BitEncodings.contains(encoding)) { 178 return 7; 179 } else if (f8BitEncodings.contains(encoding)) { 180 return 8; 181 } else { 182 return 16; 183 } 184 } 185 186 190 private String parseName(String line) { 191 int len = line.length(); 192 193 int startIdx = line.indexOf(':'); 195 if (startIdx < 0) { 196 ianaParseError("no `:' found", line); 197 } 198 startIdx++; 199 200 while ((startIdx < len) && (line.charAt(startIdx) == ' ')) { 202 startIdx++; 203 } 204 205 int endIdx = startIdx; 207 while ((endIdx < len) && (line.charAt(endIdx) != ' ')) { 208 endIdx++; 209 } 210 if (endIdx <= startIdx) { 211 return null; 212 } else { 213 return line.substring(startIdx, endIdx).intern(); 214 } 215 } 216 217 220 private boolean isMimePreferredEntry(String line) { 221 return (line.indexOf(REG_MIME_PREFERRED) >= 0); 222 } 223 224 228 private void addAlias(ArrayList aliases, 229 String alias) { 230 if ((alias != null) && !aliases.contains(alias)) { 231 aliases.add(alias); 232 } 233 } 234 235 239 private void makeHackedAliases(ArrayList aliases, 240 String name) { 241 for (int idx = 0; idx < HACKED_ALIASES.length; idx++) { 243 String [] mapping = HACKED_ALIASES[idx]; 244 if (name.equals(mapping[0])) { 245 addAlias(aliases, mapping[1]); 246 } 247 } 248 249 for (int idx = 0; idx < HACKED_PREFIXES.length; idx++) { 251 String [] mapping = HACKED_PREFIXES[idx]; 252 if (name.startsWith(mapping[0])) { 253 addAlias(aliases, 254 mapping[1] + name.substring(mapping[0].length())); 255 } 256 } 257 } 258 259 263 private boolean parseCharSetEntry(BufferedReader in, 264 PrintWriter out) throws IOException { 265 ArrayList aliases = new ArrayList (); 266 String mimePreferred = null; 267 String line = null; 268 269 while ((line = in.readLine()) != null) { 271 if (line.startsWith(REG_NAME_FIELD)) { 272 break; 273 } 274 } 275 if (line == null) { 276 return false; } 278 String name = parseName(line); 279 if (name == null) { 280 ianaParseError("no name parsed", line); 281 } 282 if (isMimePreferredEntry(line)){ 283 mimePreferred = name; 284 } 285 286 makeHackedAliases(aliases, name); 288 289 while (((line = in.readLine()) != null) 291 && (line.trim().length() > 0)) { 292 if (line.startsWith(REG_ALIAS_FIELD)) { 293 String alias = parseName(line); 294 if (alias != null) { 295 addAlias(aliases, alias); 296 makeHackedAliases(aliases, alias); 297 if (isMimePreferredEntry(line)){ 298 mimePreferred = alias; 299 } 300 } 301 } 302 } 303 304 out.print(name); 306 out.print(' '); 307 out.print(getCharSize(name)); 308 out.print(' '); 309 out.print(mimePreferred); 310 int len = aliases.size(); 311 for (int idx = 0; idx < len; idx++) { 312 out.print(' '); 313 out.print(aliases.get(idx)); 314 } 315 out.println(); 316 return true; 317 } 318 319 322 private void parseIanaRegistry(BufferedReader in, 323 PrintWriter out) throws IOException { 324 while (parseCharSetEntry(in, out)) { 325 } 327 } 328 329 332 private void parseIanaRegistry() throws IOException { 333 BufferedReader in = new BufferedReader (new FileReader (CHAR_SET_REGISTRY)); 334 PrintWriter out = new PrintWriter (new BufferedWriter (new FileWriter (CHAR_SET_TABLE))); 335 parseIanaRegistry(in, out); 336 337 for (int i = 0; i < EXTRA_ENTRIES.length; i++) { 338 out.println(EXTRA_ENTRIES[i]); 339 } 340 out.close(); 341 in.close(); 342 } 343 344 347 public static void main(String [] args) throws IOException { 348 new MkEncodingsTable().parseIanaRegistry(); 349 } 350 } 351 | Popular Tags |