1 10 package org.mmbase.util.transformers; 11 12 import java.io.Reader ; 13 import java.io.Writer ; 14 import java.util.*; 15 16 import org.mmbase.util.logging.*; 17 18 27 28 public class CP1252Surrogator extends ConfigurableReaderTransformer implements CharTransformer { 29 private static final Logger log = Logging.getLoggerInstance(CP1252Surrogator.class); 30 31 32 public static final int WELL_ENCODED = 0; 33 public static final int WRONG_ENCODED = 1; 34 35 36 public CP1252Surrogator() { 37 this(WELL_ENCODED); 38 } 39 public CP1252Surrogator(int conf) { 40 super(conf); 41 } 42 43 44 public Writer transform(Reader r, Writer w) { 45 try { 46 while (true) { 47 int c = r.read(); 48 if (c == -1) break; 49 int cp; 50 if (to == WELL_ENCODED) { cp = ("" + (char) c).getBytes("CP1252")[0] & 0xff; } else { 53 cp = c; 54 55 } 56 switch (cp) { 57 case 128: w.write("EURO"); break; case 129: w.write('?'); break; case 130: w.write(','); break; case 131: w.write('f'); break; case 132: w.write(",,"); break; case 133: w.write("..."); break; case 134: w.write('+'); break; case 135: w.write("++"); break; case 136: w.write('^'); break; case 137: w.write("0/00"); break; case 138: w.write('S'); break; case 139: w.write('<'); break; case 140: w.write("OE"); break; case 141: w.write('?'); break; case 142: w.write('Z'); break; case 143: w.write('?'); break; case 144: w.write('?'); break; case 145: w.write('\''); break; case 146: w.write('\''); break; case 147: w.write('\"'); break; case 148: w.write('\"'); break; case 149: w.write('-'); break; case 150: w.write('-'); break; case 151: w.write('-'); break; case 152: w.write('~'); break; case 153: w.write("(TM)"); break; case 154: w.write('s'); break; case 155: w.write('>'); break; case 156: w.write("oe"); break; case 157: w.write('?'); break; case 158: w.write('z'); break; case 159: w.write('Y'); break; default: w.write(c); 90 } 91 } 92 } catch (Exception e) { 93 log.error(e.toString()); 94 } 95 return w; 96 } 97 98 99 public Map transformers() { 100 Map h = new HashMap(); 101 h.put("CP1252_SURROGATOR", new Config(CP1252Surrogator.class, WELL_ENCODED, "Takes the java String, and surrogates the 32 characters of it which are in CP1252 but not in ISO-8859-1")); 102 h.put("CP1252_WRONG_SURROGATOR", new Config(CP1252Surrogator.class, WRONG_ENCODED, "Also surrogates the characters specific to CP1252, but supposed the String originally wrong encoded (it was suppoed to be ISO-8859-1, but actually was CP1252)")); 103 return h; 104 } 105 106 107 public String getEncoding() { 108 switch (to) { 109 case WELL_ENCODED: 110 return "CP1252_SURROGATOR"; 111 case WRONG_ENCODED: 112 return "CP1252_WRONG_SURROGATOR"; 113 default : 114 throw new UnknownCodingException(getClass(), to); 115 } 116 } 117 118 119 public static byte[] getTestBytes() { 120 byte[] testBytes = new byte[32]; 121 for (int i = 0; i < 32; i++) { 122 testBytes[i] = (byte) (-128 + i); 123 } 124 return testBytes; 125 } 126 127 public static String getTestString() { 128 try { 129 return new String (getTestBytes(), "CP1252"); 130 } catch (Exception e) { 131 return e.toString(); 132 } 133 } 134 135 149 public static void main(String [] args) { 150 151 String testStringCP1252 = "bla bla " + getTestString(); 153 String testStringISO1 = ""; 154 try { 155 testStringISO1 = "bla bla " + new String (getTestBytes(), "ISO-8859-1"); } catch (Exception e) { 157 log.error("", e); 158 } 159 160 CharTransformer transOk = new CP1252Surrogator(); 161 CharTransformer transNok = new CP1252Surrogator(WRONG_ENCODED); 162 CharTransformer unicode = new UnicodeEscaper(); 163 164 System.out.println("Test-string (CP1252): " + testStringCP1252); 165 167 System.out.println("Java-escaped (CP1252): " + unicode.transform(testStringCP1252)); 168 System.out.println("Java-escaped (ISO-1) : " + unicode.transform(testStringISO1)); 169 System.out.println("Surrogated test-string (CP1252): " + transOk.transform(testStringCP1252)); 170 System.out.println("Surrogated test-string (ISO-1) : " + transNok.transform(testStringISO1)); 172 173 } 174 175 176 } 177 | Popular Tags |