1 8 9 package org.python.modules; 10 11 import org.python.core.*; 12 import org.python.core.imp; 13 14 public class _codecs { 15 16 public static void register(PyObject search_function) { 17 codecs.register(search_function); 18 } 19 20 21 public static PyTuple lookup(String encoding) { 22 return codecs.lookup(encoding); 23 } 24 25 26 27 28 private static PyTuple codec_tuple(String s, int len) { 29 return new PyTuple(new PyObject[] { 30 Py.java2py(s), 31 Py.newInteger(len) 32 }); 33 } 34 35 36 37 38 public static PyTuple utf_8_decode(String str) { 39 return utf_8_decode(str, null); 40 } 41 42 public static PyTuple utf_8_decode(String str, String errors) { 43 int size = str.length(); 44 return codec_tuple(codecs.PyUnicode_DecodeUTF8(str, errors), size); 45 } 46 47 48 public static PyTuple utf_8_encode(String str) { 49 return utf_8_encode(str, null); 50 } 51 52 public static PyTuple utf_8_encode(String str, String errors) { 53 int size = str.length(); 54 return codec_tuple(codecs.PyUnicode_EncodeUTF8(str, errors), size); 55 } 56 57 58 59 60 61 public static PyTuple charmap_decode(String str, String errors, 62 PyObject mapping) { 63 int size = str.length(); 64 StringBuffer v = new StringBuffer (size); 65 66 for (int i = 0; i < size; i++) { 67 char ch = str.charAt(i); 68 if (ch > 0xFF) { 69 codecs.decoding_error("charmap", v, errors, 70 "ordinal not in range(255)"); 71 i++; 72 continue; 73 } 74 75 PyObject w = Py.newInteger(ch); 76 PyObject x = mapping.__finditem__(w); 77 if (x == null) { 78 79 v.append(ch); 80 continue; 81 } 82 83 84 if (x instanceof PyInteger) { 85 int value = ((PyInteger) x).getValue(); 86 if (value < 0 || value > 65535) 87 throw Py.TypeError( 88 "character mapping must be in range(65535)"); 89 v.append((char) value); 90 } else if (x == Py.None) { 91 codecs.decoding_error("charmap", v, errors, 92 "character maps to <undefined>"); 93 } else if (x instanceof PyString) { 94 v.append(x.toString()); 95 } 96 else { 97 98 throw Py.TypeError("character mapping must return integer, " + 99 "None or unicode"); 100 } 101 } 102 return codec_tuple(v.toString(), size); 103 } 104 105 106 107 108 109 public static PyTuple charmap_encode(String str, String errors, 110 PyObject mapping) { 111 int size = str.length(); 112 StringBuffer v = new StringBuffer (size); 113 114 for (int i = 0; i < size; i++) { 115 char ch = str.charAt(i); 116 PyObject w = Py.newInteger(ch); 117 PyObject x = mapping.__finditem__(w); 118 if (x == null) { 119 120 if (ch < 256) 121 v.append(ch); 122 else 123 codecs.encoding_error("charmap", v, errors, 124 "missing character mapping"); 125 continue; 126 } 127 if (x instanceof PyInteger) { 128 int value = ((PyInteger) x).getValue(); 129 if (value < 0 || value > 255) 130 throw Py.TypeError( 131 "character mapping must be in range(256)"); 132 v.append((char) value); 133 } else if (x == Py.None) { 134 codecs.encoding_error("charmap", v, errors, 135 "character maps to <undefined>"); 136 } else if (x instanceof PyString) { 137 v.append(x.toString()); 138 } 139 else { 140 141 throw Py.TypeError("character mapping must return " + 142 "integer, None or unicode"); 143 } 144 } 145 return codec_tuple(v.toString(), size); 146 } 147 148 149 150 151 152 public static PyTuple ascii_decode(String str) { 153 return ascii_decode(str, null); 154 } 155 156 public static PyTuple ascii_decode(String str, String errors) { 157 int size = str.length(); 158 return codec_tuple(codecs.PyUnicode_DecodeASCII(str, size, errors), 159 size); 160 } 161 162 163 public static PyTuple ascii_encode(String str) { 164 return ascii_encode(str, null); 165 } 166 167 public static PyTuple ascii_encode(String str, String errors) { 168 int size = str.length(); 169 return codec_tuple(codecs.PyUnicode_EncodeASCII(str, size, errors), 170 size); 171 } 172 173 174 175 176 public static PyTuple latin_1_decode(String str) { 177 return latin_1_decode(str, null); 178 } 179 180 public static PyTuple latin_1_decode(String str, String errors) { 181 int size = str.length(); 182 StringBuffer v = new StringBuffer (size); 183 184 for (int i = 0; i < size; i++) { 185 char ch = str.charAt(i); 186 if (ch < 256) { 187 v.append(ch); 188 } else { 189 codecs.decoding_error("latin-1", v, errors, 190 "ordinal not in range(256)"); 191 i++; 192 continue; 193 } 194 } 195 196 return codec_tuple(v.toString(), size); 197 } 198 199 200 public static PyTuple latin_1_encode(String str) { 201 return latin_1_encode(str, null); 202 } 203 204 public static PyTuple latin_1_encode(String str, String errors) { 205 int size = str.length(); 206 StringBuffer v = new StringBuffer (size); 207 208 for (int i = 0; i < size; i++) { 209 char ch = str.charAt(i); 210 if (ch >= 256) { 211 codecs.encoding_error("latin-1", v, errors, 212 "ordinal not in range(256)"); 213 } else 214 v.append(ch); 215 } 216 return codec_tuple(v.toString(), size); 217 } 218 219 220 221 222 223 public static PyTuple utf_16_encode(String str) { 224 return utf_16_encode(str, null); 225 } 226 227 public static PyTuple utf_16_encode(String str, String errors) { 228 return codec_tuple(encode_UTF16(str, errors, 0), str.length()); 229 } 230 231 public static PyTuple utf_16_encode(String str, String errors, 232 int byteorder) { 233 return codec_tuple(encode_UTF16(str, errors, byteorder), 234 str.length()); 235 } 236 237 public static PyTuple utf_16_le_encode(String str) { 238 return utf_16_le_encode(str, null); 239 } 240 241 public static PyTuple utf_16_le_encode(String str, String errors) { 242 return codec_tuple(encode_UTF16(str, errors, -1), str.length()); 243 } 244 245 public static PyTuple utf_16_be_encode(String str) { 246 return utf_16_be_encode(str, null); 247 } 248 249 public static PyTuple utf_16_be_encode(String str, String errors) { 250 return codec_tuple(encode_UTF16(str, errors, 1), str.length()); 251 } 252 253 254 private static String encode_UTF16(String str, String errors, 255 int byteorder) { 256 int size = str.length(); 257 StringBuffer v = new StringBuffer ((size + 258 (byteorder == 0 ? 1 : 0)) * 2); 259 260 if (byteorder == 0) { 261 v.append((char) 0xFE); 262 v.append((char) 0xFF); 263 } 264 265 if (byteorder == 0 || byteorder == 1) 266 for (int i = 0; i < size; i++) { 267 char ch = str.charAt(i); 268 v.append((char) ((ch >>> 8) & 0xFF)); 269 v.append((char) (ch & 0xFF)); 270 } 271 else { 272 for (int i = 0; i < size; i++) { 273 char ch = str.charAt(i); 274 v.append((char) (ch & 0xFF)); 275 v.append((char) ((ch >>> 8) & 0xFF)); 276 } 277 } 278 279 return v.toString(); 280 } 281 282 283 284 285 public static PyTuple utf_16_decode(String str) { 286 return utf_16_decode(str, null); 287 } 288 289 public static PyTuple utf_16_decode(String str, String errors) { 290 int[] bo = new int[] { 0 }; 291 return codec_tuple(decode_UTF16(str, errors, bo), str.length()); 292 } 293 294 public static PyTuple utf_16_decode(String str, String errors, 295 int byteorder) { 296 int[] bo = new int[] { byteorder }; 297 return codec_tuple(decode_UTF16(str, errors, bo), str.length()); 298 } 299 300 public static PyTuple utf_16_le_decode(String str) { 301 return utf_16_le_decode(str, null); 302 } 303 304 public static PyTuple utf_16_le_decode(String str, String errors) { 305 int[] bo = new int[] { -1 }; 306 return codec_tuple(decode_UTF16(str, errors, bo), str.length()); 307 } 308 309 public static PyTuple utf_16_be_decode(String str) { 310 return utf_16_be_decode(str, null); 311 } 312 313 public static PyTuple utf_16_be_decode(String str, String errors) { 314 int[] bo = new int[] { 1 }; 315 return codec_tuple(decode_UTF16(str, errors, bo), str.length()); 316 } 317 318 public static PyTuple utf_16_ex_decode(String str) { 319 return utf_16_ex_decode(str, null); 320 } 321 322 public static PyTuple utf_16_ex_decode(String str, String errors) { 323 return utf_16_ex_decode(str, errors, 0); 324 } 325 326 public static PyTuple utf_16_ex_decode(String str, String errors, 327 int byteorder) { 328 int[] bo = new int[] { 0 }; 329 String s = decode_UTF16(str, errors, bo); 330 return new PyTuple(new PyObject[] { 331 Py.newString(s), 332 Py.newInteger(str.length()), 333 Py.newInteger(bo[0]) 334 }); 335 } 336 337 private static String decode_UTF16(String str, String errors, 338 int[] byteorder) { 339 int bo = 0; 340 if (byteorder != null) 341 bo = byteorder[0]; 342 343 int size = str.length(); 344 345 if (size % 2 != 0) 346 codecs.decoding_error("UTF16", null, errors, "truncated data"); 347 348 StringBuffer v = new StringBuffer (size/2); 349 350 for (int i = 0; i < size; i += 2) { 351 char ch1 = str.charAt(i); 352 char ch2 = str.charAt(i+1); 353 if (ch1 == 0xFE && ch2 == 0xFF) { 354 bo = 1; 355 continue; 356 } else if (ch1 == 0xFF && ch2 == 0xFE) { 357 bo = -1; 358 continue; 359 } 360 361 char ch; 362 if (bo == -1) 363 ch = (char) (ch2 << 8 | ch1); 364 else 365 ch = (char) (ch1 << 8 | ch2); 366 367 if (ch < 0xD800 || ch > 0xDFFF) { 368 v.append(ch); 369 continue; 370 } 371 372 373 374 if (i == size-1) { 375 codecs.decoding_error("UTF-16", v, errors, 376 "unexpected end of data"); 377 continue; 378 } 379 380 ch = str.charAt(++i); 381 if (0xDC00 <= ch && ch <= 0xDFFF) { 382 ch = str.charAt(++i); 383 if (0xD800 <= ch && ch <= 0xDBFF) 384 388 codecs.decoding_error("UTF-16", v, errors, 389 "code pairs are not supported"); 390 continue; 391 } 392 codecs.decoding_error("UTF-16", v, errors, "illegal encoding"); 393 } 394 395 if (byteorder != null) 396 byteorder[0] = bo; 397 398 return v.toString(); 399 } 400 401 402 403 404 405 406 public static PyTuple raw_unicode_escape_encode(String str) { 407 return raw_unicode_escape_encode(str, null); 408 } 409 410 public static PyTuple raw_unicode_escape_encode(String str, 411 String errors) { 412 return codec_tuple(codecs.PyUnicode_EncodeRawUnicodeEscape(str, 413 errors, false), 414 str.length()); 415 } 416 417 418 public static PyTuple raw_unicode_escape_decode(String str) { 419 return raw_unicode_escape_decode(str, null); 420 } 421 422 public static PyTuple raw_unicode_escape_decode(String str, 423 String errors) { 424 return codec_tuple(codecs.PyUnicode_DecodeRawUnicodeEscape(str, 425 errors), 426 str.length()); 427 } 428 429 430 431 432 433 434 public static PyTuple unicode_escape_encode(String str) { 435 return unicode_escape_encode(str, null); 436 } 437 438 public static PyTuple unicode_escape_encode(String str, String errors) { 439 return codec_tuple(PyString.encode_UnicodeEscape(str, false), 440 str.length()); 441 } 442 443 public static PyTuple unicode_escape_decode(String str) { 444 return unicode_escape_decode(str, null); 445 } 446 447 public static PyTuple unicode_escape_decode(String str, String errors) { 448 int n = str.length(); 449 return codec_tuple(PyString.decode_UnicodeEscape(str, 450 0, n, errors, true), n); 451 } 452 453 454 455 456 457 458 public static PyTuple unicode_internal_encode(String str) { 459 return unicode_internal_encode(str, null); 460 } 461 462 public static PyTuple unicode_internal_encode(String str, String errors) { 463 return codec_tuple(str, str.length()); 464 } 465 466 public static PyTuple unicode_internal_decode(String str) { 467 return unicode_internal_decode(str, null); 468 } 469 470 public static PyTuple unicode_internal_decode(String str, String errors) { 471 return codec_tuple(str, str.length()); 472 } 473 474 } 475 476 | Popular Tags |