KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > python > modules > _codecs


/*
 * Copyright 2000 Finn Bock
 *
 * This program contains material copyrighted by:
 * Copyright (c) Corporation for National Research Initiatives.
 * Originally written by Marc-Andre Lemburg (mal@lemburg.com).
 */

8
9 package org.python.modules;
10
11 import org.python.core.*;
12 import org.python.core.imp;
13
14 public class _codecs {
15
16     public static void register(PyObject search_function) {
17         codecs.register(search_function);
18     }
19
20
21     public static PyTuple lookup(String JavaDoc encoding) {
22         return codecs.lookup(encoding);
23     }
24
25
26
27
28     private static PyTuple codec_tuple(String JavaDoc s, int len) {
29         return new PyTuple(new PyObject[] {
30             Py.java2py(s),
31             Py.newInteger(len)
32         });
33     }
34
35
36     /* --- UTF-8 Codec --------------------------------------------------- */
37
38     public static PyTuple utf_8_decode(String JavaDoc str) {
39         return utf_8_decode(str, null);
40     }
41
42     public static PyTuple utf_8_decode(String JavaDoc str, String JavaDoc errors) {
43         int size = str.length();
44         return codec_tuple(codecs.PyUnicode_DecodeUTF8(str, errors), size);
45     }
46
47
48     public static PyTuple utf_8_encode(String JavaDoc str) {
49         return utf_8_encode(str, null);
50     }
51
52     public static PyTuple utf_8_encode(String JavaDoc str, String JavaDoc errors) {
53         int size = str.length();
54         return codec_tuple(codecs.PyUnicode_EncodeUTF8(str, errors), size);
55     }
56
57
58
59     /* --- Character Mapping Codec --------------------------------------- */
60
61     public static PyTuple charmap_decode(String JavaDoc str, String JavaDoc errors,
62                                          PyObject mapping) {
63         int size = str.length();
64         StringBuffer JavaDoc v = new StringBuffer JavaDoc(size);
65
66         for (int i = 0; i < size; i++) {
67             char ch = str.charAt(i);
68             if (ch > 0xFF) {
69                 codecs.decoding_error("charmap", v, errors,
70                                       "ordinal not in range(255)");
71                 i++;
72                 continue;
73             }
74
75             PyObject w = Py.newInteger(ch);
76             PyObject x = mapping.__finditem__(w);
77             if (x == null) {
78                 /* No mapping found: default to Latin-1 mapping if possible */
79                 v.append(ch);
80                 continue;
81             }
82
83             /* Apply mapping */
84             if (x instanceof PyInteger) {
85                 int value = ((PyInteger) x).getValue();
86                 if (value < 0 || value > 65535)
87                     throw Py.TypeError(
88                              "character mapping must be in range(65535)");
89                 v.append((char) value);
90             } else if (x == Py.None) {
91                 codecs.decoding_error("charmap", v, errors,
92                                       "character maps to <undefined>");
93             } else if (x instanceof PyString) {
94                 v.append(x.toString());
95             }
96             else {
97                 /* wrong return value */
98                 throw Py.TypeError("character mapping must return integer, " +
99                                    "None or unicode");
100             }
101         }
102         return codec_tuple(v.toString(), size);
103     }
104
105
106
107
108
109     public static PyTuple charmap_encode(String JavaDoc str, String JavaDoc errors,
110                                          PyObject mapping) {
111         int size = str.length();
112         StringBuffer JavaDoc v = new StringBuffer JavaDoc(size);
113
114         for (int i = 0; i < size; i++) {
115             char ch = str.charAt(i);
116             PyObject w = Py.newInteger(ch);
117             PyObject x = mapping.__finditem__(w);
118             if (x == null) {
119                 /* No mapping found: default to Latin-1 mapping if possible */
120                 if (ch < 256)
121                     v.append(ch);
122                 else
123                     codecs.encoding_error("charmap", v, errors,
124                                           "missing character mapping");
125                 continue;
126             }
127             if (x instanceof PyInteger) {
128                 int value = ((PyInteger) x).getValue();
129                 if (value < 0 || value > 255)
130                     throw Py.TypeError(
131                             "character mapping must be in range(256)");
132                 v.append((char) value);
133             } else if (x == Py.None) {
134                 codecs.encoding_error("charmap", v, errors,
135                                       "character maps to <undefined>");
136             } else if (x instanceof PyString) {
137                 v.append(x.toString());
138             }
139             else {
140                 /* wrong return value */
141                 throw Py.TypeError("character mapping must return " +
142                                    "integer, None or unicode");
143             }
144         }
145         return codec_tuple(v.toString(), size);
146     }
147
148
149
150     /* --- 7-bit ASCII Codec -------------------------------------------- */
151
152     public static PyTuple ascii_decode(String JavaDoc str) {
153         return ascii_decode(str, null);
154     }
155
156     public static PyTuple ascii_decode(String JavaDoc str, String JavaDoc errors) {
157         int size = str.length();
158         return codec_tuple(codecs.PyUnicode_DecodeASCII(str, size, errors),
159                                                                         size);
160     }
161
162
163     public static PyTuple ascii_encode(String JavaDoc str) {
164         return ascii_encode(str, null);
165     }
166
167     public static PyTuple ascii_encode(String JavaDoc str, String JavaDoc errors) {
168         int size = str.length();
169         return codec_tuple(codecs.PyUnicode_EncodeASCII(str, size, errors),
170                                                                         size);
171     }
172
173
174     /* --- Latin-1 Codec -------------------------------------------- */
175
176     public static PyTuple latin_1_decode(String JavaDoc str) {
177         return latin_1_decode(str, null);
178     }
179
180     public static PyTuple latin_1_decode(String JavaDoc str, String JavaDoc errors) {
181         int size = str.length();
182         StringBuffer JavaDoc v = new StringBuffer JavaDoc(size);
183
184         for (int i = 0; i < size; i++) {
185             char ch = str.charAt(i);
186             if (ch < 256) {
187                 v.append(ch);
188             } else {
189                 codecs.decoding_error("latin-1", v, errors,
190                                       "ordinal not in range(256)");
191                 i++;
192                 continue;
193             }
194         }
195
196         return codec_tuple(v.toString(), size);
197     }
198
199
200     public static PyTuple latin_1_encode(String JavaDoc str) {
201         return latin_1_encode(str, null);
202     }
203
204     public static PyTuple latin_1_encode(String JavaDoc str, String JavaDoc errors) {
205         int size = str.length();
206         StringBuffer JavaDoc v = new StringBuffer JavaDoc(size);
207
208         for (int i = 0; i < size; i++) {
209             char ch = str.charAt(i);
210             if (ch >= 256) {
211                 codecs.encoding_error("latin-1", v, errors,
212                                       "ordinal not in range(256)");
213             } else
214                 v.append(ch);
215         }
216         return codec_tuple(v.toString(), size);
217     }
218
219
220     /* --- UTF16 Codec -------------------------------------------- */
221
222
223     public static PyTuple utf_16_encode(String JavaDoc str) {
224         return utf_16_encode(str, null);
225     }
226
227     public static PyTuple utf_16_encode(String JavaDoc str, String JavaDoc errors) {
228         return codec_tuple(encode_UTF16(str, errors, 0), str.length());
229     }
230
231     public static PyTuple utf_16_encode(String JavaDoc str, String JavaDoc errors,
232                                        int byteorder) {
233         return codec_tuple(encode_UTF16(str, errors, byteorder),
234                            str.length());
235     }
236
237     public static PyTuple utf_16_le_encode(String JavaDoc str) {
238         return utf_16_le_encode(str, null);
239     }
240
241     public static PyTuple utf_16_le_encode(String JavaDoc str, String JavaDoc errors) {
242         return codec_tuple(encode_UTF16(str, errors, -1), str.length());
243     }
244
245     public static PyTuple utf_16_be_encode(String JavaDoc str) {
246         return utf_16_be_encode(str, null);
247     }
248
249     public static PyTuple utf_16_be_encode(String JavaDoc str, String JavaDoc errors) {
250         return codec_tuple(encode_UTF16(str, errors, 1), str.length());
251     }
252
253
254     private static String JavaDoc encode_UTF16(String JavaDoc str, String JavaDoc errors,
255                                       int byteorder) {
256         int size = str.length();
257         StringBuffer JavaDoc v = new StringBuffer JavaDoc((size +
258                                        (byteorder == 0 ? 1 : 0)) * 2);
259
260         if (byteorder == 0) {
261             v.append((char) 0xFE);
262             v.append((char) 0xFF);
263         }
264
265         if (byteorder == 0 || byteorder == 1)
266             for (int i = 0; i < size; i++) {
267                 char ch = str.charAt(i);
268                 v.append((char) ((ch >>> 8) & 0xFF));
269                 v.append((char) (ch & 0xFF));
270             }
271         else {
272             for (int i = 0; i < size; i++) {
273                 char ch = str.charAt(i);
274                 v.append((char) (ch & 0xFF));
275                 v.append((char) ((ch >>> 8) & 0xFF));
276             }
277         }
278
279         return v.toString();
280     }
281
282
283
284
285     public static PyTuple utf_16_decode(String JavaDoc str) {
286         return utf_16_decode(str, null);
287     }
288
289     public static PyTuple utf_16_decode(String JavaDoc str, String JavaDoc errors) {
290         int[] bo = new int[] { 0 };
291         return codec_tuple(decode_UTF16(str, errors, bo), str.length());
292     }
293
294     public static PyTuple utf_16_decode(String JavaDoc str, String JavaDoc errors,
295                                         int byteorder) {
296         int[] bo = new int[] { byteorder };
297         return codec_tuple(decode_UTF16(str, errors, bo), str.length());
298     }
299
300     public static PyTuple utf_16_le_decode(String JavaDoc str) {
301         return utf_16_le_decode(str, null);
302     }
303
304     public static PyTuple utf_16_le_decode(String JavaDoc str, String JavaDoc errors) {
305         int[] bo = new int[] { -1 };
306         return codec_tuple(decode_UTF16(str, errors, bo), str.length());
307     }
308
309     public static PyTuple utf_16_be_decode(String JavaDoc str) {
310         return utf_16_be_decode(str, null);
311     }
312
313     public static PyTuple utf_16_be_decode(String JavaDoc str, String JavaDoc errors) {
314         int[] bo = new int[] { 1 };
315         return codec_tuple(decode_UTF16(str, errors, bo), str.length());
316     }
317
318     public static PyTuple utf_16_ex_decode(String JavaDoc str) {
319         return utf_16_ex_decode(str, null);
320     }
321
322     public static PyTuple utf_16_ex_decode(String JavaDoc str, String JavaDoc errors) {
323         return utf_16_ex_decode(str, errors, 0);
324     }
325
326     public static PyTuple utf_16_ex_decode(String JavaDoc str, String JavaDoc errors,
327                                            int byteorder) {
328         int[] bo = new int[] { 0 };
329         String JavaDoc s = decode_UTF16(str, errors, bo);
330         return new PyTuple(new PyObject[] {
331              Py.newString(s),
332              Py.newInteger(str.length()),
333              Py.newInteger(bo[0])
334         });
335     }
336
337     private static String JavaDoc decode_UTF16(String JavaDoc str, String JavaDoc errors,
338                                        int[] byteorder) {
339         int bo = 0;
340         if (byteorder != null)
341              bo = byteorder[0];
342
343         int size = str.length();
344
345         if (size % 2 != 0)
346             codecs.decoding_error("UTF16", null, errors, "truncated data");
347
348         StringBuffer JavaDoc v = new StringBuffer JavaDoc(size/2);
349
350         for (int i = 0; i < size; i += 2) {
351             char ch1 = str.charAt(i);
352             char ch2 = str.charAt(i+1);
353             if (ch1 == 0xFE && ch2 == 0xFF) {
354                 bo = 1;
355                 continue;
356             } else if (ch1 == 0xFF && ch2 == 0xFE) {
357                 bo = -1;
358                 continue;
359             }
360
361             char ch;
362             if (bo == -1)
363                 ch = (char) (ch2 << 8 | ch1);
364             else
365                 ch = (char) (ch1 << 8 | ch2);
366
367             if (ch < 0xD800 || ch > 0xDFFF) {
368                 v.append(ch);
369                 continue;
370             }
371
372
373             /* UTF-16 code pair: */
374             if (i == size-1) {
375                 codecs.decoding_error("UTF-16", v, errors,
376                                       "unexpected end of data");
377                 continue;
378             }
379
380             ch = str.charAt(++i);
381             if (0xDC00 <= ch && ch <= 0xDFFF) {
382                 ch = str.charAt(++i);
383                 if (0xD800 <= ch && ch <= 0xDBFF)
384                     /* This is valid data (a UTF-16 surrogate pair), but
385                        we are not able to store this information since our
386                        Py_UNICODE type only has 16 bits... this might
387                        change someday, even though it's unlikely. */

388                     codecs.decoding_error("UTF-16", v, errors,
389                                           "code pairs are not supported");
390                 continue;
391             }
392             codecs.decoding_error("UTF-16", v, errors, "illegal encoding");
393         }
394
395         if (byteorder != null)
396             byteorder[0] = bo;
397
398         return v.toString();
399     }
400
401
402
403     /* --- RawUnicodeEscape Codec ----------------------------------------- */
404
405
406     public static PyTuple raw_unicode_escape_encode(String JavaDoc str) {
407         return raw_unicode_escape_encode(str, null);
408     }
409
410     public static PyTuple raw_unicode_escape_encode(String JavaDoc str,
411                                                    String JavaDoc errors) {
412         return codec_tuple(codecs.PyUnicode_EncodeRawUnicodeEscape(str,
413                                                              errors, false),
414                            str.length());
415     }
416
417
418     public static PyTuple raw_unicode_escape_decode(String JavaDoc str) {
419         return raw_unicode_escape_decode(str, null);
420     }
421
422     public static PyTuple raw_unicode_escape_decode(String JavaDoc str,
423                                                     String JavaDoc errors) {
424         return codec_tuple(codecs.PyUnicode_DecodeRawUnicodeEscape(str,
425                                                              errors),
426                            str.length());
427     }
428
429
430
431     /* --- UnicodeEscape Codec -------------------------------------------- */
432
433
434     public static PyTuple unicode_escape_encode(String JavaDoc str) {
435         return unicode_escape_encode(str, null);
436     }
437
438     public static PyTuple unicode_escape_encode(String JavaDoc str, String JavaDoc errors) {
439         return codec_tuple(PyString.encode_UnicodeEscape(str, false),
440                            str.length());
441     }
442
443     public static PyTuple unicode_escape_decode(String JavaDoc str) {
444         return unicode_escape_decode(str, null);
445     }
446
447     public static PyTuple unicode_escape_decode(String JavaDoc str, String JavaDoc errors) {
448         int n = str.length();
449         return codec_tuple(PyString.decode_UnicodeEscape(str,
450                                                      0, n, errors, true), n);
451     }
452
453
454
455     /* --- UnicodeInternal Codec ------------------------------------------ */
456
457
458     public static PyTuple unicode_internal_encode(String JavaDoc str) {
459         return unicode_internal_encode(str, null);
460     }
461
462     public static PyTuple unicode_internal_encode(String JavaDoc str, String JavaDoc errors) {
463         return codec_tuple(str, str.length());
464     }
465
466     public static PyTuple unicode_internal_decode(String JavaDoc str) {
467         return unicode_internal_decode(str, null);
468     }
469
470     public static PyTuple unicode_internal_decode(String JavaDoc str, String JavaDoc errors) {
471         return codec_tuple(str, str.length());
472     }
473
474 }
475
476
Popular Tags