KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > util > ms > Cp1252


/* Cp1252
*
* Created on September 12, 2006
*
* Copyright (C) 2006 Internet Archive.
*
* This file is part of the Heritrix web crawler (crawler.archive.org).
*
* Heritrix is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or
* any later version.
*
* Heritrix is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser Public License for more details.
*
* You should have received a copy of the GNU Lesser Public License
* along with Heritrix; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/

23 package org.archive.util.ms;
24
25
import java.io.UnsupportedEncodingException;
27
28
/**
 * A fast implementation of code page 1252. This is used to convert bytes
 * to characters in .doc files that don't use Unicode.
 *
 * <p>The Java Charset APIs seemed like overkill for these translations,
 * since 1 byte always translates into 1 character. Instead, a 256-entry
 * lookup table is built once when the class is initialized, and every call
 * to {@link #decode(int)} is a single array read.
 *
 * @author pjack
 */
public final class Cp1252 {

    /** Number of distinct values an unsigned byte can take. */
    private static final int TABLE_SIZE = 256;

    /** Name of the charset consulted when populating the table. */
    private static final String CHARSET_NAME = "Cp1252";

    /**
     * The translation table. If x is an unsigned byte from a .doc
     * text stream, then XLAT[x] is the Unicode character that byte
     * represents.
     */
    private static final char[] XLAT = createTable();

    /**
     * Static utility library, do not instantiate.
     */
    private Cp1252() {
    }

    /**
     * Generates the translation table. The Java String API is used for each
     * possible byte to determine the corresponding Unicode character.
     *
     * @return the Cp1252 translation table, indexed by unsigned byte value
     * @throws RuntimeException wrapping an
     *         {@link UnsupportedEncodingException} if the runtime does not
     *         provide the Cp1252 charset
     */
    private static char[] createTable() {
        char[] table = new char[TABLE_SIZE];
        // Reused one-byte buffer: each iteration decodes exactly one byte.
        byte[] single = new byte[1];
        try {
            for (int i = 0; i < TABLE_SIZE; i++) {
                // The narrowing cast keeps the low 8 bits, so 128..255 become
                // the negative byte values the decoder reads as 0x80..0xFF.
                single[0] = (byte) i;
                table[i] = new String(single, CHARSET_NAME).charAt(0);
            }
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
        return table;
    }

    /**
     * Returns the Unicode character for the given Cp1252 byte.
     *
     * <p>The argument is used directly as a table index and is deliberately
     * not masked: a signed {@code byte} must be widened with {@code & 0xFF}
     * by the caller, and sentinel values such as an end-of-stream {@code -1}
     * are rejected rather than silently decoded.
     *
     * @param b an unsigned byte from 0 to 255
     * @return the Unicode character corresponding to that byte
     * @throws ArrayIndexOutOfBoundsException if {@code b} is outside the
     *         range 0 to 255
     */
    public static char decode(int b) {
        return XLAT[b];
    }

}
88
Popular Tags