KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > fri > util > io > UnicodeReader


1 package fri.util.io;
2
3 import java.io.*;
4
/**
 * Reader that detects and skips a Unicode byte order mark (BOM) at the start
 * of a byte stream and decodes the remaining bytes with the encoding the BOM
 * indicates. See http://www.unicode.org/unicode/faq/utf_bom.html
 *
 * <pre>
 * 00 00 FE FF = UTF-32, big-endian
 * FF FE 00 00 = UTF-32, little-endian
 * FE FF       = UTF-16, big-endian
 * FF FE       = UTF-16, little-endian
 * EF BB BF    = UTF-8
 * </pre>
 *
 * When no BOM is present, no bytes are skipped and the default encoding
 * passed to the constructor is used; if that is <code>null</code>, the
 * platform default encoding is used.
 */
public class UnicodeReader extends Reader
{
    /** Length in bytes of the longest BOM that is recognized (UTF-32). */
    private static final int BOM_MAX_SIZE = 4;

    /** Reader that performs the actual decoding of the BOM-stripped stream. */
    private final InputStreamReader delegate;

    /**
     * Creates a reader that uses the encoding given by the BOM, or the
     * platform default encoding when the stream has no BOM.
     *
     * @param in the byte stream to decode.
     * @throws IOException if reading the leading bytes of the stream fails.
     */
    public UnicodeReader(InputStream in) throws IOException {
        this(in, null);
    }

    /**
     * Creates a reader that uses the encoding given by the BOM, or the passed
     * default encoding when the stream has no BOM.
     *
     * @param in the byte stream to decode.
     * @param defaultEnc name of the encoding to use when no BOM is found;
     *        <code>null</code> selects the platform default encoding.
     * @throws IOException if reading the leading bytes of the stream fails,
     *         or if the chosen encoding is not supported.
     */
    public UnicodeReader(InputStream in, String defaultEnc) throws IOException {
        delegate = openDelegate(in, defaultEnc);
    }

    /**
     * Returns the name of the encoding in use, as reported by the underlying
     * {@link InputStreamReader#getEncoding()}: the encoding derived from the
     * BOM if there was one, otherwise the default encoding.
     */
    public String getEncoding() {
        return delegate.getEncoding();
    }

    /**
     * Reads ahead up to four bytes, checks them for a BOM and builds the
     * decoding reader. Only the BOM bytes are consumed; all other read-ahead
     * bytes are pushed back so that they are decoded as regular content.
     */
    private static InputStreamReader openDelegate(InputStream in, String defaultEnc) throws IOException {
        PushbackInputStream pushback = new PushbackInputStream(in, BOM_MAX_SIZE);
        byte[] bom = new byte[BOM_MAX_SIZE];

        // A single read() may legally return fewer bytes than requested even
        // though more are pending, so keep reading until the buffer is full
        // or the stream ends.
        int n = 0;
        while (n < bom.length) {
            int count = pushback.read(bom, n, bom.length - n);
            if (count <= 0) {
                break;
            }
            n += count;
        }

        String encoding;
        int bomLength;

        // The UTF-32 signatures must be tested before the UTF-16 ones because
        // the UTF-32LE BOM (FF FE 00 00) starts with the UTF-16LE BOM (FF FE).
        // Every test is guarded by the number of bytes actually read, so that
        // the zero-initialized tail of the buffer is never taken for data.
        if (n >= 4 && bom[0] == (byte) 0x00 && bom[1] == (byte) 0x00
                && bom[2] == (byte) 0xFE && bom[3] == (byte) 0xFF) {
            encoding = "UTF-32BE";
            bomLength = 4;
        }
        else if (n >= 4 && bom[0] == (byte) 0xFF && bom[1] == (byte) 0xFE
                && bom[2] == (byte) 0x00 && bom[3] == (byte) 0x00) {
            encoding = "UTF-32LE";
            bomLength = 4;
        }
        else if (n >= 3 && bom[0] == (byte) 0xEF && bom[1] == (byte) 0xBB
                && bom[2] == (byte) 0xBF) {
            encoding = "UTF-8";
            bomLength = 3;
        }
        else if (n >= 2 && bom[0] == (byte) 0xFE && bom[1] == (byte) 0xFF) {
            encoding = "UTF-16BE";
            bomLength = 2;
        }
        else if (n >= 2 && bom[0] == (byte) 0xFF && bom[1] == (byte) 0xFE) {
            encoding = "UTF-16LE";
            bomLength = 2;
        }
        else {
            // Unicode BOM not found, all read-ahead bytes are content
            encoding = defaultEnc;
            bomLength = 0;
        }

        // Give back everything that was read beyond the BOM
        if (n > bomLength) {
            pushback.unread(bom, bomLength, n - bomLength);
        }

        // Use BOM or default encoding; null means platform default
        if (encoding == null) {
            return new InputStreamReader(pushback);
        }
        return new InputStreamReader(pushback, encoding);
    }

    /** Overridden to use delegate reader. */
    public void close() throws IOException {
        delegate.close();
    }

    /** Overridden to use delegate reader. */
    public int read(char[] cbuf, int off, int len) throws IOException {
        return delegate.read(cbuf, off, len);
    }

}
103
Popular Tags