KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > batik > util > io > UTF8Decoder


1 /*
2
3    Copyright 1999-2003 The Apache Software Foundation
4
5    Licensed under the Apache License, Version 2.0 (the "License");
6    you may not use this file except in compliance with the License.
7    You may obtain a copy of the License at
8
9        http://www.apache.org/licenses/LICENSE-2.0
10
11    Unless required by applicable law or agreed to in writing, software
12    distributed under the License is distributed on an "AS IS" BASIS,
13    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14    See the License for the specific language governing permissions and
15    limitations under the License.
16
17 */

18
19 package org.apache.batik.util.io;
20
21 import java.io.IOException JavaDoc;
22 import java.io.InputStream JavaDoc;
23
24 /**
25  * This class represents an object which decodes UTF-8 characters from
26  * a stream of bytes.
27  *
28  * @author <a HREF="mailto:stephane@hillion.org">Stephane Hillion</a>
29  * @version $Id: UTF8Decoder.java,v 1.4 2004/10/30 18:38:06 deweese Exp $
30  */

31 public class UTF8Decoder extends AbstractCharDecoder {
32     
33     /**
34      * The number of bytes of a UTF-8 sequence indexed by the first
35      * byte of the sequence.
36      */

37     protected final static byte[] UTF8_BYTES = {
38         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
39         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
40         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
41         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
42         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
43         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
44         2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
45         3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,0,0,0,0,0,0,0,0,
46     };
47
48     /**
49      * The next char, in case of a 4 bytes sequence.
50      */

51     protected int nextChar = -1;
52
53     /**
54      * Creates a new UTF8Decoder.
55      */

56     public UTF8Decoder(InputStream JavaDoc is) {
57         super(is);
58     }
59
60     /**
61      * Reads the next character.
62      * @return a character or END_OF_STREAM.
63      */

64     public int readChar() throws IOException JavaDoc {
65         if (nextChar != -1) {
66             int result = nextChar;
67             nextChar = -1;
68             return result;
69         }
70         if (position == count) {
71             fillBuffer();
72         }
73         if (count == -1) {
74             return END_OF_STREAM;
75         }
76         int b1 = buffer[position++] & 0xff;
77         switch (UTF8_BYTES[b1]) {
78         default:
79             charError("UTF-8");
80
81         case 1:
82             return b1;
83
84         case 2:
85             if (position == count) {
86                 fillBuffer();
87             }
88             if (count == -1) {
89                 endOfStreamError("UTF-8");
90             }
91             return ((b1 & 0x1f) << 6) | (buffer[position++] & 0x3f);
92
93         case 3:
94             if (position == count) {
95                 fillBuffer();
96             }
97             if (count == -1) {
98                 endOfStreamError("UTF-8");
99             }
100             int b2 = buffer[position++];
101             if (position == count) {
102                 fillBuffer();
103             }
104             if (count == -1) {
105                 endOfStreamError("UTF-8");
106             }
107             int b3 = buffer[position++];
108             if ((b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80) {
109                 charError("UTF-8");
110             }
111             return ((b1 & 0x1f) << 12) | ((b2 & 0x3f) << 6) | (b3 & 0x1f);
112
113         case 4:
114             if (position == count) {
115                 fillBuffer();
116             }
117             if (count == -1) {
118                 endOfStreamError("UTF-8");
119             }
120             b2 = buffer[position++];
121             if (position == count) {
122                 fillBuffer();
123             }
124             if (count == -1) {
125                 endOfStreamError("UTF-8");
126             }
127             b3 = buffer[position++];
128             if (position == count) {
129                 fillBuffer();
130             }
131             if (count == -1) {
132                 endOfStreamError("UTF-8");
133             }
134             int b4 = buffer[position++];
135             if ((b2 & 0xc0) != 0x80 ||
136                 (b3 & 0xc0) != 0x80 ||
137                 (b4 & 0xc0) != 0x80) {
138                 charError("UTF-8");
139             }
140             int c = ((b1 & 0x1f) << 18)
141                 | ((b2 & 0x3f) << 12)
142                 | ((b3 & 0x1f) << 6)
143                 | (b4 & 0x1f);
144             nextChar = (c - 0x10000) % 0x400 + 0xdc00;
145             return (c - 0x10000) / 0x400 + 0xd800;
146         }
147     }
148 }
149
Popular Tags