KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > tomcat > util > buf > UTF8Decoder


1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */

17
18 package org.apache.tomcat.util.buf;
19
20 import java.io.IOException JavaDoc;
21
22 /**
23  * Moved from ByteChunk - code to convert from UTF8 bytes to chars.
24  * Not used in the current tomcat3.3 : the performance gain is not very
25  * big if the String is created, only if we avoid that and work only
26  * on char[]. Until than, it's better to be safe. ( I tested this code
27  * with 2 and 3 bytes chars, and it works fine in xerces )
28  *
29  * Cut from xerces' UTF8Reader.copyMultiByteCharData()
30  *
31  * @author Costin Manolache
32  * @author ( Xml-Xerces )
33  */

34 public final class UTF8Decoder extends B2CConverter {
35     
36     
37     private static org.apache.commons.logging.Log log=
38         org.apache.commons.logging.LogFactory.getLog(UTF8Decoder.class );
39     
40     // may have state !!
41

42     public UTF8Decoder() {
43
44     }
45     
46     public void recycle() {
47     }
48
49     public void convert(ByteChunk mb, CharChunk cb )
50     throws IOException JavaDoc
51     {
52     int bytesOff=mb.getOffset();
53     int bytesLen=mb.getLength();
54     byte bytes[]=mb.getBytes();
55     
56     int j=bytesOff;
57     int end=j+bytesLen;
58
59     while( j< end ) {
60         int b0=0xff & bytes[j];
61
62         if( (b0 & 0x80) == 0 ) {
63         cb.append((char)b0);
64         j++;
65         continue;
66         }
67         
68         // 2 byte ?
69
if( j++ >= end ) {
70         // ok, just ignore - we could throw exception
71
throw new IOException JavaDoc( "Conversion error - EOF " );
72         }
73         int b1=0xff & bytes[j];
74         
75         // ok, let's the fun begin - we're handling UTF8
76
if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx (0x80 to 0x7ff)
77
int ch = ((0x1f & b0)<<6) + (0x3f & b1);
78         if(debug>0)
79             log("Convert " + b0 + " " + b1 + " " + ch + ((char)ch));
80         
81         cb.append((char)ch);
82         j++;
83         continue;
84         }
85         
86         if( j++ >= end )
87         return ;
88         int b2=0xff & bytes[j];
89         
90         if( (b0 & 0xf0 ) == 0xe0 ) {
91         if ((b0 == 0xED && b1 >= 0xA0) ||
92             (b0 == 0xEF && b1 == 0xBF && b2 >= 0xBE)) {
93             if(debug>0)
94             log("Error " + b0 + " " + b1+ " " + b2 );
95
96             throw new IOException JavaDoc( "Conversion error 2");
97         }
98
99         int ch = ((0x0f & b0)<<12) + ((0x3f & b1)<<6) + (0x3f & b2);
100         cb.append((char)ch);
101         if(debug>0)
102             log("Convert " + b0 + " " + b1+ " " + b2 + " " + ch +
103             ((char)ch));
104         j++;
105         continue;
106         }
107
108         if( j++ >= end )
109         return ;
110         int b3=0xff & bytes[j];
111
112         if (( 0xf8 & b0 ) == 0xf0 ) {
113         if (b0 > 0xF4 || (b0 == 0xF4 && b1 >= 0x90)) {
114             if(debug>0)
115             log("Convert " + b0 + " " + b1+ " " + b2 + " " + b3);
116             throw new IOException JavaDoc( "Conversion error ");
117         }
118         int ch = ((0x0f & b0)<<18) + ((0x3f & b1)<<12) +
119             ((0x3f & b2)<<6) + (0x3f & b3);
120
121         if(debug>0)
122             log("Convert " + b0 + " " + b1+ " " + b2 + " " + b3 + " " +
123             ch + ((char)ch));
124
125         if (ch < 0x10000) {
126             cb.append( (char)ch );
127         } else {
128             cb.append((char)(((ch-0x00010000)>>10)+
129                            0xd800));
130             cb.append((char)(((ch-0x00010000)&0x3ff)+
131                            0xdc00));
132         }
133         j++;
134         continue;
135         } else {
136         // XXX Throw conversion exception !!!
137
if(debug>0)
138             log("Convert " + b0 + " " + b1+ " " + b2 + " " + b3);
139         throw new IOException JavaDoc( "Conversion error 4" );
140         }
141     }
142     }
143
144     private static int debug=1;
145     void log(String JavaDoc s ) {
146         if (log.isDebugEnabled())
147             log.debug("UTF8Decoder: " + s );
148     }
149     
150 }
151
Popular Tags