KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > caucho > xml > readers > Utf8Reader


1 /*
2  * Copyright (c) 1998-2006 Caucho Technology -- all rights reserved
3  *
4  * This file is part of Resin(R) Open Source
5  *
6  * Each copy or derived work must preserve the copyright notice and this
7  * notice unmodified.
8  *
9  * Resin Open Source is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * Resin Open Source is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
17  * of NON-INFRINGEMENT. See the GNU General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with Resin Open Source; if not, write to the
22  * Free SoftwareFoundation, Inc.
23  * 59 Temple Place, Suite 330
24  * Boston, MA 02111-1307 USA
25  *
26  * @author Scott Ferguson
27  */

28
29 package com.caucho.xml.readers;
30
import com.caucho.util.CharBuffer;
import com.caucho.vfs.ReadStream;
import com.caucho.xml.XmlParser;

import java.io.CharConversionException;
import java.io.EOFException;
import java.io.IOException;
38
39 /**
40  * A fast reader to convert bytes to characters for parsing XML.
41  */

42 public class Utf8Reader extends XmlReader {
43   /**
44    * Create a new reader.
45    */

46   public Utf8Reader()
47   {
48   }
49
50   /**
51    * Create a new reader with the given read stream.
52    */

53   public Utf8Reader(XmlParser parser, ReadStream is)
54   {
55     super(parser, is);
56   }
57
58   /**
59    * Read the next character, returning -1 on end of file..
60    */

61   public int read()
62     throws IOException JavaDoc
63   {
64     int ch1 = _is.read();
65
66     if (ch1 == '\n') {
67       _parser.setLine(++_line);
68       return ch1;
69     }
70     else if (ch1 == '\r') {
71       _parser.setLine(++_line);
72
73       int ch2 = _is.read();
74       if (ch2 == '\n')
75         return '\n';
76
77       if (ch2 < 0) {
78       }
79       else if (ch2 < 0x80)
80         _parser.unread(ch2);
81       else
82         _parser.unread(readSecond(ch2));
83       
84       return '\n';
85     }
86     else if (ch1 < 0x80)
87       return ch1;
88     else
89       return readSecond(ch1);
90   }
91     
92   private int readSecond(int ch1)
93     throws IOException JavaDoc
94   {
95     if ((ch1 & 0xe0) == 0xc0) {
96       int ch2 = _is.read();
97       if (ch2 < 0)
98         throw new EOFException JavaDoc("unexpected end of file in utf8 character");
99       else if ((ch2 & 0xc0) != 0x80)
100         throw error(L.l("illegal utf8 encoding {0}", hex(ch1)));
101       
102       return ((ch1 & 0x1f) << 6) + (ch2 & 0x3f);
103     }
104     else if ((ch1 & 0xf0) == 0xe0) {
105       int ch2 = _is.read();
106       int ch3 = _is.read();
107       
108       if (ch2 < 0)
109         throw new EOFException JavaDoc("unexpected end of file in utf8 character");
110       else if ((ch2 & 0xc0) != 0x80)
111         throw error(L.l("illegal utf8 encoding at {0} {1} {2}", hex(ch1), hex(ch2), hex(ch3)));
112       
113       if (ch3 < 0)
114         throw new EOFException JavaDoc("unexpected end of file in utf8 character");
115       else if ((ch3 & 0xc0) != 0x80)
116         throw error(L.l("illegal utf8 encoding {0} {1} {2}",
117                         hex(ch1), hex(ch2), hex(ch3)));
118
119       int ch = ((ch1 & 0x1f) << 12) + ((ch2 & 0x3f) << 6) + (ch3 & 0x3f);
120
121       if (ch == 0xfeff) // handle some writers, e.g. microsoft
122
return read();
123       else
124         return ch;
125     }
126     else
127       throw error(L.l("illegal utf8 encoding at {0}", hex(ch1)));
128   }
129
130   private String JavaDoc hex(int n)
131   {
132     n = n & 0xff;
133     
134     CharBuffer cb = CharBuffer.allocate();
135
136     cb.append("0x");
137
138     int d = n / 16;
139     if (d >= 0 && d <= 9)
140       cb.append((char) ('0' + d));
141     else
142       cb.append((char) ('a' + d - 10));
143     
144     d = n % 16;
145     if (d >= 0 && d <= 9)
146       cb.append((char) ('0' + d));
147     else
148       cb.append((char) ('a' + d - 10));
149
150     return cb.close();
151   }
152
153   private CharConversionException JavaDoc error(String JavaDoc msg)
154   {
155     String JavaDoc filename = _parser.getFilename();
156     int line = _parser.getLine();
157
158     if (filename != null)
159       return new CharConversionException JavaDoc(filename + ":" + line + ": " + msg);
160     else
161       return new CharConversionException JavaDoc(msg);
162   }
163 }
164
165
Popular Tags