KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > go > trove > io > UnicodeReader


1 /* ====================================================================
2  * Trove - Copyright (c) 1997-2000 Walt Disney Internet Group
3  * ====================================================================
4  * The Tea Software License, Version 1.1
5  *
6  * Copyright (c) 2000 Walt Disney Internet Group. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Walt Disney Internet Group (http://opensource.go.com/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Tea", "TeaServlet", "Kettle", "Trove" and "BeanDoc" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact opensource@dig.com.
31  *
32  * 5. Products derived from this software may not be called "Tea",
33  * "TeaServlet", "Kettle" or "Trove", nor may "Tea", "TeaServlet",
34  * "Kettle", "Trove" or "BeanDoc" appear in their name, without prior
35  * written permission of the Walt Disney Internet Group.
36  *
37  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40  * DISCLAIMED. IN NO EVENT SHALL THE WALT DISNEY INTERNET GROUP OR ITS
41  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
42  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
43  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
44  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
45  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48  * ====================================================================
49  *
50  * For more information about Tea, please see http://opensource.go.com/.
51  */

52
53 package com.go.trove.io;
54
55 import java.io.*;
56
57 /******************************************************************************
58  * This reader handles unicode escapes in a character stream as defined by
59  * <i>The Java Language Specification</i>.
60  *
61  * <p>A unicode escape consists of six characters: '\' and 'u' followed by
62  * four hexadecimal digits. If the format of the escape is not correct, then
63  * the escape is unprocessed. To prevent a correctly formatted unicode escape
64  * from being processed, precede it with another '\'.
65  *
66  * @author Brian S O'Neill
67  * @version
68  * <!--$$Revision:--> 20 <!-- $-->, <!--$$JustDate:--> 12/11/00 <!-- $-->
69  */

70 public class UnicodeReader extends EscapeReader {
71     /** Just a temporary buffer for holding the four hexadecimal digits. */
72     private char[] mMinibuf = new char[4];
73
74     private boolean mEscaped;
75
76     /**
77      * A UnicodeReader needs an underlying source Reader.
78      *
79      * @param source the source PositionReader
80      */

81     public UnicodeReader(Reader source) {
82         super(source, 6);
83     }
84
85     public int read() throws IOException {
86         int c = mSource.read();
87
88         if (c != '\\' || !mEscapesEnabled) {
89             mEscaped = false;
90             return c;
91         }
92
93         c = mSource.read();
94
95         // Have scanned "\\"? (two backslashes)
96
if (c == '\\') {
97             mEscaped = !mEscaped;
98             mSource.unread();
99             return '\\';
100         }
101
102         // Have not scanned '\', 'u'?
103
if (c != 'u') {
104             mSource.unread();
105             return '\\';
106         }
107
108         // At this point, have scanned '\', 'u'.
109

110         // If previously escaped, then don't process unicode escape.
111
if (mEscaped) {
112             mEscaped = false;
113             mSource.unread();
114             return '\\';
115         }
116
117         int len = mSource.read(mMinibuf, 0, 4);
118         
119         if (len == 4) {
120             try {
121                 int val =
122                     Integer.valueOf(new String JavaDoc(mMinibuf, 0, 4), 16).intValue();
123
124                 return val;
125             }
126             catch (NumberFormatException JavaDoc e) {
127                 // If the number is not a parseable as hexadecimal, then
128
// treat this as a bad format and do not process the
129
// unicode escape.
130
}
131         }
132
133         // Unread the four hexadecimal characters and the leading 'u'.
134
if (len >= 0) {
135             mSource.unread(len + 1);
136         }
137
138         return '\\';
139     }
140
141     public static void main(String JavaDoc[] arg) throws Exception JavaDoc {
142         Tester.test(arg);
143     }
144
145     private static class Tester {
146         public static void test(String JavaDoc[] arg) throws Exception JavaDoc {
147             String JavaDoc str =
148                 "This is \\" + "u0061 test.\n" +
149                 "This is \\" + "u00612 test.\n" +
150                 "This is \\" + "u0061" + "\\" + "u0061" + " test.\n" +
151                 "This is \\" + "u061 test.\n" +
152                 "This is \\\\" + "u0061 test.\n" +
153                 "This is \\" + "a test.\n";
154
155             System.out.println("\nOriginal:\n");
156             
157             Reader reader = new StringReader(str);
158
159             int c;
160             while ( (c = reader.read()) >= 0 ) {
161                 System.out.print((char)c);
162             }
163
164             System.out.println("\nConverted:\n");
165             
166             reader = new StringReader(str);
167             reader = new UnicodeReader(reader);
168
169             while ( (c = reader.read()) != -1 ) {
170                 System.out.print((char)c);
171             }
172
173             System.out.println("\nUnread test 1:\n");
174             
175             reader = new StringReader(str);
176             PushbackPositionReader pr =
177                 new PushbackPositionReader(new UnicodeReader(reader), 1);
178
179             while ( (c = pr.read()) != -1 ) {
180                 pr.unread();
181                 c = pr.read();
182                 System.out.print((char)c);
183             }
184
185             System.out.println("\nUnread test 2:\n");
186             
187             reader = new StringReader(str);
188             pr = new PushbackPositionReader(new UnicodeReader(reader), 2);
189
190             int i = 0;
191             while ( (c = pr.read()) != -1 ) {
192                 if ( (i++ % 5) == 0 ) {
193                     c = pr.read();
194                     pr.unread();
195                     pr.unread();
196                     c = pr.read();
197                 }
198
199                 System.out.print((char)c);
200             }
201
202             System.out.println("\nUnread position test:\n");
203
204             reader = new StringReader(str);
205             pr = new PushbackPositionReader(new UnicodeReader(reader), 2);
206
207             System.out.print(pr.getNextPosition() + "\t");
208             i = 0;
209             while ( (c = pr.read()) != -1 ) {
210                 if ( (i++ % 5) == 0 ) {
211                     c = pr.read();
212                     pr.unread();
213                     pr.unread();
214                     c = pr.read();
215                 }
216
217                 System.out.println((char)c);
218                 System.out.print(pr.getNextPosition() + "\t");
219             }
220         }
221     }
222 }
223
Popular Tags