KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > caucho > vfs > i18n > WindowsHackReader


1 /*
2  * Copyright (c) 1998-2006 Caucho Technology -- all rights reserved
3  *
4  * This file is part of Resin(R) Open Source
5  *
6  * Each copy or derived work must preserve the copyright notice and this
7  * notice unmodified.
8  *
9  * Resin Open Source is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * Resin Open Source is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
17  * of NON-INFRINGEMENT. See the GNU General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with Resin Open Source; if not, write to the
22  * Free SoftwareFoundation, Inc.
23  * 59 Temple Place, Suite 330
24  * Boston, MA 02111-1307 USA
25  *
26  * @author Scott Ferguson
27  */

28
29 package com.caucho.vfs.i18n;
30
31 import java.io.IOException JavaDoc;
32 import java.io.InputStream JavaDoc;
33 import java.io.Reader JavaDoc;
34
35 /**
36  * Implements an encoding reader to convert the stupid
37  * windows "smart" quotes into ISO-8859-1 (Latin-1) characters.
38  *
39  * <p>The windows "smart" quotes actually do map into
40  * unicode characters. If that's what you want, use
41  * the window-1521 encoding instead. windows-hack converts
42  * to the closest latin-1 equivalent.
43  *
44  * <p>The three exceptions are the elipses '...', the
45  * trademark, and the per-mille characters. Those are translated into
46  * their unicode equivalents because there isn't a useful
47  * latin-1 equivalent.
48  */

49 public class WindowsHackReader extends EncodingReader {
50   private InputStream JavaDoc is;
51
52   /**
53    * Null-arg constructor for instantiation by com.caucho.vfs.Encoding only.
54    */

55   public WindowsHackReader()
56   {
57   }
58
59   /**
60    * Create a windows-hack reader based on the readStream.
61    */

62   private WindowsHackReader(InputStream JavaDoc is)
63   {
64     this.is = is;
65   }
66
67   /**
68    * Create a windows-hack reader based on the readStream.
69    *
70    * @param is the input stream providing the bytes.
71    * @param javaEncoding the JDK name for the encoding.
72    *
73    * @return the windows-hack reader.
74    */

75   public Reader JavaDoc create(InputStream JavaDoc is, String JavaDoc javaEncoding)
76   {
77     return new WindowsHackReader(is);
78   }
79
80   /**
81    * Reads into a character buffer using the correct encoding.
82    */

83   public int read()
84     throws IOException JavaDoc
85   {
86     int ch1 = is.read();
87
88     switch (ch1) {
89     case 130: // unicode 8218
90
return ',';
91         
92     case 131: // unicode 402
93
return 'f';
94         
95     case 132: // unicode 8222
96
return '"';
97         
98     case 133: // unicode 8230 "..."
99
return 8230;
100         
101     case 134: // unicode 8224 (dagger)
102
return '+';
103         
104     case 135: // unicode 8225 (double dagger)
105
return '+';
106         
107     case 136: // unicode 710
108
return '^';
109         
110     case 137: // unicode 8240 (per-mille 0/00)
111
return 8240;
112         
113     case 138: // unicode 352
114
return 'S';
115         
116     case 139: // unicode 8249
117
return '<';
118         
119     case 140: // unicode 338 (OE)
120
return 'O';
121         
122     case 145: // unicode 8216
123
case 146: // unicode 8217
124
return '\'';
125         
126     case 147: // unicode 8220
127
case 148: // unicode 8221
128
return '"';
129         
130     case 149: // unicode 8226 (bullet)
131
return '*';
132         
133     case 150: // unicode 8211
134
case 151: // unicode 8212
135
return '-';
136         
137     case 152: // unicode 732
138
return '~';
139         
140     case 153: // unicode 8482 (trademark)
141
return 8482;
142         
143     case 154: // unicode 353
144
return 's';
145         
146     case 155: // unicode 8250
147
return '>';
148         
149     case 156: // unicode 339 (oe)
150
return 'o';
151         
152     case 376: // unicode 376 (Y with umlaut)
153
return 'Y';
154         
155     default:
156       return ch1;
157     }
158   }
159
160   /**
161    * Reads into a character buffer using the correct encoding.
162    *
163    * @param cbuf character buffer receiving the data.
164    * @param off starting offset into the buffer.
165    * @param len number of characters to read.
166    *
167    * @return the number of characters read or -1 on end of file.
168    */

169   public int read(char []cbuf, int off, int len)
170     throws IOException JavaDoc
171   {
172     int i = 0;
173
174     for (i = 0; i < len; i++) {
175       int ch = is.read();
176
177       if (ch < 0)
178     return i == 0 ? -1 : i;
179
180       switch (ch) {
181       case -1:
182     return i == 0 ? -1 : i;
183         
184       case 130: // unicode 8218
185
cbuf[off + i] = ',';
186         break;
187         
188       case 131: // unicode 402
189
cbuf[off + i] = 'f';
190         break;
191         
192       case 132: // unicode 8222
193
cbuf[off + i] = '"';
194         break;
195         
196       case 133: // unicode 8230 "..."
197
cbuf[off + i] = (char) 8230;
198         break;
199         
200       case 134: // unicode 8224 (dagger)
201
cbuf[off + i] = '+';
202         break;
203         
204       case 135: // unicode 8225 (double dagger)
205
cbuf[off + i] = '+';
206         break;
207         
208       case 136: // unicode 710
209
cbuf[off + i] = '^';
210         break;
211         
212       case 137: // unicode 8240 (per-mille 0/00)
213
cbuf[off + i] = (char) 8240;
214         break;
215         
216       case 138: // unicode 352
217
cbuf[off + i] = 'S';
218         break;
219         
220       case 139: // unicode 8249
221
cbuf[off + i] = '<';
222         break;
223         
224       case 140: // unicode 338 (OE)
225
cbuf[off + i] = 'O';
226         break;
227         
228       case 145: // unicode 8216
229
case 146: // unicode 8217
230
cbuf[off + i] = '\'';
231         break;
232         
233       case 147: // unicode 8220
234
case 148: // unicode 8221
235
cbuf[off + i] = (char) '"';
236         break;
237         
238       case 149: // unicode 8226 (bullet)
239
cbuf[off + i] = (char) '*';
240         break;
241         
242       case 150: // unicode 8211
243
case 151: // unicode 8212
244
cbuf[off + i] = (char) '-';
245         break;
246         
247       case 152: // unicode 732
248
cbuf[off + i] = (char) '~';
249         break;
250         
251       case 153: // unicode 8482 (trademark)
252
cbuf[off + i] = (char) 8482;
253         break;
254         
255       case 154: // unicode 353
256
cbuf[off + i] = 's';
257         break;
258         
259       case 155: // unicode 8250
260
cbuf[off + i] = '>';
261         break;
262         
263       case 156: // unicode 339 (oe)
264
cbuf[off + i] = 'o';
265         break;
266         
267       case 376: // unicode 376 (Y with umlaut)
268
cbuf[off + i] = 'Y';
269         break;
270         
271       default:
272         cbuf[off + i] = (char) ch;
273       }
274     }
275
276     return i;
277   }
278 }
279
Popular Tags