KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > openedit > modules > edit > SpecialCharacter


1 /*
2 Copyright (c) 2003 eInnovation Inc. All rights reserved
3
4 This library is free software; you can redistribute it and/or modify it under the terms
5 of the GNU Lesser General Public License as published by the Free Software Foundation;
6 either version 2.1 of the License, or (at your option) any later version.
7
8 This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9 without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
10 See the GNU Lesser General Public License for more details.
11 */

12
13 /*
14  * Created on Apr 22, 2003
15  *
16  * To change this generated comment go to
17  * Window>Preferences>Java>Code Generation>Code Template
18  */

19 package com.openedit.modules.edit;
20
21 import java.util.HashMap JavaDoc;
22 import java.util.Iterator JavaDoc;
23 import java.util.Map JavaDoc;
24
25
/**
 * Converts between literal characters and HTML character references.
 *
 * <p>{@link #escapeSpecialCharacters(String)} turns non-ASCII characters (and the
 * ampersand) into HTML references so that text survives a non UTF-8 round trip;
 * {@link #toUnicode(String)} performs the reverse conversion. Both directions share
 * one entity table so that they cannot drift apart.</p>
 *
 * @author avery
 */
public class SpecialCharacter
{
    /**
     * Named entities that are written by {@link #escapeSpecialCharacters(String)} and
     * read back by {@link #toUnicode(String)}. Each row is {entity name, character}.
     * Every character in this table is above U+007F.
     */
    private static final String[][] NAMED_ENTITIES =
    {
        {"agrave", "\u00E0"}, {"Agrave", "\u00C0"},
        {"acirc", "\u00E2"}, {"Acirc", "\u00C2"},
        {"auml", "\u00E4"}, {"Auml", "\u00C4"},
        {"aring", "\u00E5"}, {"Aring", "\u00C5"},
        {"aelig", "\u00E6"}, {"AElig", "\u00C6"},
        {"ccedil", "\u00E7"}, {"Ccedil", "\u00C7"},
        {"egrave", "\u00E8"}, {"Egrave", "\u00C8"},
        {"eacute", "\u00E9"}, {"Eacute", "\u00C9"},
        {"ecirc", "\u00EA"}, {"Ecirc", "\u00CA"},
        {"euml", "\u00EB"}, {"Euml", "\u00CB"},
        {"iuml", "\u00EF"}, {"Iuml", "\u00CF"},
        {"ocirc", "\u00F4"}, {"Ocirc", "\u00D4"},
        {"otilde", "\u00F5"}, {"Otilde", "\u00D5"},
        {"ouml", "\u00F6"}, {"Ouml", "\u00D6"},
        {"oslash", "\u00F8"}, {"Oslash", "\u00D8"},
        {"szlig", "\u00DF"},
        {"ugrave", "\u00F9"}, {"Ugrave", "\u00D9"},
        {"ucirc", "\u00FB"}, {"Ucirc", "\u00DB"},
        {"uuml", "\u00FC"}, {"Uuml", "\u00DC"},
        {"reg", "\u00AE"}, {"copy", "\u00A9"},
        {"euro", "\u20AC"},
        {"ndash", "\u2013"}, {"mdash", "\u2014"},
        {"lsquo", "\u2018"}, {"rsquo", "\u2019"},
        {"ldquo", "\u201C"}, {"rdquo", "\u201D"}
    };

    /**
     * Entities that are only decoded. "quote" is not a real HTML entity but was
     * accepted by earlier versions of this class, so it is kept next to "quot".
     */
    private static final String[][] DECODE_ONLY_ENTITIES =
    {
        {"apos", "'"}, {"lt", "<"}, {"gt", ">"},
        {"quot", "\""}, {"quote", "\""}, {"amp", "&"}
    };

    /** Longest text accepted between '&' and ';' when looking for a reference. */
    private static final int MAX_ENTITY_BODY = 10;

    /** Largest valid Unicode code point. */
    private static final int MAX_CODE_POINT = 0x10FFFF;

    /**
     * Replaces every ampersand and every character above U+007F with an HTML
     * character reference. Characters listed in the entity table become named
     * references (for example {@code &eacute;}); all other non-ASCII characters become
     * decimal numeric references. A valid surrogate pair is written as one reference
     * to the supplementary code point rather than as two surrogate references.
     *
     * <p>Ampersands are always escaped, including those that already start a
     * reference, so the method is not idempotent.</p>
     *
     * @param s the text to escape, may be null
     *
     * @return the escaped text, or null when s is null
     */
    public static String escapeSpecialCharacters(String s)
    {
        if (s == null)
        {
            return null;
        }

        int n = s.length();
        StringBuffer sb = new StringBuffer(n + 16);

        for (int i = 0; i < n; i++)
        {
            char c = s.charAt(i);

            if (c == '&')
            {
                sb.append("&amp;");
            }
            else if (c <= 127)
            {
                sb.append(c);
            }
            else
            {
                String name = entityNameFor(c);

                if (name != null)
                {
                    sb.append('&').append(name).append(';');
                }
                else
                {
                    int codePoint = c;

                    // Merge a high/low surrogate pair into the code point it encodes.
                    if (isHighSurrogate(c) && (i + 1 < n) && isLowSurrogate(s.charAt(i + 1)))
                    {
                        codePoint = 0x10000 + ((c - 0xD800) << 10) + (s.charAt(i + 1) - 0xDC00);
                        i++;
                    }
                    sb.append("&#").append(codePoint).append(';');
                }
            }
        }

        return sb.toString();
    }

    /**
     * Replaces HTML character references with the characters they stand for.
     *
     * <p>Recognised forms are the named entities of this class, decimal references
     * ({@code &#233;}) and hexadecimal references ({@code &#xE9;}). Anything else that
     * starts with an ampersand is copied through unchanged. The input is processed in
     * a single left-to-right pass, so text produced by a replacement is never
     * decoded a second time.</p>
     *
     * @param inString the text to decode, may be null
     *
     * @return the decoded text, or null when inString is null
     */
    public static String toUnicode(String inString)
    {
        if (inString == null)
        {
            return null;
        }

        int amp = inString.indexOf('&');

        if (amp < 0)
        {
            return inString;
        }

        StringBuffer sb = new StringBuffer(inString.length());
        int pos = 0;

        while (amp >= 0)
        {
            sb.append(inString.substring(pos, amp));

            int semi = findEntityEnd(inString, amp);
            String decoded = null;

            if (semi > amp + 1)
            {
                decoded = decodeEntity(inString.substring(amp + 1, semi));
            }

            if (decoded != null)
            {
                sb.append(decoded);
                pos = semi + 1;
            }
            else
            {
                // Not a reference we know: keep the ampersand and carry on after it.
                sb.append('&');
                pos = amp + 1;
            }
            amp = inString.indexOf('&', pos);
        }
        sb.append(inString.substring(pos));

        return sb.toString();
    }

    /** Returns the entity name for a character of the shared table, or null. */
    private static String entityNameFor(char c)
    {
        for (int i = 0; i < NAMED_ENTITIES.length; i++)
        {
            if (NAMED_ENTITIES[i][1].charAt(0) == c)
            {
                return NAMED_ENTITIES[i][0];
            }
        }

        return null;
    }

    /**
     * Returns the index of the ';' that closes the reference starting at ampIndex,
     * or -1 when none is found within MAX_ENTITY_BODY characters or another '&'
     * comes first.
     */
    private static int findEntityEnd(String text, int ampIndex)
    {
        int limit = Math.min(text.length(), ampIndex + 2 + MAX_ENTITY_BODY);

        for (int j = ampIndex + 1; j < limit; j++)
        {
            char ch = text.charAt(j);

            if (ch == ';')
            {
                return j;
            }
            if (ch == '&')
            {
                return -1;
            }
        }

        return -1;
    }

    /** Decodes the text between '&' and ';', or returns null when it is not recognised. */
    private static String decodeEntity(String body)
    {
        if (body.length() == 0)
        {
            return null;
        }
        if (body.charAt(0) == '#')
        {
            return decodeNumeric(body);
        }

        String value = lookup(DECODE_ONLY_ENTITIES, body);

        if (value == null)
        {
            value = lookup(NAMED_ENTITIES, body);
        }

        return value;
    }

    /** Returns the value stored for name in a {name, value} table, or null. */
    private static String lookup(String[][] table, String name)
    {
        for (int i = 0; i < table.length; i++)
        {
            if (table[i][0].equals(name))
            {
                return table[i][1];
            }
        }

        return null;
    }

    /**
     * Decodes a numeric reference body such as "#233" or "#xE9". Returns null when
     * the digits are missing or malformed, or the value exceeds MAX_CODE_POINT.
     */
    private static String decodeNumeric(String body)
    {
        int radix = 10;
        int start = 1;

        if (body.length() > 1 && (body.charAt(1) == 'x' || body.charAt(1) == 'X'))
        {
            radix = 16;
            start = 2;
        }
        if (start >= body.length())
        {
            return null;
        }

        int codePoint = 0;

        for (int i = start; i < body.length(); i++)
        {
            char ch = body.charAt(i);
            // Only ASCII digits are valid in a reference.
            int digit = (ch < 128) ? Character.digit(ch, radix) : -1;

            if (digit < 0)
            {
                return null;
            }
            codePoint = (codePoint * radix) + digit;

            if (codePoint > MAX_CODE_POINT)
            {
                return null;
            }
        }

        if (codePoint < 0x10000)
        {
            return String.valueOf((char) codePoint);
        }

        // Supplementary code points need a surrogate pair in a Java string.
        int offset = codePoint - 0x10000;
        char high = (char) (0xD800 + (offset >> 10));
        char low = (char) (0xDC00 + (offset & 0x3FF));

        return new StringBuffer(2).append(high).append(low).toString();
    }

    /** Tells whether c is in the high (leading) surrogate range. */
    private static boolean isHighSurrogate(char c)
    {
        return c >= 0xD800 && c <= 0xDBFF;
    }

    /** Tells whether c is in the low (trailing) surrogate range. */
    private static boolean isLowSurrogate(char c)
    {
        return c >= 0xDC00 && c <= 0xDFFF;
    }
}
393
Popular Tags