KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sun > enterprise > diagnostics > report > html > Escape


1 /*
2  * The contents of this file are subject to the terms
3  * of the Common Development and Distribution License
4  * (the License). You may not use this file except in
5  * compliance with the License.
6  *
7  * You can obtain a copy of the license at
8  * https://glassfish.dev.java.net/public/CDDLv1.0.html or
9  * glassfish/bootstrap/legal/CDDLv1.0.txt.
10  * See the License for the specific language governing
11  * permissions and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL
14  * Header Notice in each file and include the License file
15  * at glassfish/bootstrap/legal/CDDLv1.0.txt.
16  * If applicable, add the following below the CDDL Header,
17  * with the fields enclosed by brackets [] replaced by
18  * you own identifying information:
19  * "Portions Copyrighted [year] [name of copyright owner]"
20  *
21  * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
22  */

23 package com.sun.enterprise.diagnostics.report.html;
24
25 import java.io.ByteArrayOutputStream JavaDoc;
26 import java.io.IOException JavaDoc;
27 import java.util.HashMap JavaDoc;
28 import java.util.Map JavaDoc;
29
30 /**
31  * Implement HTML escapes. Additional escapes can be added.
32  * <p>
33  * This class is a singleton. If you subclass and override the
34  * <code>escape</code> methods, use <code>setInstance</code> to
35  * install your handler.
36  */

37 public class Escape {
38     
39     /** A value to signal an undefined entity. */
40     public static final int UNDEFINED = -1;
41     
42     /** The instance to use. */
43     private static Escape instance = null;
44     
45     /**
46      * If true, use hexadecimal character references. If false,
47      * use decimal character references.
48      */

49     private boolean useHex = false;
50     
51     /**
52      * These are the entities which are always replaced on output. Add
53      * entities which should always be recognized on input and always
54      * replaced on output here.
55      */

56     private final Map JavaDoc<Character JavaDoc,String JavaDoc> alwaysReplace =
57         new HashMap JavaDoc<Character JavaDoc,String JavaDoc>();
58     {
59         alwaysReplace.put(new Character JavaDoc('&'), "amp");
60         alwaysReplace.put(new Character JavaDoc('<'), "lt");
61         alwaysReplace.put(new Character JavaDoc('>'), "gt");
62         alwaysReplace.put(new Character JavaDoc('"'), "quot");
63         alwaysReplace.put(new Character JavaDoc('\''), "#039");
64         alwaysReplace.put(new Character JavaDoc('\u00A0'), "nbsp");
65     }
66     
67     /**
68      * This holds all entities. The map is generated by reversing
69      * the {@link #setEntity(String, char)} method.
70      */

71     private final Map JavaDoc<String JavaDoc,Character JavaDoc> entityToChar =
72         new HashMap JavaDoc<String JavaDoc,Character JavaDoc>();
73     
74     /**
75      * This holds all entities. Add entities which should be recognized
76      * on input but not (necessarily) generated on output here.
77      * <p>
78      * This set was automatically generated from the HTML 4.01 character
79      * entity specification. You can find it online at:
80      * <a HREF="http://www.w3.org/TR/1999/REC-html401-19991224/sgml/entities.html"
81      * >http://www.w3.org/TR/1999/REC-html401-19991224/sgml/entities.html</a>.
82      * <p>
83      * Note that this method is initialized using the
84      * {@link #setEntity(String, char)} method.
85      */

86     private final Map JavaDoc<Character JavaDoc,String JavaDoc> charToEntity = new HashMap JavaDoc<Character JavaDoc,String JavaDoc>();
87     {
88         /*
89          * The following are the basic escapes in the ISO 8859-1
90          * range.
91          */

92         /** no-break space = non-breaking space, U+00A0 ISOnum */
93         setEntity("nbsp", (char) 160);
94         /** inverted exclamation mark, U+00A1 ISOnum */
95         setEntity("iexcl", (char) 161);
96         /** cent sign, U+00A2 ISOnum */
97         setEntity("cent", (char) 162);
98         /** pound sign, U+00A3 ISOnum */
99         setEntity("pound", (char) 163);
100         /** currency sign, U+00A4 ISOnum */
101         setEntity("curren", (char) 164);
102         /** yen sign = yuan sign, U+00A5 ISOnum */
103         setEntity("yen", (char) 165);
104         /** broken bar = broken vertical bar, U+00A6 ISOnum */
105         setEntity("brvbar", (char) 166);
106         /** section sign, U+00A7 ISOnum */
107         setEntity("sect", (char) 167);
108         /** diaeresis = spacing diaeresis, U+00A8 ISOdia */
109         setEntity("uml", (char) 168);
110         /** copyright sign, U+00A9 ISOnum */
111         setEntity("copy", (char) 169);
112         /** feminine ordinal indicator, U+00AA ISOnum */
113         setEntity("ordf", (char) 170);
114         /** left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum */
115         setEntity("laquo", (char) 171);
116         /** not sign, U+00AC ISOnum */
117         setEntity("not", (char) 172);
118         /** soft hyphen = discretionary hyphen, U+00AD ISOnum */
119         setEntity("shy", (char) 173);
120         /** registered sign = registered trade mark sign, U+00AE ISOnum */
121         setEntity("reg", (char) 174);
122         /** macron = spacing macron = overline = APL overbar, U+00AF ISOdia */
123         setEntity("macr", (char) 175);
124         /** degree sign, U+00B0 ISOnum */
125         setEntity("deg", (char) 176);
126         /** plus-minus sign = plus-or-minus sign, U+00B1 ISOnum */
127         setEntity("plusmn", (char) 177);
128         /** superscript two = superscript digit two = squared, U+00B2 ISOnum */
129         setEntity("sup2", (char) 178);
130         /** superscript three = superscript digit three = cubed, U+00B3 ISOnum */
131         setEntity("sup3", (char) 179);
132         /** acute accent = spacing acute, U+00B4 ISOdia */
133         setEntity("acute", (char) 180);
134         /** micro sign, U+00B5 ISOnum */
135         setEntity("micro", (char) 181);
136         /** pilcrow sign = paragraph sign, U+00B6 ISOnum */
137         setEntity("para", (char) 182);
138         /** middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum */
139         setEntity("middot", (char) 183);
140         /** cedilla = spacing cedilla, U+00B8 ISOdia */
141         setEntity("cedil", (char) 184);
142         /** superscript one = superscript digit one, U+00B9 ISOnum */
143         setEntity("sup1", (char) 185);
144         /** masculine ordinal indicator, U+00BA ISOnum */
145         setEntity("ordm", (char) 186);
146         /** right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum */
147         setEntity("raquo", (char) 187);
148         /** vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum */
149         setEntity("frac14", (char) 188);
150         /** vulgar fraction one half = fraction one half, U+00BD ISOnum */
151         setEntity("frac12", (char) 189);
152         /** vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum */
153         setEntity("frac34", (char) 190);
154         /** inverted question mark = turned question mark, U+00BF ISOnum */
155         setEntity("iquest", (char) 191);
156         /** latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1 */
157         setEntity("Agrave", (char) 192);
158         /** latin capital letter A with acute, U+00C1 ISOlat1 */
159         setEntity("Aacute", (char) 193);
160         /** latin capital letter A with circumflex, U+00C2 ISOlat1 */
161         setEntity("Acirc", (char) 194);
162         /** latin capital letter A with tilde, U+00C3 ISOlat1 */
163         setEntity("Atilde", (char) 195);
164         /** latin capital letter A with diaeresis, U+00C4 ISOlat1 */
165         setEntity("Auml", (char) 196);
166         /** latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1 */
167         setEntity("Aring", (char) 197);
168         /** latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1 */
169         setEntity("AElig", (char) 198);
170         /** latin capital letter C with cedilla, U+00C7 ISOlat1 */
171         setEntity("Ccedil", (char) 199);
172         /** latin capital letter E with grave, U+00C8 ISOlat1 */
173         setEntity("Egrave", (char) 200);
174         /** latin capital letter E with acute, U+00C9 ISOlat1 */
175         setEntity("Eacute", (char) 201);
176         /** latin capital letter E with circumflex, U+00CA ISOlat1 */
177         setEntity("Ecirc", (char) 202);
178         /** latin capital letter E with diaeresis, U+00CB ISOlat1 */
179         setEntity("Euml", (char) 203);
180         /** latin capital letter I with grave, U+00CC ISOlat1 */
181         setEntity("Igrave", (char) 204);
182         /** latin capital letter I with acute, U+00CD ISOlat1 */
183         setEntity("Iacute", (char) 205);
184         /** latin capital letter I with circumflex, U+00CE ISOlat1 */
185         setEntity("Icirc", (char) 206);
186         /** latin capital letter I with diaeresis, U+00CF ISOlat1 */
187         setEntity("Iuml", (char) 207);
188         /** latin capital letter ETH, U+00D0 ISOlat1 */
189         setEntity("ETH", (char) 208);
190         /** latin capital letter N with tilde, U+00D1 ISOlat1 */
191         setEntity("Ntilde", (char) 209);
192         /** latin capital letter O with grave, U+00D2 ISOlat1 */
193         setEntity("Ograve", (char) 210);
194         /** latin capital letter O with acute, U+00D3 ISOlat1 */
195         setEntity("Oacute", (char) 211);
196         /** latin capital letter O with circumflex, U+00D4 ISOlat1 */
197         setEntity("Ocirc", (char) 212);
198         /** latin capital letter O with tilde, U+00D5 ISOlat1 */
199         setEntity("Otilde", (char) 213);
200         /** latin capital letter O with diaeresis, U+00D6 ISOlat1 */
201         setEntity("Ouml", (char) 214);
202         /** multiplication sign, U+00D7 ISOnum */
203         setEntity("times", (char) 215);
204         /** latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1 */
205         setEntity("Oslash", (char) 216);
206         /** latin capital letter U with grave, U+00D9 ISOlat1 */
207         setEntity("Ugrave", (char) 217);
208         /** latin capital letter U with acute, U+00DA ISOlat1 */
209         setEntity("Uacute", (char) 218);
210         /** latin capital letter U with circumflex, U+00DB ISOlat1 */
211         setEntity("Ucirc", (char) 219);
212         /** latin capital letter U with diaeresis, U+00DC ISOlat1 */
213         setEntity("Uuml", (char) 220);
214         /** latin capital letter Y with acute, U+00DD ISOlat1 */
215         setEntity("Yacute", (char) 221);
216         /** latin capital letter THORN, U+00DE ISOlat1 */
217         setEntity("THORN", (char) 222);
218         /** latin small letter sharp s = ess-zed, U+00DF ISOlat1 */
219         setEntity("szlig", (char) 223);
220         /** latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1 */
221         setEntity("agrave", (char) 224);
222         /** latin small letter a with acute, U+00E1 ISOlat1 */
223         setEntity("aacute", (char) 225);
224         /** latin small letter a with circumflex, U+00E2 ISOlat1 */
225         setEntity("acirc", (char) 226);
226         /** latin small letter a with tilde, U+00E3 ISOlat1 */
227         setEntity("atilde", (char) 227);
228         /** latin small letter a with diaeresis, U+00E4 ISOlat1 */
229         setEntity("auml", (char) 228);
230         /** latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1 */
231         setEntity("aring", (char) 229);
232         /** latin small letter ae = latin small ligature ae, U+00E6 ISOlat1 */
233         setEntity("aelig", (char) 230);
234         /** latin small letter c with cedilla, U+00E7 ISOlat1 */
235         setEntity("ccedil", (char) 231);
236         /** latin small letter e with grave, U+00E8 ISOlat1 */
237         setEntity("egrave", (char) 232);
238         /** latin small letter e with acute, U+00E9 ISOlat1 */
239         setEntity("eacute", (char) 233);
240         /** latin small letter e with circumflex, U+00EA ISOlat1 */
241         setEntity("ecirc", (char) 234);
242         /** latin small letter e with diaeresis, U+00EB ISOlat1 */
243         setEntity("euml", (char) 235);
244         /** latin small letter i with grave, U+00EC ISOlat1 */
245         setEntity("igrave", (char) 236);
246         /** latin small letter i with acute, U+00ED ISOlat1 */
247         setEntity("iacute", (char) 237);
248         /** latin small letter i with circumflex, U+00EE ISOlat1 */
249         setEntity("icirc", (char) 238);
250         /** latin small letter i with diaeresis, U+00EF ISOlat1 */
251         setEntity("iuml", (char) 239);
252         /** latin small letter eth, U+00F0 ISOlat1 */
253         setEntity("eth", (char) 240);
254         /** latin small letter n with tilde, U+00F1 ISOlat1 */
255         setEntity("ntilde", (char) 241);
256         /** latin small letter o with grave, U+00F2 ISOlat1 */
257         setEntity("ograve", (char) 242);
258         /** latin small letter o with acute, U+00F3 ISOlat1 */
259         setEntity("oacute", (char) 243);
260         /** latin small letter o with circumflex, U+00F4 ISOlat1 */
261         setEntity("ocirc", (char) 244);
262         /** latin small letter o with tilde, U+00F5 ISOlat1 */
263         setEntity("otilde", (char) 245);
264         /** latin small letter o with diaeresis, U+00F6 ISOlat1 */
265         setEntity("ouml", (char) 246);
266         /** division sign, U+00F7 ISOnum */
267         setEntity("divide", (char) 247);
268         /** latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1 */
269         setEntity("oslash", (char) 248);
270         /** latin small letter u with grave, U+00F9 ISOlat1 */
271         setEntity("ugrave", (char) 249);
272         /** latin small letter u with acute, U+00FA ISOlat1 */
273         setEntity("uacute", (char) 250);
274         /** latin small letter u with circumflex, U+00FB ISOlat1 */
275         setEntity("ucirc", (char) 251);
276         /** latin small letter u with diaeresis, U+00FC ISOlat1 */
277         setEntity("uuml", (char) 252);
278         /** latin small letter y with acute, U+00FD ISOlat1 */
279         setEntity("yacute", (char) 253);
280         /** latin small letter thorn, U+00FE ISOlat1 */
281         setEntity("thorn", (char) 254);
282         /** latin small letter y with diaeresis, U+00FF ISOlat1 */
283         setEntity("yuml", (char) 255);
284
285         /*
286          * The following are the greek and mathematical symbols.
287          */

288         /** latin small f with hook = function = florin, U+0192 ISOtech */
289         setEntity("fnof", (char) 402);
290         /** greek capital letter alpha, U+0391 */
291         setEntity("Alpha", (char) 913);
292         /** greek capital letter beta, U+0392 */
293         setEntity("Beta", (char) 914);
294         /** greek capital letter gamma, U+0393 ISOgrk3 */
295         setEntity("Gamma", (char) 915);
296         /** greek capital letter delta, U+0394 ISOgrk3 */
297         setEntity("Delta", (char) 916);
298         /** greek capital letter epsilon, U+0395 */
299         setEntity("Epsilon", (char) 917);
300         /** greek capital letter zeta, U+0396 */
301         setEntity("Zeta", (char) 918);
302         /** greek capital letter eta, U+0397 */
303         setEntity("Eta", (char) 919);
304         /** greek capital letter theta, U+0398 ISOgrk3 */
305         setEntity("Theta", (char) 920);
306         /** greek capital letter iota, U+0399 */
307         setEntity("Iota", (char) 921);
308         /** greek capital letter kappa, U+039A */
309         setEntity("Kappa", (char) 922);
310         /** greek capital letter lambda, U+039B ISOgrk3 */
311         setEntity("Lambda", (char) 923);
312         /** greek capital letter mu, U+039C */
313         setEntity("Mu", (char) 924);
314         /** greek capital letter nu, U+039D */
315         setEntity("Nu", (char) 925);
316         /** greek capital letter xi, U+039E ISOgrk3 */
317         setEntity("Xi", (char) 926);
318         /** greek capital letter omicron, U+039F */
319         setEntity("Omicron", (char) 927);
320         /** greek capital letter pi, U+03A0 ISOgrk3 */
321         setEntity("Pi", (char) 928);
322         /** greek capital letter rho, U+03A1 */
323         setEntity("Rho", (char) 929);
324         /** greek capital letter sigma, U+03A3 ISOgrk3 */
325         setEntity("Sigma", (char) 931);
326         /** greek capital letter tau, U+03A4 */
327         setEntity("Tau", (char) 932);
328         /** greek capital letter upsilon, U+03A5 ISOgrk3 */
329         setEntity("Upsilon", (char) 933);
330         /** greek capital letter phi, U+03A6 ISOgrk3 */
331         setEntity("Phi", (char) 934);
332         /** greek capital letter chi, U+03A7 */
333         setEntity("Chi", (char) 935);
334         /** greek capital letter psi, U+03A8 ISOgrk3 */
335         setEntity("Psi", (char) 936);
336         /** greek capital letter omega, U+03A9 ISOgrk3 */
337         setEntity("Omega", (char) 937);
338         /** greek small letter alpha, U+03B1 ISOgrk3 */
339         setEntity("alpha", (char) 945);
340         /** greek small letter beta, U+03B2 ISOgrk3 */
341         setEntity("beta", (char) 946);
342         /** greek small letter gamma, U+03B3 ISOgrk3 */
343         setEntity("gamma", (char) 947);
344         /** greek small letter delta, U+03B4 ISOgrk3 */
345         setEntity("delta", (char) 948);
346         /** greek small letter epsilon, U+03B5 ISOgrk3 */
347         setEntity("epsilon", (char) 949);
348         /** greek small letter zeta, U+03B6 ISOgrk3 */
349         setEntity("zeta", (char) 950);
350         /** greek small letter eta, U+03B7 ISOgrk3 */
351         setEntity("eta", (char) 951);
352         /** greek small letter theta, U+03B8 ISOgrk3 */
353         setEntity("theta", (char) 952);
354         /** greek small letter iota, U+03B9 ISOgrk3 */
355         setEntity("iota", (char) 953);
356         /** greek small letter kappa, U+03BA ISOgrk3 */
357         setEntity("kappa", (char) 954);
358         /** greek small letter lambda, U+03BB ISOgrk3 */
359         setEntity("lambda", (char) 955);
360         /** greek small letter mu, U+03BC ISOgrk3 */
361         setEntity("mu", (char) 956);
362         /** greek small letter nu, U+03BD ISOgrk3 */
363         setEntity("nu", (char) 957);
364         /** greek small letter xi, U+03BE ISOgrk3 */
365         setEntity("xi", (char) 958);
366         /** greek small letter omicron, U+03BF NEW */
367         setEntity("omicron", (char) 959);
368         /** greek small letter pi, U+03C0 ISOgrk3 */
369         setEntity("pi", (char) 960);
370         /** greek small letter rho, U+03C1 ISOgrk3 */
371         setEntity("rho", (char) 961);
372         /** greek small letter final sigma, U+03C2 ISOgrk3 */
373         setEntity("sigmaf", (char) 962);
374         /** greek small letter sigma, U+03C3 ISOgrk3 */
375         setEntity("sigma", (char) 963);
376         /** greek small letter tau, U+03C4 ISOgrk3 */
377         setEntity("tau", (char) 964);
378         /** greek small letter upsilon, U+03C5 ISOgrk3 */
379         setEntity("upsilon", (char) 965);
380         /** greek small letter phi, U+03C6 ISOgrk3 */
381         setEntity("phi", (char) 966);
382         /** greek small letter chi, U+03C7 ISOgrk3 */
383         setEntity("chi", (char) 967);
384         /** greek small letter psi, U+03C8 ISOgrk3 */
385         setEntity("psi", (char) 968);
386         /** greek small letter omega, U+03C9 ISOgrk3 */
387         setEntity("omega", (char) 969);
388         /** greek small letter theta symbol, U+03D1 NEW */
389         setEntity("thetasym", (char) 977);
390         /** greek upsilon with hook symbol, U+03D2 NEW */
391         setEntity("upsih", (char) 978);
392         /** greek pi symbol, U+03D6 ISOgrk3 */
393         setEntity("piv", (char) 982);
394         /** bullet = black small circle, U+2022 ISOpub */
395         setEntity("bull", (char) 8226);
396         /** horizontal ellipsis = three dot leader, U+2026 ISOpub */
397         setEntity("hellip", (char) 8230);
398         /** prime = minutes = feet, U+2032 ISOtech */
399         setEntity("prime", (char) 8242);
400         /** double prime = seconds = inches, U+2033 ISOtech */
401         setEntity("Prime", (char) 8243);
402         /** overline = spacing overscore, U+203E NEW */
403         setEntity("oline", (char) 8254);
404         /** fraction slash, U+2044 NEW */
405         setEntity("frasl", (char) 8260);
406         /** script capital P = power set = Weierstrass p, U+2118 ISOamso */
407         setEntity("weierp", (char) 8472);
408         /** blackletter capital I = imaginary part, U+2111 ISOamso */
409         setEntity("image", (char) 8465);
410         /** blackletter capital R = real part symbol, U+211C ISOamso */
411         setEntity("real", (char) 8476);
412         /** trade mark sign, U+2122 ISOnum */
413         setEntity("trade", (char) 8482);
414         /** alef symbol = first transfinite cardinal, U+2135 NEW */
415         setEntity("alefsym", (char) 8501);
416         /** leftwards arrow, U+2190 ISOnum */
417         setEntity("larr", (char) 8592);
418         /** upwards arrow, U+2191 ISOnum*/
419         setEntity("uarr", (char) 8593);
420         /** rightwards arrow, U+2192 ISOnum */
421         setEntity("rarr", (char) 8594);
422         /** downwards arrow, U+2193 ISOnum */
423         setEntity("darr", (char) 8595);
424         /** left right arrow, U+2194 ISOamsa */
425         setEntity("harr", (char) 8596);
426         /** downwards arrow with corner leftwards = carriage return, U+21B5 NEW */
427         setEntity("crarr", (char) 8629);
428         /** leftwards double arrow, U+21D0 ISOtech */
429         setEntity("lArr", (char) 8656);
430         /** upwards double arrow, U+21D1 ISOamsa */
431         setEntity("uArr", (char) 8657);
432         /** rightwards double arrow, U+21D2 ISOtech */
433         setEntity("rArr", (char) 8658);
434         /** downwards double arrow, U+21D3 ISOamsa */
435         setEntity("dArr", (char) 8659);
436         /** left right double arrow, U+21D4 ISOamsa */
437         setEntity("hArr", (char) 8660);
438         /** for all, U+2200 ISOtech */
439         setEntity("forall", (char) 8704);
440         /** partial differential, U+2202 ISOtech */
441         setEntity("part", (char) 8706);
442         /** there exists, U+2203 ISOtech */
443         setEntity("exist", (char) 8707);
444         /** empty set = null set = diameter, U+2205 ISOamso */
445         setEntity("empty", (char) 8709);
446         /** nabla = backward difference, U+2207 ISOtech */
447         setEntity("nabla", (char) 8711);
448         /** element of, U+2208 ISOtech */
449         setEntity("isin", (char) 8712);
450         /** not an element of, U+2209 ISOtech */
451         setEntity("notin", (char) 8713);
452         /** contains as member, U+220B ISOtech */
453         setEntity("ni", (char) 8715);
454         /** n-ary product = product sign, U+220F ISOamsb */
455         setEntity("prod", (char) 8719);
456         /** n-ary sumation, U+2211 ISOamsb */
457         setEntity("sum", (char) 8721);
458         /** minus sign, U+2212 ISOtech */
459         setEntity("minus", (char) 8722);
460         /** asterisk operator, U+2217 ISOtech */
461         setEntity("lowast", (char) 8727);
462         /** square root = radical sign, U+221A ISOtech */
463         setEntity("radic", (char) 8730);
464         /** proportional to, U+221D ISOtech */
465         setEntity("prop", (char) 8733);
466         /** infinity, U+221E ISOtech */
467         setEntity("infin", (char) 8734);
468         /** angle, U+2220 ISOamso */
469         setEntity("ang", (char) 8736);
470         /** logical and = wedge, U+2227 ISOtech */
471         setEntity("and", (char) 8743);
472         /** logical or = vee, U+2228 ISOtech */
473         setEntity("or", (char) 8744);
474         /** intersection = cap, U+2229 ISOtech */
475         setEntity("cap", (char) 8745);
476         /** union = cup, U+222A ISOtech */
477         setEntity("cup", (char) 8746);
478         /** integral, U+222B ISOtech */
479         setEntity("int", (char) 8747);
480         /** therefore, U+2234 ISOtech */
481         setEntity("there4", (char) 8756);
482         /** tilde operator = varies with = similar to, U+223C ISOtech */
483         setEntity("sim", (char) 8764);
484         /** approximately equal to, U+2245 ISOtech */
485         setEntity("cong", (char) 8773);
486         /** almost equal to = asymptotic to, U+2248 ISOamsr */
487         setEntity("asymp", (char) 8776);
488         /** not equal to, U+2260 ISOtech */
489         setEntity("ne", (char) 8800);
490         /** identical to, U+2261 ISOtech */
491         setEntity("equiv", (char) 8801);
492         /** less-than or equal to, U+2264 ISOtech */
493         setEntity("le", (char) 8804);
494         /** greater-than or equal to, U+2265 ISOtech */
495         setEntity("ge", (char) 8805);
496         /** subset of, U+2282 ISOtech */
497         setEntity("sub", (char) 8834);
498         /** superset of, U+2283 ISOtech */
499         setEntity("sup", (char) 8835);
500         /** not a subset of, U+2284 ISOamsn */
501         setEntity("nsub", (char) 8836);
502         /** subset of or equal to, U+2286 ISOtech */
503         setEntity("sube", (char) 8838);
504         /** superset of or equal to, U+2287 ISOtech */
505         setEntity("supe", (char) 8839);
506         /** circled plus = direct sum, U+2295 ISOamsb */
507         setEntity("oplus", (char) 8853);
508         /** circled times = vector product, U+2297 ISOamsb */
509         setEntity("otimes", (char) 8855);
510         /** up tack = orthogonal to = perpendicular, U+22A5 ISOtech */
511         setEntity("perp", (char) 8869);
512         /** dot operator, U+22C5 ISOamsb */
513         setEntity("sdot", (char) 8901);
514         /** left ceiling = apl upstile, U+2308 ISOamsc */
515         setEntity("lceil", (char) 8968);
516         /** right ceiling, U+2309 ISOamsc */
517         setEntity("rceil", (char) 8969);
518         /** left floor = apl downstile, U+230A ISOamsc */
519         setEntity("lfloor", (char) 8970);
520         /** right floor, U+230B ISOamsc */
521         setEntity("rfloor", (char) 8971);
522         /** left-pointing angle bracket = bra, U+2329 ISOtech */
523         setEntity("lang", (char) 9001);
524         /** right-pointing angle bracket = ket, U+232A ISOtech */
525         setEntity("rang", (char) 9002);
526         /** lozenge, U+25CA ISOpub */
527         setEntity("loz", (char) 9674);
528         /** black spade suit, U+2660 ISOpub */
529         setEntity("spades", (char) 9824);
530         /** black club suit = shamrock, U+2663 ISOpub */
531         setEntity("clubs", (char) 9827);
532         /** black heart suit = valentine, U+2665 ISOpub */
533         setEntity("hearts", (char) 9829);
534         /** black diamond suit, U+2666 ISOpub */
535         setEntity("diams", (char) 9830);
536
537         /*
538          * The following are other special symbols included in the
539          * list of HTML character entities for 4.01.
540          */

541         /** quotation mark = APL quote, U+0022 ISOnum */
542         setEntity("quot", (char) 34);
543         /** ampersand, U+0026 ISOnum */
544         setEntity("amp", (char) 38);
545         /** less-than sign, U+003C ISOnum */
546         setEntity("lt", (char) 60);
547         /** greater-than sign, U+003E ISOnum */
548         setEntity("gt", (char) 62);
549         /** latin capital ligature OE, U+0152 ISOlat2 */
550         setEntity("OElig", (char) 338);
551         /** latin small ligature oe, U+0153 ISOlat2 */
552         setEntity("oelig", (char) 339);
553         /** latin capital letter S with caron, U+0160 ISOlat2 */
554         setEntity("Scaron", (char) 352);
555         /** latin small letter s with caron, U+0161 ISOlat2 */
556         setEntity("scaron", (char) 353);
557         /** latin capital letter Y with diaeresis, U+0178 ISOlat2 */
558         setEntity("Yuml", (char) 376);
559         /** modifier letter circumflex accent, U+02C6 ISOpub */
560         setEntity("circ", (char) 710);
561         /** small tilde, U+02DC ISOdia */
562         setEntity("tilde", (char) 732);
563         /** en space, U+2002 ISOpub */
564         setEntity("ensp", (char) 8194);
565         /** em space, U+2003 ISOpub */
566         setEntity("emsp", (char) 8195);
567         /** thin space, U+2009 ISOpub */
568         setEntity("thinsp", (char) 8201);
569         /** zero width non-joiner, U+200C NEW RFC 2070 */
570         setEntity("zwnj", (char) 8204);
571         /** zero width joiner, U+200D NEW RFC 2070 */
572         setEntity("zwj", (char) 8205);
573         /** left-to-right mark, U+200E NEW RFC 2070 */
574         setEntity("lrm", (char) 8206);
575         /** right-to-left mark, U+200F NEW RFC 2070 */
576         setEntity("rlm", (char) 8207);
577         /** en dash, U+2013 ISOpub */
578         setEntity("ndash", (char) 8211);
579         /** em dash, U+2014 ISOpub */
580         setEntity("mdash", (char) 8212);
581         /** left single quotation mark, U+2018 ISOnum */
582         setEntity("lsquo", (char) 8216);
583         /** right single quotation mark, U+2019 ISOnum */
584         setEntity("rsquo", (char) 8217);
585         /** single low-9 quotation mark, U+201A NEW */
586         setEntity("sbquo", (char) 8218);
587         /** left double quotation mark, U+201C ISOnum */
588         setEntity("ldquo", (char) 8220);
589         /** right double quotation mark, U+201D ISOnum */
590         setEntity("rdquo", (char) 8221);
591         /** double low-9 quotation mark, U+201E NEW */
592         setEntity("bdquo", (char) 8222);
593         /** dagger, U+2020 ISOpub */
594         setEntity("dagger", (char) 8224);
595         /** double dagger, U+2021 ISOpub */
596         setEntity("Dagger", (char) 8225);
597         /** per mille sign, U+2030 ISOtech */
598         setEntity("permil", (char) 8240);
599         /** single left-pointing angle quotation mark, U+2039 ISO proposed */
600         setEntity("lsaquo", (char) 8249);
601         /** single right-pointing angle quotation mark, U+203A ISO proposed */
602         setEntity("rsaquo", (char) 8250);
603         /** euro sign, U+20AC NEW */
604         setEntity("euro", (char) 8364);
605         charToEntity.put(new Character JavaDoc(';'), "semi");
606         charToEntity.put(new Character JavaDoc('\u00A0'), "nbsp");
607     }
608     
609     
610     /**
611      * This field holds the list of non-alphanumeric characters to
612      * preserve as-is in URLs.
613      */

614     private String JavaDoc preserve = "_-!.~#()*" + ",;:$&+=" + "?/[]@";
615     
616     
617     /**
618      * Make a new escape instance. This method is protected since only
619      * subclasses should use it. Do not create instances of this class
620      * directly; use {@link #getInstance()} to get the correct
621      * <code>Escape</code> instance to use.
622      */

623     protected Escape() {
624         super();
625     }
626     
627     
628     /**
629      * Get the escape instance to use to escape strings.
630      * @return The instance to use.
631      * @see #setInstance(Escape)
632      */

633     public static final Escape getInstance() {
634         if (instance == null) {
635             instance = new Escape();
636         }
637         return instance;
638     }
639     
640     
641     /**
642      * Set the instance to use to escape strings.
643      * @param escape The instance to use.
644      * @return The instance to use.
645      * @see #getInstance()
646      */

647     public static final Escape setInstance(Escape escape) {
648         if (escape == null) {
649             throw new NullPointerException JavaDoc("Escape instance is null.");
650         }
651         instance = escape;
652         return instance;
653     }
654     
655     
656     /**
657      * Given a character, return the appropriate entity if there
658      * is an entity representation for this character. Otherwise
659      * return a numeric character reference.
660      * @param ch The character to encode.
661      * @return The encoded string.
662      */

663     public String JavaDoc encodeAsEntity(char ch) {
664         String JavaDoc replacement = charToEntity.get(new Character JavaDoc(ch));
665         if (replacement == null) {
666             String JavaDoc value = "" + (int) ch;
667             for (int i = value.length(); i < 3; i++) {
668                 value = "0" + value;
669             } // Pad with zeros to length three.
670
return "&#" + value + ";";
671         } else {
672             return "&" + replacement + ";";
673         }
674     }
675     
676     
677     /**
678      * Decode an entity or numeric character reference, and return the
679      * appropriate character. Entity names are case-sensitive.
680      * @param name An entity or numeric character reference. It can
681      * include the ampersand and semicolon, or not.
682      * @return Either the character referenced, or the input
683      * string.
684      */

685     public String JavaDoc decodeAsEntity(String JavaDoc name) {
686         if (name == null) {
687             throw new NullPointerException JavaDoc("Entity name is null.");
688         }
689         
690         // This should just be the entity name. If the entity is
691
// decorated, remove the decorations.
692
if (name.startsWith("&") && name.endsWith(";")) {
693             name = name.substring(1, name.length()-1);
694         }
695         
696         // See if this is a numeric character reference (ISO 10646).
697
// If the entity name starts with a hash mark, it is. The
698
// next character determines if this is hex or decimal. If
699
// the next character is an x, then this is hex.
700
// Section 5.3.1
701
if (name.startsWith("#")) {
702             try {
703                 name = name.substring(1);
704                 if (name.startsWith("X")) {
705                     name = name.substring(1);
706                     return "" + Integer.parseInt(name, 16);
707                 } else {
708                     return "" + Integer.parseInt(name);
709                 }
710             } catch (NumberFormatException JavaDoc nfe) {
711                 return "&" + name + ";";
712             }
713         }
714         
715         // Get the entity's value, if it is defined.
716
Character JavaDoc value = entityToChar.get(name);
717         if (value == null) {
718             return "&" + name + ";";
719         } else {
720             return "" + value.charValue();
721         }
722     }
723     
724     
725     /**
726      * Encode a string by replacing characters with entity references
727      * or numeric character references, if there is no named entity.
728      * <p>
729      * The characters which will always be replaced are:
730      * <ul>
731      * <li>&amp;amp; (&amp;)</li>
732      * <li>&amp;lt; (&lt;)</li>
733      * <li>&amp;gt; (&gt;)</li>
734      * <li>&amp;quot; (&quot;)</li>
735      * <li>&amp;#039; (&#039;)</li>
736      * <li>&amp;nbsp; (&nbsp;)</li>
737      * </ul>
738      * Additionally, anything outside of the ISO 8859-1 range will be
739      * encoded. From what I've read, this is a good idea.
740      * @param cdata The string to encode.
741      * @param characters Additional characters which should be encoded.
742      * @return The encoded string.
743      * @see #encodeAsEntity(char)
744      */

745     public String JavaDoc encodeEntities(String JavaDoc cdata, String JavaDoc characters) {
746         if (cdata == null) {
747             throw new NullPointerException JavaDoc("The character data to " +
748                     "encode is null.");
749         }
750         if (characters == null) {
751             throw new NullPointerException JavaDoc("The list of additional " +
752                     "characters to encode is null.");
753         }
754         
755         // Traverse the string. Just replace the characters indicated
756
// in the argument, and any additional characters which should
757
// always be encoded.
758
StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
759         for (char ch : cdata.toCharArray()) {
760             if (ch >= 128 ||
761                     alwaysReplace.containsKey(new Character JavaDoc(ch)) ||
762                     characters.indexOf(ch) >= 0) {
763                 buf.append(encodeAsEntity(ch));
764             } else {
765                 buf.append(ch);
766             }
767         } // Traverse the string.
768

769         // Done.
770
return buf.toString();
771     }
772     
773     
774     /**
775      * Decode all entity references in the provided string. This also
776      * decodes any numeric character references of the form &amp;#N;,
777      * where N is a decimal number, or &amp;#xN;, where N is a hex
778      * number.
779      * @param cdata The string to decode.
780      * @return The decoded string.
781      * @see #decodeAsEntity(String)
782      */

783     public String JavaDoc decodeEntities(String JavaDoc cdata) {
784         if (cdata == null) {
785             throw new NullPointerException JavaDoc("The character data to " +
786                     "decode is null.");
787         }
788         
789         // Traverse the string. Replace all entity and numeric character
790
// references with the actual characters.
791
StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
792         int i = 0;
793         while (cdata.length() > 0) {
794             // Find the next ampersand.
795
i = cdata.indexOf('&');
796             if (i < 0) {
797                 buf.append(cdata);
798                 cdata = "";
799                 continue;
800             }
801             
802             // Extract the prefix.
803
buf.append(cdata.substring(0,i));
804             cdata = cdata.substring(i);
805             
806             // Find the ending semicolon.
807
i = cdata.indexOf(';');
808             if (i < 0) {
809                 buf.append(cdata);
810                 cdata = "";
811                 continue;
812             }
813             
814             // Extract the entity name.
815
String JavaDoc entity = cdata.substring(1,i);
816             cdata = cdata.substring(i+1);
817             
818             // Convert the entity to a character, if possible.
819
String JavaDoc replace = decodeAsEntity(entity);
820
821             // Add the replacement.
822
buf.append(replace);
823         } // Construct decoded string.
824

825         // Done.
826
return buf.toString();
827     }
828     
829     
830     /**
831      * Specify whether to use hexadecimal character references of the
832      * form <code>&amp;#xN;</code>, where N is the hex character code.
833      * The alternative is decimal character references of the form
834      * <code>&amp;#N;</code>, where N is the decimal character code.
835      * @param flag The setting.
836      * @return This escape.
837      * @see #encodeAsEntity(char)
838      */

839     public Escape setUseHex(boolean flag) {
840         useHex = flag;
841         return this;
842     }
843     
844     
845     /**
846      * Add a new entity to this escape.
847      * @param entity The entity name. There can be an ampersand at
848      * the start and a semicolon at the end, but these
849      * are optional.
850      * @param value The value of the entity, as a single character.
851      * @return This escape.
852      */

853     public Escape setEntity(String JavaDoc entity, char value) {
854         if (entity == null) {
855             throw new NullPointerException JavaDoc("The entity name is null.");
856         }
857         if (entity.startsWith("&")) {
858             entity = entity.substring(1, entity.length());
859         }
860         if (entity.endsWith(";")) {
861             entity = entity.substring(0, entity.length()-1);
862         }
863         charToEntity.put(new Character JavaDoc(value), entity);
864         entityToChar.put(entity, new Character JavaDoc(value));
865         return this;
866     }
867     
868     
869     /**
870      * Convert a character to a sequence of hex URL escapes.
871      * <p>
872      * Multibyte characters are handled in the default character encoding.
873      * @param ch The character to encode.
874      * @return The hex encoding, which may consist of more than
875      * one byte, and which is performed in the default
876      * character encoding.
877      */

878     public String JavaDoc hexEncode(char ch) {
879         // Some characters occupy more than one byte (multibyte).
880
// To account for this, convert the character to a string
881
// and then get the bytes for the string. I expect there
882
// is a better way to do this, which is dependent on character
883
// encodings, but for now this will have to work.
884
byte[] bytes = ("" + ch).getBytes();
885         StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
886         for (byte bt : bytes) {
887             // Bytes are signed (why?) so this is necessary to prevent
888
// an undesirable number of one bits in the result. This
889
// essentially converts the byte to a signed value. I
890
// sometimes wish Java had an unsigned keyword.
891
int ibt = (int) bt & 0xFF;
892             buf.append('%');
893             String JavaDoc hex = Integer.toHexString(ibt);
894             if (hex.length() < 2) {
895                 buf.append('0');
896             }
897             buf.append(hex);
898         } // Traversing the bytes.
899

900         // Now return the encoded string.
901
return buf.toString();
902     }
903     
904     
905     /**
906      * Traverse the input string, and hex encode non-alphanumeric
907      * characters in the string, other than those in the provided set.
908      * Note that all non-ascii characters are encoded here.
909      * @param text The text to encode.
910      * @param characters Characters to preserve, unencoded.
911      * @return The encoded string.
912      */

913     public String JavaDoc hexEncode(String JavaDoc text, String JavaDoc characters) {
914         // Traverse the string and encode characters.
915
StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
916         for (char ch : text.toCharArray()) {
917             if (ch < 128 &&
918                     (Character.isLetterOrDigit(ch) ||
919                             characters.indexOf(ch) >= 0) ||
920                             preserve.indexOf(ch) >= 0) {
921                 buf.append(ch);
922             } else {
923                 buf.append(hexEncode(ch));
924             }
925         } // Loop over input string.
926

927         // Done.
928
return buf.toString();
929     }
930     
931     
932     /**
933      * Convert all URL hex escapes in the string to characters. This is
934      * complicated by the need to handle multibyte characters.
935      * <p>
936      * Multibyte characters are handled in the default character encoding.
937      * @param text The text to decode.
938      * @return The decoded text.
939      */

940     public String JavaDoc hexDecode(String JavaDoc text) {
941         if (text == null) {
942             throw new NullPointerException JavaDoc("The text to hex decode is null.");
943         }
944         
945         // Traverse the string and decode any hex escapes. These are
946
// turned into bytes, and added to the byte sequence.
947
ByteArrayOutputStream JavaDoc baos = new ByteArrayOutputStream JavaDoc();
948         int index = 0;
949         int length = text.length();
950         while (index < length) {
951             // If the next character is a percent sign, decode a hex
952
// escape. Otherwise, just add the bytes for the character.
953
char ch = text.charAt(index);
954             try {
955                 if (ch == '%') {
956                     // There must be two more characters in the text.
957
if (length - index <= 2) {
958                         // Too few characters in the text.
959
baos.write("%".getBytes());
960                         index++;
961                         continue;
962                     }
963                     
964                     // The next two characters must be hex.
965
String JavaDoc hex = text.substring(index+1, index+3);
966                     try {
967                         int value = Integer.parseInt(hex, 16);
968                         baos.write((byte) value);
969                         index += 3;
970                     } catch (NumberFormatException JavaDoc exception) {
971                         // Some characters are not hexadecimal.
972
baos.write("%".getBytes());
973                         index++;
974                     }
975                 } else {
976                     // Just add the character as-is.
977
baos.write(("" + ch).getBytes());
978                     index++;
979                 }
980             } catch (IOException JavaDoc exception) {
981                 // This should never happen. Ignore this.
982
}
983         } // Loop over input string.
984

985         // Return the result, in the default encoding.
986
return baos.toString();
987     }
988 }
989
Popular Tags