KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jboss > portal > format > util > EntityTable


/*****************************************
 *                                       *
 * JBoss Portal: The OpenSource Portal   *
 *                                       *
 * Distributable under LGPL license.     *
 * See terms of license at gnu.org.      *
 *                                       *
 *****************************************/

9
10 package org.jboss.portal.format.util;
11
/**
 * Lookup table that converts a char to the name of its HTML entity.
 *
 * <p>The table is a plain array with one slot per possible <code>char</code> value, so a
 * lookup is a single array access. Each slot holds the bare entity name (for instance
 * <code>"amp"</code>, without the leading <code>&amp;</code> and the trailing <code>;</code>)
 * or <code>null</code> when the char has no entity in this table.</p>
 *
 * <p>Two shared instances are provided: {@link #FULL} and {@link #BASIC}.</p>
 */
public class EntityTable
{

   /**
    * Number of slots in a table: one per possible char value (0 to 65535).
    */
   private static final int TABLE_SIZE = Character.MAX_VALUE + 1;

   /**
    * All HTML entities.
    */
   public static final EntityTable FULL = new EntityTable();

   /**
    * All HTML entities except the HTML chars which are used to do HTML itself
    * (<code>&lt;</code>, <code>&gt;</code>, <code>&quot;</code> and <code>&amp;</code>).
    */
   public static final EntityTable BASIC = new BasicEntityTable();

   /**
    * The full table minus the four chars that are significant in HTML markup, so that
    * text converted with it keeps its markup intact.
    */
   private static class BasicEntityTable extends EntityTable
   {
      public BasicEntityTable()
      {
         // The parent constructor has already registered every entity
         remove('<');
         remove('>');
         remove('"');
         remove('&');
      }
   }

   /**
    * Entity name indexed by char value, null when the char has no entity.
    */
   private final String[] charToEntity = new String[TABLE_SIZE];

   /**
    * Creates a table populated with every entity known to this class.
    */
   protected EntityTable()
   {
      // ISO 8859-1 (Latin-1) characters, code points 160 to 255
      put(160, "nbsp");
      put(161, "iexcl");
      put(162, "cent");
      put(163, "pound");
      put(164, "curren");
      put(165, "yen");
      put(166, "brvbar");
      put(167, "sect");
      put(168, "uml");
      put(169, "copy");
      put(170, "ordf");
      put(171, "laquo");
      put(172, "not");
      put(173, "shy");
      put(174, "reg");
      put(175, "macr");
      put(176, "deg");
      put(177, "plusmn");
      put(178, "sup2");
      put(179, "sup3");
      put(180, "acute");
      put(181, "micro");
      put(182, "para");
      put(183, "middot");
      put(184, "cedil");
      put(185, "sup1");
      put(186, "ordm");
      put(187, "raquo");
      put(188, "frac14");
      put(189, "frac12");
      put(190, "frac34");
      put(191, "iquest");
      put(192, "Agrave");
      put(193, "Aacute");
      put(194, "Acirc");
      put(195, "Atilde");
      put(196, "Auml");
      put(197, "Aring");
      put(198, "AElig");
      put(199, "Ccedil");
      put(200, "Egrave");
      put(201, "Eacute");
      put(202, "Ecirc");
      put(203, "Euml");
      put(204, "Igrave");
      put(205, "Iacute");
      put(206, "Icirc");
      put(207, "Iuml");
      put(208, "ETH");
      put(209, "Ntilde");
      put(210, "Ograve");
      put(211, "Oacute");
      put(212, "Ocirc");
      put(213, "Otilde");
      put(214, "Ouml");
      put(215, "times");
      put(216, "Oslash");
      put(217, "Ugrave");
      put(218, "Uacute");
      put(219, "Ucirc");
      put(220, "Uuml");
      put(221, "Yacute");
      put(222, "THORN");
      put(223, "szlig");
      put(224, "agrave");
      put(225, "aacute");
      put(226, "acirc");
      put(227, "atilde");
      put(228, "auml");
      put(229, "aring");
      put(230, "aelig");
      put(231, "ccedil");
      put(232, "egrave");
      put(233, "eacute");
      put(234, "ecirc");
      put(235, "euml");
      put(236, "igrave");
      put(237, "iacute");
      put(238, "icirc");
      put(239, "iuml");
      put(240, "eth");
      put(241, "ntilde");
      put(242, "ograve");
      put(243, "oacute");
      put(244, "ocirc");
      put(245, "otilde");
      put(246, "ouml");
      put(247, "divide");
      put(248, "oslash");
      put(249, "ugrave");
      put(250, "uacute");
      put(251, "ucirc");
      put(252, "uuml");
      put(253, "yacute");
      put(254, "thorn");
      put(255, "yuml");

      // Greek letters, mathematical and other symbols
      put(402, "fnof");
      put(913, "Alpha");
      put(914, "Beta");
      put(915, "Gamma");
      put(916, "Delta");
      put(917, "Epsilon");
      put(918, "Zeta");
      put(919, "Eta");
      put(920, "Theta");
      put(921, "Iota");
      put(922, "Kappa");
      put(923, "Lambda");
      put(924, "Mu");
      put(925, "Nu");
      put(926, "Xi");
      put(927, "Omicron");
      put(928, "Pi");
      put(929, "Rho");
      put(931, "Sigma");
      put(932, "Tau");
      put(933, "Upsilon");
      put(934, "Phi");
      put(935, "Chi");
      put(936, "Psi");
      put(937, "Omega");
      put(945, "alpha");
      put(946, "beta");
      put(947, "gamma");
      put(948, "delta");
      put(949, "epsilon");
      put(950, "zeta");
      put(951, "eta");
      put(952, "theta");
      put(953, "iota");
      put(954, "kappa");
      put(955, "lambda");
      put(956, "mu");
      put(957, "nu");
      put(958, "xi");
      put(959, "omicron");
      put(960, "pi");
      put(961, "rho");
      put(962, "sigmaf");
      put(963, "sigma");
      put(964, "tau");
      put(965, "upsilon");
      put(966, "phi");
      put(967, "chi");
      put(968, "psi");
      put(969, "omega");
      put(977, "thetasym");
      put(978, "upsih");
      put(982, "piv");
      put(8226, "bull");
      put(8230, "hellip");
      put(8242, "prime");
      put(8243, "Prime");
      put(8254, "oline");
      put(8260, "frasl");
      put(8472, "weierp");
      put(8465, "image");
      put(8476, "real");
      put(8482, "trade");
      put(8501, "alefsym");
      put(8592, "larr");
      put(8593, "uarr");
      put(8594, "rarr");
      put(8595, "darr");
      put(8596, "harr");
      put(8629, "crarr");
      put(8656, "lArr");
      put(8657, "uArr");
      put(8658, "rArr");
      put(8659, "dArr");
      put(8660, "hArr");
      put(8704, "forall");
      put(8706, "part");
      put(8707, "exist");
      put(8709, "empty");
      put(8711, "nabla");
      put(8712, "isin");
      put(8713, "notin");
      put(8715, "ni");
      put(8719, "prod");
      put(8721, "sum");
      put(8722, "minus");
      put(8727, "lowast");
      put(8730, "radic");
      put(8733, "prop");
      put(8734, "infin");
      put(8736, "ang");
      put(8743, "and");
      put(8744, "or");
      put(8745, "cap");
      put(8746, "cup");
      put(8747, "int");
      put(8756, "there4");
      put(8764, "sim");
      put(8773, "cong");
      put(8776, "asymp");
      put(8800, "ne");
      put(8801, "equiv");
      put(8804, "le");
      put(8805, "ge");
      put(8834, "sub");
      put(8835, "sup");
      put(8836, "nsub");
      put(8838, "sube");
      put(8839, "supe");
      put(8853, "oplus");
      put(8855, "otimes");
      put(8869, "perp");
      put(8901, "sdot");
      put(8968, "lceil");
      put(8969, "rceil");
      put(8970, "lfloor");
      put(8971, "rfloor");
      put(9001, "lang");
      put(9002, "rang");
      put(9674, "loz");
      put(9824, "spades");
      put(9827, "clubs");
      put(9829, "hearts");
      put(9830, "diams");

      // Chars significant in HTML markup; these four are the ones BasicEntityTable removes
      put(34, "quot");
      put(38, "amp");
      put(60, "lt");
      put(62, "gt");

      // Remaining special characters: ligatures, spacing, punctuation, currency
      put(338, "OElig");
      put(339, "oelig");
      put(352, "Scaron");
      put(353, "scaron");
      put(376, "Yuml");
      put(710, "circ");
      put(732, "tilde");
      put(8194, "ensp");
      put(8195, "emsp");
      put(8201, "thinsp");
      put(8204, "zwnj");
      put(8205, "zwj");
      put(8206, "lrm");
      put(8207, "rlm");
      put(8211, "ndash");
      put(8212, "mdash");
      put(8216, "lsquo");
      put(8217, "rsquo");
      put(8218, "sbquo");
      put(8220, "ldquo");
      put(8221, "rdquo");
      put(8222, "bdquo");
      put(8224, "dagger");
      put(8225, "Dagger");
      put(8240, "permil");
      put(8249, "lsaquo");
      put(8250, "rsaquo");
      put(8364, "euro");
   }

   /**
    * Registers an entity for a char, replacing any entity previously registered for it.
    *
    * @param c the char value, which must be between 0 and 65535 inclusive
    * @param entity the entity name without the leading <code>&amp;</code> and trailing <code>;</code>
    * @throws ArrayIndexOutOfBoundsException if c is outside the char range
    */
   protected final void put(int c, String entity)
   {
      charToEntity[c] = entity;
   }

   /**
    * Unregisters the entity of a char so that the char is no longer converted.
    *
    * @param c the char value, which must be between 0 and 65535 inclusive
    * @throws ArrayIndexOutOfBoundsException if c is outside the char range
    */
   protected final void remove(int c)
   {
      charToEntity[c] = null;
   }

   /**
    * Returns null if no entity is found or return the converted entity.
    *
    * @param c the char to look up
    * @return the entity name without the leading <code>&amp;</code> and trailing <code>;</code>,
    *         or null when this table has no entity for the char
    */
   public final String lookup(char c)
   {
      return charToEntity[c];
   }

   /**
    * Replaces every char of the text that has an entity in this table with
    * <code>&amp;name;</code>, leaving all the other chars untouched.
    *
    * @param txt the text to convert
    * @return the converted text, or the very same txt instance when no char needed a conversion
    * @throws NullPointerException if txt is null
    */
   public final String convertEntities(String txt)
   {
      // Working on a char[] avoids one charAt() call per char
      char[] chars = txt.toCharArray();

      // Created lazily so that a text without any entity is returned as is
      StringBuffer result = null;

      // Index of the first char that has not been copied to the result yet
      int previous = 0;

      for (int current = 0; current < chars.length; current++)
      {
         String replacement = lookup(chars[current]);
         if (replacement == null)
         {
            // Plain char, it is copied in bulk later on
            continue;
         }

         if (result == null)
         {
            // Allocate 1/2 more than the txt size to leave room for the entity names
            result = new StringBuffer(txt.length() * 3 / 2);
         }

         // Flush the run of plain chars preceding this one, then the entity itself
         result.append(chars, previous, current - previous);
         result.append('&').append(replacement).append(';');
         previous = current + 1;
      }

      if (result == null)
      {
         // Nothing was replaced
         return txt;
      }

      // Copy the plain chars located after the last entity
      result.append(chars, previous, chars.length - previous);
      return result.toString();
   }
}
363
Popular Tags