KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > lowagie > text > xml > simpleparser > EntitiesToUnicode


/*
 * $Id: EntitiesToUnicode.java 2625 2007-02-27 13:02:57Z blowagie $
 * $Name$
 *
 * Copyright 2003-2007 Paulo Soares and Bruno Lowagie.
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * (the "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the License.
 *
 * The Original Code is 'iText, a free JAVA-PDF library'.
 *
 * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
 * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
 * All Rights Reserved.
 * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
 * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
 *
 * Contributor(s): all the names of the contributors are added in the source code
 * where applicable.
 *
 * Alternatively, the contents of this file may be used under the terms of the
 * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
 * provisions of LGPL are applicable instead of those above. If you wish to
 * allow use of your version of this file only under the terms of the LGPL
 * License and not to allow others to use your version of this file under
 * the MPL, indicate your decision by deleting the provisions above and
 * replace them with the notice and other provisions required by the LGPL.
 * If you do not delete the provisions above, a recipient may use your version
 * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the MPL as stated above or under the terms of the GNU
 * Library General Public License as published by the Free Software Foundation;
 * either version 2 of the License, or any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
 * details.
 *
 * If you didn't download this code from the following link, you should check if
 * you aren't using an obsolete version:
 * http://www.lowagie.com/iText/
 */

package com.lowagie.text.xml.simpleparser;

import java.util.HashMap;

/**
 * Lookup table and helpers for resolving character entities.
 * <p>
 * The class maps entity names (the text between <code>&amp;</code> and
 * <code>;</code>, e.g. <code>"nbsp"</code>) to a single UTF-16 code unit and
 * can decode both named and numeric (<code>#NNN</code>, <code>#xHHH</code>)
 * references, either one at a time or embedded in a longer string.
 */
public class EntitiesToUnicode {

    /**
     * This is a map that contains the names of entities and their unicode value.
     * Keys are <code>String</code> entity names without the surrounding
     * <code>&amp;</code> and <code>;</code>; values are <code>Character</code>
     * objects. The map is populated once by the static initializer below.
     */
    public static final HashMap map = new HashMap();

    static {
        add("nbsp", '\u00a0'); // no-break space = non-breaking space, U+00A0 ISOnum
        add("iexcl", '\u00a1'); // inverted exclamation mark, U+00A1 ISOnum
        add("cent", '\u00a2'); // cent sign, U+00A2 ISOnum
        add("pound", '\u00a3'); // pound sign, U+00A3 ISOnum
        add("curren", '\u00a4'); // currency sign, U+00A4 ISOnum
        add("yen", '\u00a5'); // yen sign = yuan sign, U+00A5 ISOnum
        add("brvbar", '\u00a6'); // broken bar = broken vertical bar, U+00A6 ISOnum
        add("sect", '\u00a7'); // section sign, U+00A7 ISOnum
        add("uml", '\u00a8'); // diaeresis = spacing diaeresis, U+00A8 ISOdia
        add("copy", '\u00a9'); // copyright sign, U+00A9 ISOnum
        add("ordf", '\u00aa'); // feminine ordinal indicator, U+00AA ISOnum
        add("laquo", '\u00ab'); // left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
        add("not", '\u00ac'); // not sign, U+00AC ISOnum
        add("shy", '\u00ad'); // soft hyphen = discretionary hyphen, U+00AD ISOnum
        add("reg", '\u00ae'); // registered sign = registered trade mark sign, U+00AE ISOnum
        add("macr", '\u00af'); // macron = spacing macron = overline = APL overbar, U+00AF ISOdia
        add("deg", '\u00b0'); // degree sign, U+00B0 ISOnum
        add("plusmn", '\u00b1'); // plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
        add("sup2", '\u00b2'); // superscript two = superscript digit two = squared, U+00B2 ISOnum
        add("sup3", '\u00b3'); // superscript three = superscript digit three = cubed, U+00B3 ISOnum
        add("acute", '\u00b4'); // acute accent = spacing acute, U+00B4 ISOdia
        add("micro", '\u00b5'); // micro sign, U+00B5 ISOnum
        add("para", '\u00b6'); // pilcrow sign = paragraph sign, U+00B6 ISOnum
        add("middot", '\u00b7'); // middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
        add("cedil", '\u00b8'); // cedilla = spacing cedilla, U+00B8 ISOdia
        add("sup1", '\u00b9'); // superscript one = superscript digit one, U+00B9 ISOnum
        add("ordm", '\u00ba'); // masculine ordinal indicator, U+00BA ISOnum
        add("raquo", '\u00bb'); // right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
        add("frac14", '\u00bc'); // vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
        add("frac12", '\u00bd'); // vulgar fraction one half = fraction one half, U+00BD ISOnum
        add("frac34", '\u00be'); // vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
        add("iquest", '\u00bf'); // inverted question mark = turned question mark, U+00BF ISOnum
        add("Agrave", '\u00c0'); // latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
        add("Aacute", '\u00c1'); // latin capital letter A with acute, U+00C1 ISOlat1
        add("Acirc", '\u00c2'); // latin capital letter A with circumflex, U+00C2 ISOlat1
        add("Atilde", '\u00c3'); // latin capital letter A with tilde, U+00C3 ISOlat1
        add("Auml", '\u00c4'); // latin capital letter A with diaeresis, U+00C4 ISOlat1
        add("Aring", '\u00c5'); // latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
        add("AElig", '\u00c6'); // latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
        add("Ccedil", '\u00c7'); // latin capital letter C with cedilla, U+00C7 ISOlat1
        add("Egrave", '\u00c8'); // latin capital letter E with grave, U+00C8 ISOlat1
        add("Eacute", '\u00c9'); // latin capital letter E with acute, U+00C9 ISOlat1
        add("Ecirc", '\u00ca'); // latin capital letter E with circumflex, U+00CA ISOlat1
        add("Euml", '\u00cb'); // latin capital letter E with diaeresis, U+00CB ISOlat1
        add("Igrave", '\u00cc'); // latin capital letter I with grave, U+00CC ISOlat1
        add("Iacute", '\u00cd'); // latin capital letter I with acute, U+00CD ISOlat1
        add("Icirc", '\u00ce'); // latin capital letter I with circumflex, U+00CE ISOlat1
        add("Iuml", '\u00cf'); // latin capital letter I with diaeresis, U+00CF ISOlat1
        add("ETH", '\u00d0'); // latin capital letter ETH, U+00D0 ISOlat1
        add("Ntilde", '\u00d1'); // latin capital letter N with tilde, U+00D1 ISOlat1
        add("Ograve", '\u00d2'); // latin capital letter O with grave, U+00D2 ISOlat1
        add("Oacute", '\u00d3'); // latin capital letter O with acute, U+00D3 ISOlat1
        add("Ocirc", '\u00d4'); // latin capital letter O with circumflex, U+00D4 ISOlat1
        add("Otilde", '\u00d5'); // latin capital letter O with tilde, U+00D5 ISOlat1
        add("Ouml", '\u00d6'); // latin capital letter O with diaeresis, U+00D6 ISOlat1
        add("times", '\u00d7'); // multiplication sign, U+00D7 ISOnum
        add("Oslash", '\u00d8'); // latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1
        add("Ugrave", '\u00d9'); // latin capital letter U with grave, U+00D9 ISOlat1
        add("Uacute", '\u00da'); // latin capital letter U with acute, U+00DA ISOlat1
        add("Ucirc", '\u00db'); // latin capital letter U with circumflex, U+00DB ISOlat1
        add("Uuml", '\u00dc'); // latin capital letter U with diaeresis, U+00DC ISOlat1
        add("Yacute", '\u00dd'); // latin capital letter Y with acute, U+00DD ISOlat1
        add("THORN", '\u00de'); // latin capital letter THORN, U+00DE ISOlat1
        add("szlig", '\u00df'); // latin small letter sharp s = ess-zed, U+00DF ISOlat1
        add("agrave", '\u00e0'); // latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
        add("aacute", '\u00e1'); // latin small letter a with acute, U+00E1 ISOlat1
        add("acirc", '\u00e2'); // latin small letter a with circumflex, U+00E2 ISOlat1
        add("atilde", '\u00e3'); // latin small letter a with tilde, U+00E3 ISOlat1
        add("auml", '\u00e4'); // latin small letter a with diaeresis, U+00E4 ISOlat1
        add("aring", '\u00e5'); // latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
        add("aelig", '\u00e6'); // latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
        add("ccedil", '\u00e7'); // latin small letter c with cedilla, U+00E7 ISOlat1
        add("egrave", '\u00e8'); // latin small letter e with grave, U+00E8 ISOlat1
        add("eacute", '\u00e9'); // latin small letter e with acute, U+00E9 ISOlat1
        add("ecirc", '\u00ea'); // latin small letter e with circumflex, U+00EA ISOlat1
        add("euml", '\u00eb'); // latin small letter e with diaeresis, U+00EB ISOlat1
        add("igrave", '\u00ec'); // latin small letter i with grave, U+00EC ISOlat1
        add("iacute", '\u00ed'); // latin small letter i with acute, U+00ED ISOlat1
        add("icirc", '\u00ee'); // latin small letter i with circumflex, U+00EE ISOlat1
        add("iuml", '\u00ef'); // latin small letter i with diaeresis, U+00EF ISOlat1
        add("eth", '\u00f0'); // latin small letter eth, U+00F0 ISOlat1
        add("ntilde", '\u00f1'); // latin small letter n with tilde, U+00F1 ISOlat1
        add("ograve", '\u00f2'); // latin small letter o with grave, U+00F2 ISOlat1
        add("oacute", '\u00f3'); // latin small letter o with acute, U+00F3 ISOlat1
        add("ocirc", '\u00f4'); // latin small letter o with circumflex, U+00F4 ISOlat1
        add("otilde", '\u00f5'); // latin small letter o with tilde, U+00F5 ISOlat1
        add("ouml", '\u00f6'); // latin small letter o with diaeresis, U+00F6 ISOlat1
        add("divide", '\u00f7'); // division sign, U+00F7 ISOnum
        add("oslash", '\u00f8'); // latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1
        add("ugrave", '\u00f9'); // latin small letter u with grave, U+00F9 ISOlat1
        add("uacute", '\u00fa'); // latin small letter u with acute, U+00FA ISOlat1
        add("ucirc", '\u00fb'); // latin small letter u with circumflex, U+00FB ISOlat1
        add("uuml", '\u00fc'); // latin small letter u with diaeresis, U+00FC ISOlat1
        add("yacute", '\u00fd'); // latin small letter y with acute, U+00FD ISOlat1
        add("thorn", '\u00fe'); // latin small letter thorn, U+00FE ISOlat1
        add("yuml", '\u00ff'); // latin small letter y with diaeresis, U+00FF ISOlat1
        // Latin Extended-B
        add("fnof", '\u0192'); // latin small f with hook = function = florin, U+0192 ISOtech
        // Greek
        add("Alpha", '\u0391'); // greek capital letter alpha, U+0391
        add("Beta", '\u0392'); // greek capital letter beta, U+0392
        add("Gamma", '\u0393'); // greek capital letter gamma, U+0393 ISOgrk3
        add("Delta", '\u0394'); // greek capital letter delta, U+0394 ISOgrk3
        add("Epsilon", '\u0395'); // greek capital letter epsilon, U+0395
        add("Zeta", '\u0396'); // greek capital letter zeta, U+0396
        add("Eta", '\u0397'); // greek capital letter eta, U+0397
        add("Theta", '\u0398'); // greek capital letter theta, U+0398 ISOgrk3
        add("Iota", '\u0399'); // greek capital letter iota, U+0399
        add("Kappa", '\u039a'); // greek capital letter kappa, U+039A
        add("Lambda", '\u039b'); // greek capital letter lambda, U+039B ISOgrk3
        add("Mu", '\u039c'); // greek capital letter mu, U+039C
        add("Nu", '\u039d'); // greek capital letter nu, U+039D
        add("Xi", '\u039e'); // greek capital letter xi, U+039E ISOgrk3
        add("Omicron", '\u039f'); // greek capital letter omicron, U+039F
        add("Pi", '\u03a0'); // greek capital letter pi, U+03A0 ISOgrk3
        add("Rho", '\u03a1'); // greek capital letter rho, U+03A1
        // there is no Sigmaf, and no U+03A2 character either
        add("Sigma", '\u03a3'); // greek capital letter sigma, U+03A3 ISOgrk3
        add("Tau", '\u03a4'); // greek capital letter tau, U+03A4
        add("Upsilon", '\u03a5'); // greek capital letter upsilon, U+03A5 ISOgrk3
        add("Phi", '\u03a6'); // greek capital letter phi, U+03A6 ISOgrk3
        add("Chi", '\u03a7'); // greek capital letter chi, U+03A7
        add("Psi", '\u03a8'); // greek capital letter psi, U+03A8 ISOgrk3
        add("Omega", '\u03a9'); // greek capital letter omega, U+03A9 ISOgrk3
        add("alpha", '\u03b1'); // greek small letter alpha, U+03B1 ISOgrk3
        add("beta", '\u03b2'); // greek small letter beta, U+03B2 ISOgrk3
        add("gamma", '\u03b3'); // greek small letter gamma, U+03B3 ISOgrk3
        add("delta", '\u03b4'); // greek small letter delta, U+03B4 ISOgrk3
        add("epsilon", '\u03b5'); // greek small letter epsilon, U+03B5 ISOgrk3
        add("zeta", '\u03b6'); // greek small letter zeta, U+03B6 ISOgrk3
        add("eta", '\u03b7'); // greek small letter eta, U+03B7 ISOgrk3
        add("theta", '\u03b8'); // greek small letter theta, U+03B8 ISOgrk3
        add("iota", '\u03b9'); // greek small letter iota, U+03B9 ISOgrk3
        add("kappa", '\u03ba'); // greek small letter kappa, U+03BA ISOgrk3
        add("lambda", '\u03bb'); // greek small letter lambda, U+03BB ISOgrk3
        add("mu", '\u03bc'); // greek small letter mu, U+03BC ISOgrk3
        add("nu", '\u03bd'); // greek small letter nu, U+03BD ISOgrk3
        add("xi", '\u03be'); // greek small letter xi, U+03BE ISOgrk3
        add("omicron", '\u03bf'); // greek small letter omicron, U+03BF NEW
        add("pi", '\u03c0'); // greek small letter pi, U+03C0 ISOgrk3
        add("rho", '\u03c1'); // greek small letter rho, U+03C1 ISOgrk3
        add("sigmaf", '\u03c2'); // greek small letter final sigma, U+03C2 ISOgrk3
        add("sigma", '\u03c3'); // greek small letter sigma, U+03C3 ISOgrk3
        add("tau", '\u03c4'); // greek small letter tau, U+03C4 ISOgrk3
        add("upsilon", '\u03c5'); // greek small letter upsilon, U+03C5 ISOgrk3
        add("phi", '\u03c6'); // greek small letter phi, U+03C6 ISOgrk3
        add("chi", '\u03c7'); // greek small letter chi, U+03C7 ISOgrk3
        add("psi", '\u03c8'); // greek small letter psi, U+03C8 ISOgrk3
        add("omega", '\u03c9'); // greek small letter omega, U+03C9 ISOgrk3
        add("thetasym", '\u03d1'); // greek small letter theta symbol, U+03D1 NEW
        add("upsih", '\u03d2'); // greek upsilon with hook symbol, U+03D2 NEW
        add("piv", '\u03d6'); // greek pi symbol, U+03D6 ISOgrk3
        // General Punctuation
        add("bull", '\u2022'); // bullet = black small circle, U+2022 ISOpub
        // bullet is NOT the same as bullet operator, U+2219
        add("hellip", '\u2026'); // horizontal ellipsis = three dot leader, U+2026 ISOpub
        add("prime", '\u2032'); // prime = minutes = feet, U+2032 ISOtech
        add("Prime", '\u2033'); // double prime = seconds = inches, U+2033 ISOtech
        add("oline", '\u203e'); // overline = spacing overscore, U+203E NEW
        add("frasl", '\u2044'); // fraction slash, U+2044 NEW
        // Letterlike Symbols
        add("weierp", '\u2118'); // script capital P = power set = Weierstrass p, U+2118 ISOamso
        add("image", '\u2111'); // blackletter capital I = imaginary part, U+2111 ISOamso
        add("real", '\u211c'); // blackletter capital R = real part symbol, U+211C ISOamso
        add("trade", '\u2122'); // trade mark sign, U+2122 ISOnum
        add("alefsym", '\u2135'); // alef symbol = first transfinite cardinal, U+2135 NEW
        // alef symbol is NOT the same as hebrew letter alef,
        // U+05D0 although the same glyph could be used to depict both characters
        // Arrows
        add("larr", '\u2190'); // leftwards arrow, U+2190 ISOnum
        add("uarr", '\u2191'); // upwards arrow, U+2191 ISOnum
        add("rarr", '\u2192'); // rightwards arrow, U+2192 ISOnum
        add("darr", '\u2193'); // downwards arrow, U+2193 ISOnum
        add("harr", '\u2194'); // left right arrow, U+2194 ISOamsa
        add("crarr", '\u21b5'); // downwards arrow with corner leftwards = carriage return, U+21B5 NEW
        add("lArr", '\u21d0'); // leftwards double arrow, U+21D0 ISOtech
        // ISO 10646 does not say that lArr is the same as the 'is implied by' arrow
        // but also does not have any other character for that function. So ? lArr can
        // be used for 'is implied by' as ISOtech suggests
        add("uArr", '\u21d1'); // upwards double arrow, U+21D1 ISOamsa
        add("rArr", '\u21d2'); // rightwards double arrow, U+21D2 ISOtech
        // ISO 10646 does not say this is the 'implies' character but does not have
        // another character with this function so ?
        // rArr can be used for 'implies' as ISOtech suggests
        add("dArr", '\u21d3'); // downwards double arrow, U+21D3 ISOamsa
        add("hArr", '\u21d4'); // left right double arrow, U+21D4 ISOamsa
        // Mathematical Operators
        add("forall", '\u2200'); // for all, U+2200 ISOtech
        add("part", '\u2202'); // partial differential, U+2202 ISOtech
        add("exist", '\u2203'); // there exists, U+2203 ISOtech
        add("empty", '\u2205'); // empty set = null set = diameter, U+2205 ISOamso
        add("nabla", '\u2207'); // nabla = backward difference, U+2207 ISOtech
        add("isin", '\u2208'); // element of, U+2208 ISOtech
        add("notin", '\u2209'); // not an element of, U+2209 ISOtech
        add("ni", '\u220b'); // contains as member, U+220B ISOtech
        // should there be a more memorable name than 'ni'?
        add("prod", '\u220f'); // n-ary product = product sign, U+220F ISOamsb
        // prod is NOT the same character as U+03A0 'greek capital letter pi' though
        // the same glyph might be used for both
        add("sum", '\u2211'); // n-ary summation, U+2211 ISOamsb
        // sum is NOT the same character as U+03A3 'greek capital letter sigma'
        // though the same glyph might be used for both
        add("minus", '\u2212'); // minus sign, U+2212 ISOtech
        add("lowast", '\u2217'); // asterisk operator, U+2217 ISOtech
        add("radic", '\u221a'); // square root = radical sign, U+221A ISOtech
        add("prop", '\u221d'); // proportional to, U+221D ISOtech
        add("infin", '\u221e'); // infinity, U+221E ISOtech
        add("ang", '\u2220'); // angle, U+2220 ISOamso
        add("and", '\u2227'); // logical and = wedge, U+2227 ISOtech
        add("or", '\u2228'); // logical or = vee, U+2228 ISOtech
        add("cap", '\u2229'); // intersection = cap, U+2229 ISOtech
        add("cup", '\u222a'); // union = cup, U+222A ISOtech
        add("int", '\u222b'); // integral, U+222B ISOtech
        add("there4", '\u2234'); // therefore, U+2234 ISOtech
        add("sim", '\u223c'); // tilde operator = varies with = similar to, U+223C ISOtech
        // tilde operator is NOT the same character as the tilde, U+007E,
        // although the same glyph might be used to represent both
        add("cong", '\u2245'); // approximately equal to, U+2245 ISOtech
        add("asymp", '\u2248'); // almost equal to = asymptotic to, U+2248 ISOamsr
        add("ne", '\u2260'); // not equal to, U+2260 ISOtech
        add("equiv", '\u2261'); // identical to, U+2261 ISOtech
        add("le", '\u2264'); // less-than or equal to, U+2264 ISOtech
        add("ge", '\u2265'); // greater-than or equal to, U+2265 ISOtech
        add("sub", '\u2282'); // subset of, U+2282 ISOtech
        add("sup", '\u2283'); // superset of, U+2283 ISOtech
        // note that nsup, 'not a superset of, U+2283' is not covered by the Symbol
        // font encoding and is not included. Should it be, for symmetry?
        // It is in ISOamsn
        add("nsub", '\u2284'); // not a subset of, U+2284 ISOamsn
        add("sube", '\u2286'); // subset of or equal to, U+2286 ISOtech
        add("supe", '\u2287'); // superset of or equal to, U+2287 ISOtech
        add("oplus", '\u2295'); // circled plus = direct sum, U+2295 ISOamsb
        add("otimes", '\u2297'); // circled times = vector product, U+2297 ISOamsb
        add("perp", '\u22a5'); // up tack = orthogonal to = perpendicular, U+22A5 ISOtech
        add("sdot", '\u22c5'); // dot operator, U+22C5 ISOamsb
        // dot operator is NOT the same character as U+00B7 middle dot
        // Miscellaneous Technical
        add("lceil", '\u2308'); // left ceiling = apl upstile, U+2308 ISOamsc
        add("rceil", '\u2309'); // right ceiling, U+2309 ISOamsc
        add("lfloor", '\u230a'); // left floor = apl downstile, U+230A ISOamsc
        add("rfloor", '\u230b'); // right floor, U+230B ISOamsc
        add("lang", '\u2329'); // left-pointing angle bracket = bra, U+2329 ISOtech
        // lang is NOT the same character as U+003C 'less than'
        // or U+2039 'single left-pointing angle quotation mark'
        add("rang", '\u232a'); // right-pointing angle bracket = ket, U+232A ISOtech
        // rang is NOT the same character as U+003E 'greater than'
        // or U+203A 'single right-pointing angle quotation mark'
        // Geometric Shapes
        add("loz", '\u25ca'); // lozenge, U+25CA ISOpub
        // Miscellaneous Symbols
        add("spades", '\u2660'); // black spade suit, U+2660 ISOpub
        // black here seems to mean filled as opposed to hollow
        add("clubs", '\u2663'); // black club suit = shamrock, U+2663 ISOpub
        add("hearts", '\u2665'); // black heart suit = valentine, U+2665 ISOpub
        add("diams", '\u2666'); // black diamond suit, U+2666 ISOpub
        // C0 Controls and Basic Latin
        add("quot", '\u0022'); // quotation mark = APL quote, U+0022 ISOnum
        add("amp", '\u0026'); // ampersand, U+0026 ISOnum
        add("apos", '\''); // apostrophe, U+0027 (written as an escape: a unicode escape would end the char literal)
        add("lt", '\u003c'); // less-than sign, U+003C ISOnum
        add("gt", '\u003e'); // greater-than sign, U+003E ISOnum
        // Latin Extended-A
        add("OElig", '\u0152'); // latin capital ligature OE, U+0152 ISOlat2
        add("oelig", '\u0153'); // latin small ligature oe, U+0153 ISOlat2
        // ligature is a misnomer, this is a separate character in some languages
        add("Scaron", '\u0160'); // latin capital letter S with caron, U+0160 ISOlat2
        add("scaron", '\u0161'); // latin small letter s with caron, U+0161 ISOlat2
        add("Yuml", '\u0178'); // latin capital letter Y with diaeresis, U+0178 ISOlat2
        // Spacing Modifier Letters
        add("circ", '\u02c6'); // modifier letter circumflex accent, U+02C6 ISOpub
        add("tilde", '\u02dc'); // small tilde, U+02DC ISOdia
        // General Punctuation
        add("ensp", '\u2002'); // en space, U+2002 ISOpub
        add("emsp", '\u2003'); // em space, U+2003 ISOpub
        add("thinsp", '\u2009'); // thin space, U+2009 ISOpub
        add("zwnj", '\u200c'); // zero width non-joiner, U+200C NEW RFC 2070
        add("zwj", '\u200d'); // zero width joiner, U+200D NEW RFC 2070
        add("lrm", '\u200e'); // left-to-right mark, U+200E NEW RFC 2070
        add("rlm", '\u200f'); // right-to-left mark, U+200F NEW RFC 2070
        add("ndash", '\u2013'); // en dash, U+2013 ISOpub
        add("mdash", '\u2014'); // em dash, U+2014 ISOpub
        add("lsquo", '\u2018'); // left single quotation mark, U+2018 ISOnum
        add("rsquo", '\u2019'); // right single quotation mark, U+2019 ISOnum
        add("sbquo", '\u201a'); // single low-9 quotation mark, U+201A NEW
        add("ldquo", '\u201c'); // left double quotation mark, U+201C ISOnum
        add("rdquo", '\u201d'); // right double quotation mark, U+201D ISOnum
        add("bdquo", '\u201e'); // double low-9 quotation mark, U+201E NEW
        add("dagger", '\u2020'); // dagger, U+2020 ISOpub
        add("Dagger", '\u2021'); // double dagger, U+2021 ISOpub
        add("permil", '\u2030'); // per mille sign, U+2030 ISOtech
        add("lsaquo", '\u2039'); // single left-pointing angle quotation mark, U+2039 ISO proposed
        // lsaquo is proposed but not yet ISO standardized
        add("rsaquo", '\u203a'); // single right-pointing angle quotation mark, U+203A ISO proposed
        // rsaquo is proposed but not yet ISO standardized
        add("euro", '\u20ac'); // euro sign, U+20AC NEW
    }

    /**
     * Registers one entity in {@link #map}.
     *
     * @param name the entity name, without the leading '&amp;' and trailing ';'
     * @param value the character the entity stands for
     */
    private static void add(String name, char value) {
        // Explicit boxing into a raw map, exactly as the original initializer did.
        // NOTE(review): Character.valueOf(value) would avoid the allocation if the
        // project baseline is Java 5 or later - confirm before changing.
        map.put(name, new Character(value));
    }

    /**
     * Translates an entity to a unicode character.
     * <p>
     * Three forms are understood: a hexadecimal numeric reference
     * (<code>#x41</code> or <code>#X41</code>), a decimal numeric reference
     * (<code>#65</code>), and a name registered in {@link #map}
     * (<code>nbsp</code>).
     *
     * @param name the name of the entity, without the leading '&amp;' and
     *             trailing ';'
     * @return the corresponding unicode character, or <code>'\0'</code> when
     *         <code>name</code> is <code>null</code>, is not a known entity,
     *         is not a well-formed number, or denotes a value that does not
     *         fit in a single <code>char</code>
     */
    public static char decodeEntity(String name) {
        if (name == null) {
            return '\0';
        }
        if (name.startsWith("#")) {
            boolean hex = name.startsWith("#x") || name.startsWith("#X");
            try {
                int code = hex
                        ? Integer.parseInt(name.substring(2), 16)
                        : Integer.parseInt(name.substring(1));
                // A blind (char) cast would silently wrap negative values and
                // anything above U+FFFF onto an unrelated character.
                if (code < Character.MIN_VALUE || code > Character.MAX_VALUE) {
                    return '\0';
                }
                return (char) code;
            }
            catch (NumberFormatException nfe) {
                return '\0';
            }
        }
        Character c = (Character) map.get(name);
        return c == null ? '\0' : c.charValue();
    }

    /**
     * Translates a String with entities (&amp;...;) to a String without entities,
     * replacing the entity with the right (unicode) character.
     * <p>
     * References that {@link #decodeEntity(String)} cannot resolve are copied
     * to the result unchanged, as is any '&amp;' that is not followed by a ';'.
     *
     * @param s the text to decode; may be <code>null</code>
     * @return the decoded text; <code>s</code> itself when it contains no
     *         '&amp;' (including when it is <code>null</code>)
     */
    public static String decodeString(String s) {
        if (s == null) {
            return null;
        }
        int amp = s.indexOf('&');
        if (amp == -1) {
            return s;
        }

        StringBuffer buf = new StringBuffer(s.length());
        int copied = 0; // index of the first character of s not yet written to buf
        while (amp != -1) {
            int semi = s.indexOf(';', amp);
            if (semi == -1) {
                break; // no terminator left: the remainder is plain text
            }
            // An entity name cannot contain '&', so the candidate reference
            // starts at the last '&' before the ';' (e.g. "AT&T &amp;").
            amp = s.lastIndexOf('&', semi);
            buf.append(s.substring(copied, amp));

            char replace = decodeEntity(s.substring(amp + 1, semi));
            if (replace == '\0') {
                buf.append(s.substring(amp, semi + 1)); // unknown: keep verbatim
            }
            else {
                buf.append(replace);
            }
            copied = semi + 1;
            amp = s.indexOf('&', copied);
        }
        buf.append(s.substring(copied));
        return buf.toString();
    }
}
Popular Tags