KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > au > id > jericho > lib > html > CharacterEntityReference


1 // Jericho HTML Parser - Java based library for analysing and manipulating HTML
2
// Version 2.2
3
// Copyright (C) 2006 Martin Jericho
4
// http://sourceforge.net/projects/jerichohtml/
5
//
6
// This library is free software; you can redistribute it and/or
7
// modify it under the terms of the GNU Lesser General Public
8
// License as published by the Free Software Foundation; either
9
// version 2.1 of the License, or (at your option) any later version.
10
// http://www.gnu.org/copyleft/lesser.html
11
//
12
// This library is distributed in the hope that it will be useful,
13
// but WITHOUT ANY WARRANTY; without even the implied warranty of
14
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
// Lesser General Public License for more details.
16
//
17
// You should have received a copy of the GNU Lesser General Public
18
// License along with this library; if not, write to the Free Software
19
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20

21 package au.id.jericho.lib.html;
22
23 import java.util.*;
24
25 /**
26  * Represents an HTML <a target="_blank" HREF="http://www.w3.org/TR/REC-html40/charset.html#h-5.3.2">Character Entity Reference</a>.
27  * <p>
28  * <b>Click <a HREF="#method_summary">here</a> to scroll down to the method summary.</b>
29  * <p>
30  * The full list of HTML character entity references can be found at the following URL:<br />
31  * <a target="_blank" HREF="http://www.w3.org/TR/REC-html40/sgml/entities.html">http://www.w3.org/TR/REC-html40/sgml/entities.html</a>.
32  * <p>
33  * There are a total of 253 HTML character entity references, ranging from codepoints U+0022 to U+2666.
34  * <p>
35  * Static methods to {@linkplain #encode(CharSequence) encode} and {@linkplain #decode(CharSequence) decode} strings
36  * and single characters can be found in the {@link CharacterReference} superclass.
37  * <p>
38  * The {@link #_apos &amp;apos;} entity reference is not defined for use in HTML.
39  * It is defined in the <a target="_blank" HREF="http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters">XHTML Special Characters Entity Set</a>,
40  * and is the only one that is not included in both HTML and XHTML.
41  * For this reason, the <code>&amp;apos;</code> entity reference is recognised by this library in decoding functions, but in encoding functions
42  * the numeric character reference <code>&amp;#39;</code> is used instead.
43  * Most modern browsers support it in both XHTML and HTML, with the notable exception
44  * of Microsoft Internet Explorer 6.0, which doesn't support it in either.
45  * <p>
46  * <code>CharacterEntityReference</code> instances are obtained using one of the following methods:
47  * <ul>
48  * <li>{@link CharacterReference#parse(CharSequence characterReferenceText)}
49  * <li>{@link Source#findNextCharacterReference(int pos)}
50  * <li>{@link Source#findPreviousCharacterReference(int pos)}
51  * <li>{@link Segment#findAllCharacterReferences()}
52  * </ul>
53  *
54  * @see CharacterReference
55  * @see NumericCharacterReference
56  */

57 public class CharacterEntityReference extends CharacterReference {
58     private String JavaDoc name;
59
60     /** <samp>&nbsp;</samp> <code>&amp;nbsp; = &amp;#160;</code> -- no-break space = non-breaking space, U+00A0 ISOnum. */
61     public static final char _nbsp='\u00A0';
62     /** <samp>&iexcl;</samp> <code>&amp;iexcl; = &amp;#161;</code> -- inverted exclamation mark, U+00A1 ISOnum. */
63     public static final char _iexcl='\u00A1';
64     /** <samp>&cent;</samp> <code>&amp;cent; = &amp;#162;</code> -- cent sign, U+00A2 ISOnum. */
65     public static final char _cent='\u00A2';
66     /** <samp>&pound;</samp> <code>&amp;pound; = &amp;#163;</code> -- pound sign, U+00A3 ISOnum. */
67     public static final char _pound='\u00A3';
68     /** <samp>&curren;</samp> <code>&amp;curren; = &amp;#164;</code> -- currency sign, U+00A4 ISOnum. */
69     public static final char _curren='\u00A4';
70     /** <samp>&yen;</samp> <code>&amp;yen; = &amp;#165;</code> -- yen sign = yuan sign, U+00A5 ISOnum. */
71     public static final char _yen='\u00A5';
72     /** <samp>&brvbar;</samp> <code>&amp;brvbar; = &amp;#166;</code> -- broken bar = broken vertical bar, U+00A6 ISOnum. */
73     public static final char _brvbar='\u00A6';
74     /** <samp>&sect;</samp> <code>&amp;sect; = &amp;#167;</code> -- section sign, U+00A7 ISOnum. */
75     public static final char _sect='\u00A7';
76     /** <samp>&uml;</samp> <code>&amp;uml; = &amp;#168;</code> -- diaeresis = spacing diaeresis, U+00A8 ISOdia. */
77     public static final char _uml='\u00A8';
78     /** <samp>&copy;</samp> <code>&amp;copy; = &amp;#169;</code> -- copyright sign, U+00A9 ISOnum. */
79     public static final char _copy='\u00A9';
80     /** <samp>&ordf;</samp> <code>&amp;ordf; = &amp;#170;</code> -- feminine ordinal indicator, U+00AA ISOnum. */
81     public static final char _ordf='\u00AA';
82     /** <samp>&laquo;</samp> <code>&amp;laquo; = &amp;#171;</code> -- left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum. */
83     public static final char _laquo='\u00AB';
84     /** <samp>&not;</samp> <code>&amp;not; = &amp;#172;</code> -- not sign = angled dash, U+00AC ISOnum. */
85     public static final char _not='\u00AC';
86     /** <samp>&shy;</samp> <code>&amp;shy; = &amp;#173;</code> -- soft hyphen = discretionary hyphen, U+00AD ISOnum. */
87     public static final char _shy='\u00AD';
88     /** <samp>&reg;</samp> <code>&amp;reg; = &amp;#174;</code> -- registered sign = registered trade mark sign, U+00AE ISOnum. */
89     public static final char _reg='\u00AE';
90     /** <samp>&macr;</samp> <code>&amp;macr; = &amp;#175;</code> -- macron = spacing macron = overline = APL overbar, U+00AF ISOdia. */
91     public static final char _macr='\u00AF';
92     /** <samp>&deg;</samp> <code>&amp;deg; = &amp;#176;</code> -- degree sign, U+00B0 ISOnum. */
93     public static final char _deg='\u00B0';
94     /** <samp>&plusmn;</samp> <code>&amp;plusmn; = &amp;#177;</code> -- plus-minus sign = plus-or-minus sign, U+00B1 ISOnum. */
95     public static final char _plusmn='\u00B1';
96     /** <samp>&sup2;</samp> <code>&amp;sup2; = &amp;#178;</code> -- superscript two = superscript digit two = squared, U+00B2 ISOnum. */
97     public static final char _sup2='\u00B2';
98     /** <samp>&sup3;</samp> <code>&amp;sup3; = &amp;#179;</code> -- superscript three = superscript digit three = cubed, U+00B3 ISOnum. */
99     public static final char _sup3='\u00B3';
100     /** <samp>&acute;</samp> <code>&amp;acute; = &amp;#180;</code> -- acute accent = spacing acute, U+00B4 ISOdia. */
101     public static final char _acute='\u00B4';
102     /** <samp>&micro;</samp> <code>&amp;micro; = &amp;#181;</code> -- micro sign, U+00B5 ISOnum. */
103     public static final char _micro='\u00B5';
104     /** <samp>&para;</samp> <code>&amp;para; = &amp;#182;</code> -- pilcrow sign = paragraph sign, U+00B6 ISOnum. */
105     public static final char _para='\u00B6';
106     /** <samp>&middot;</samp> <code>&amp;middot; = &amp;#183;</code> -- middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum. */
107     public static final char _middot='\u00B7';
108     /** <samp>&cedil;</samp> <code>&amp;cedil; = &amp;#184;</code> -- cedilla = spacing cedilla, U+00B8 ISOdia. */
109     public static final char _cedil='\u00B8';
110     /** <samp>&sup1;</samp> <code>&amp;sup1; = &amp;#185;</code> -- superscript one = superscript digit one, U+00B9 ISOnum. */
111     public static final char _sup1='\u00B9';
112     /** <samp>&ordm;</samp> <code>&amp;ordm; = &amp;#186;</code> -- masculine ordinal indicator, U+00BA ISOnum. */
113     public static final char _ordm='\u00BA';
114     /** <samp>&raquo;</samp> <code>&amp;raquo; = &amp;#187;</code> -- right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum. */
115     public static final char _raquo='\u00BB';
116     /** <samp>&frac14;</samp> <code>&amp;frac14; = &amp;#188;</code> -- vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum. */
117     public static final char _frac14='\u00BC';
118     /** <samp>&frac12;</samp> <code>&amp;frac12; = &amp;#189;</code> -- vulgar fraction one half = fraction one half, U+00BD ISOnum. */
119     public static final char _frac12='\u00BD';
120     /** <samp>&frac34;</samp> <code>&amp;frac34; = &amp;#190;</code> -- vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum. */
121     public static final char _frac34='\u00BE';
122     /** <samp>&iquest;</samp> <code>&amp;iquest; = &amp;#191;</code> -- inverted question mark = turned question mark, U+00BF ISOnum. */
123     public static final char _iquest='\u00BF';
124     /** <samp>&Agrave;</samp> <code>&amp;Agrave; = &amp;#192;</code> -- latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1. */
125     public static final char _Agrave='\u00C0';
126     /** <samp>&Aacute;</samp> <code>&amp;Aacute; = &amp;#193;</code> -- latin capital letter A with acute, U+00C1 ISOlat1. */
127     public static final char _Aacute='\u00C1';
128     /** <samp>&Acirc;</samp> <code>&amp;Acirc; = &amp;#194;</code> -- latin capital letter A with circumflex, U+00C2 ISOlat1. */
129     public static final char _Acirc='\u00C2';
130     /** <samp>&Atilde;</samp> <code>&amp;Atilde; = &amp;#195;</code> -- latin capital letter A with tilde, U+00C3 ISOlat1. */
131     public static final char _Atilde='\u00C3';
132     /** <samp>&Auml;</samp> <code>&amp;Auml; = &amp;#196;</code> -- latin capital letter A with diaeresis, U+00C4 ISOlat1. */
133     public static final char _Auml='\u00C4';
134     /** <samp>&Aring;</samp> <code>&amp;Aring; = &amp;#197;</code> -- latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1. */
135     public static final char _Aring='\u00C5';
136     /** <samp>&AElig;</samp> <code>&amp;AElig; = &amp;#198;</code> -- latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1. */
137     public static final char _AElig='\u00C6';
138     /** <samp>&Ccedil;</samp> <code>&amp;Ccedil; = &amp;#199;</code> -- latin capital letter C with cedilla, U+00C7 ISOlat1. */
139     public static final char _Ccedil='\u00C7';
140     /** <samp>&Egrave;</samp> <code>&amp;Egrave; = &amp;#200;</code> -- latin capital letter E with grave, U+00C8 ISOlat1. */
141     public static final char _Egrave='\u00C8';
142     /** <samp>&Eacute;</samp> <code>&amp;Eacute; = &amp;#201;</code> -- latin capital letter E with acute, U+00C9 ISOlat1. */
143     public static final char _Eacute='\u00C9';
144     /** <samp>&Ecirc;</samp> <code>&amp;Ecirc; = &amp;#202;</code> -- latin capital letter E with circumflex, U+00CA ISOlat1. */
145     public static final char _Ecirc='\u00CA';
146     /** <samp>&Euml;</samp> <code>&amp;Euml; = &amp;#203;</code> -- latin capital letter E with diaeresis, U+00CB ISOlat1. */
147     public static final char _Euml='\u00CB';
148     /** <samp>&Igrave;</samp> <code>&amp;Igrave; = &amp;#204;</code> -- latin capital letter I with grave, U+00CC ISOlat1. */
149     public static final char _Igrave='\u00CC';
150     /** <samp>&Iacute;</samp> <code>&amp;Iacute; = &amp;#205;</code> -- latin capital letter I with acute, U+00CD ISOlat1. */
151     public static final char _Iacute='\u00CD';
152     /** <samp>&Icirc;</samp> <code>&amp;Icirc; = &amp;#206;</code> -- latin capital letter I with circumflex, U+00CE ISOlat1. */
153     public static final char _Icirc='\u00CE';
154     /** <samp>&Iuml;</samp> <code>&amp;Iuml; = &amp;#207;</code> -- latin capital letter I with diaeresis, U+00CF ISOlat1. */
155     public static final char _Iuml='\u00CF';
156     /** <samp>&ETH;</samp> <code>&amp;ETH; = &amp;#208;</code> -- latin capital letter ETH, U+00D0 ISOlat1. */
157     public static final char _ETH='\u00D0';
158     /** <samp>&Ntilde;</samp> <code>&amp;Ntilde; = &amp;#209;</code> -- latin capital letter N with tilde, U+00D1 ISOlat1. */
159     public static final char _Ntilde='\u00D1';
160     /** <samp>&Ograve;</samp> <code>&amp;Ograve; = &amp;#210;</code> -- latin capital letter O with grave, U+00D2 ISOlat1. */
161     public static final char _Ograve='\u00D2';
162     /** <samp>&Oacute;</samp> <code>&amp;Oacute; = &amp;#211;</code> -- latin capital letter O with acute, U+00D3 ISOlat1. */
163     public static final char _Oacute='\u00D3';
164     /** <samp>&Ocirc;</samp> <code>&amp;Ocirc; = &amp;#212;</code> -- latin capital letter O with circumflex, U+00D4 ISOlat1. */
165     public static final char _Ocirc='\u00D4';
166     /** <samp>&Otilde;</samp> <code>&amp;Otilde; = &amp;#213;</code> -- latin capital letter O with tilde, U+00D5 ISOlat1. */
167     public static final char _Otilde='\u00D5';
168     /** <samp>&Ouml;</samp> <code>&amp;Ouml; = &amp;#214;</code> -- latin capital letter O with diaeresis, U+00D6 ISOlat1. */
169     public static final char _Ouml='\u00D6';
170     /** <samp>&times;</samp> <code>&amp;times; = &amp;#215;</code> -- multiplication sign, U+00D7 ISOnum. */
171     public static final char _times='\u00D7';
172     /** <samp>&Oslash;</samp> <code>&amp;Oslash; = &amp;#216;</code> -- latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1. */
173     public static final char _Oslash='\u00D8';
174     /** <samp>&Ugrave;</samp> <code>&amp;Ugrave; = &amp;#217;</code> -- latin capital letter U with grave, U+00D9 ISOlat1. */
175     public static final char _Ugrave='\u00D9';
176     /** <samp>&Uacute;</samp> <code>&amp;Uacute; = &amp;#218;</code> -- latin capital letter U with acute, U+00DA ISOlat1. */
177     public static final char _Uacute='\u00DA';
178     /** <samp>&Ucirc;</samp> <code>&amp;Ucirc; = &amp;#219;</code> -- latin capital letter U with circumflex, U+00DB ISOlat1. */
179     public static final char _Ucirc='\u00DB';
180     /** <samp>&Uuml;</samp> <code>&amp;Uuml; = &amp;#220;</code> -- latin capital letter U with diaeresis, U+00DC ISOlat1. */
181     public static final char _Uuml='\u00DC';
182     /** <samp>&Yacute;</samp> <code>&amp;Yacute; = &amp;#221;</code> -- latin capital letter Y with acute, U+00DD ISOlat1. */
183     public static final char _Yacute='\u00DD';
184     /** <samp>&THORN;</samp> <code>&amp;THORN; = &amp;#222;</code> -- latin capital letter THORN, U+00DE ISOlat1. */
185     public static final char _THORN='\u00DE';
186     /** <samp>&szlig;</samp> <code>&amp;szlig; = &amp;#223;</code> -- latin small letter sharp s = ess-zed, U+00DF ISOlat1. */
187     public static final char _szlig='\u00DF';
188     /** <samp>&agrave;</samp> <code>&amp;agrave; = &amp;#224;</code> -- latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1. */
189     public static final char _agrave='\u00E0';
190     /** <samp>&aacute;</samp> <code>&amp;aacute; = &amp;#225;</code> -- latin small letter a with acute, U+00E1 ISOlat1. */
191     public static final char _aacute='\u00E1';
192     /** <samp>&acirc;</samp> <code>&amp;acirc; = &amp;#226;</code> -- latin small letter a with circumflex, U+00E2 ISOlat1. */
193     public static final char _acirc='\u00E2';
194     /** <samp>&atilde;</samp> <code>&amp;atilde; = &amp;#227;</code> -- latin small letter a with tilde, U+00E3 ISOlat1. */
195     public static final char _atilde='\u00E3';
196     /** <samp>&auml;</samp> <code>&amp;auml; = &amp;#228;</code> -- latin small letter a with diaeresis, U+00E4 ISOlat1. */
197     public static final char _auml='\u00E4';
198     /** <samp>&aring;</samp> <code>&amp;aring; = &amp;#229;</code> -- latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1. */
199     public static final char _aring='\u00E5';
200     /** <samp>&aelig;</samp> <code>&amp;aelig; = &amp;#230;</code> -- latin small letter ae = latin small ligature ae, U+00E6 ISOlat1. */
201     public static final char _aelig='\u00E6';
202     /** <samp>&ccedil;</samp> <code>&amp;ccedil; = &amp;#231;</code> -- latin small letter c with cedilla, U+00E7 ISOlat1. */
203     public static final char _ccedil='\u00E7';
204     /** <samp>&egrave;</samp> <code>&amp;egrave; = &amp;#232;</code> -- latin small letter e with grave, U+00E8 ISOlat1. */
205     public static final char _egrave='\u00E8';
206     /** <samp>&eacute;</samp> <code>&amp;eacute; = &amp;#233;</code> -- latin small letter e with acute, U+00E9 ISOlat1. */
207     public static final char _eacute='\u00E9';
208     /** <samp>&ecirc;</samp> <code>&amp;ecirc; = &amp;#234;</code> -- latin small letter e with circumflex, U+00EA ISOlat1. */
209     public static final char _ecirc='\u00EA';
210     /** <samp>&euml;</samp> <code>&amp;euml; = &amp;#235;</code> -- latin small letter e with diaeresis, U+00EB ISOlat1. */
211     public static final char _euml='\u00EB';
212     /** <samp>&igrave;</samp> <code>&amp;igrave; = &amp;#236;</code> -- latin small letter i with grave, U+00EC ISOlat1. */
213     public static final char _igrave='\u00EC';
214     /** <samp>&iacute;</samp> <code>&amp;iacute; = &amp;#237;</code> -- latin small letter i with acute, U+00ED ISOlat1. */
215     public static final char _iacute='\u00ED';
216     /** <samp>&icirc;</samp> <code>&amp;icirc; = &amp;#238;</code> -- latin small letter i with circumflex, U+00EE ISOlat1. */
217     public static final char _icirc='\u00EE';
218     /** <samp>&iuml;</samp> <code>&amp;iuml; = &amp;#239;</code> -- latin small letter i with diaeresis, U+00EF ISOlat1. */
219     public static final char _iuml='\u00EF';
220     /** <samp>&eth;</samp> <code>&amp;eth; = &amp;#240;</code> -- latin small letter eth, U+00F0 ISOlat1. */
221     public static final char _eth='\u00F0';
222     /** <samp>&ntilde;</samp> <code>&amp;ntilde; = &amp;#241;</code> -- latin small letter n with tilde, U+00F1 ISOlat1. */
223     public static final char _ntilde='\u00F1';
224     /** <samp>&ograve;</samp> <code>&amp;ograve; = &amp;#242;</code> -- latin small letter o with grave, U+00F2 ISOlat1. */
225     public static final char _ograve='\u00F2';
226     /** <samp>&oacute;</samp> <code>&amp;oacute; = &amp;#243;</code> -- latin small letter o with acute, U+00F3 ISOlat1. */
227     public static final char _oacute='\u00F3';
228     /** <samp>&ocirc;</samp> <code>&amp;ocirc; = &amp;#244;</code> -- latin small letter o with circumflex, U+00F4 ISOlat1. */
229     public static final char _ocirc='\u00F4';
230     /** <samp>&otilde;</samp> <code>&amp;otilde; = &amp;#245;</code> -- latin small letter o with tilde, U+00F5 ISOlat1. */
231     public static final char _otilde='\u00F5';
232     /** <samp>&ouml;</samp> <code>&amp;ouml; = &amp;#246;</code> -- latin small letter o with diaeresis, U+00F6 ISOlat1. */
233     public static final char _ouml='\u00F6';
234     /** <samp>&divide;</samp> <code>&amp;divide; = &amp;#247;</code> -- division sign, U+00F7 ISOnum. */
235     public static final char _divide='\u00F7';
236     /** <samp>&oslash;</samp> <code>&amp;oslash; = &amp;#248;</code> -- latin small letter o with stroke = latin small letter o slash, U+00F8 ISOlat1. */
237     public static final char _oslash='\u00F8';
238     /** <samp>&ugrave;</samp> <code>&amp;ugrave; = &amp;#249;</code> -- latin small letter u with grave, U+00F9 ISOlat1. */
239     public static final char _ugrave='\u00F9';
240     /** <samp>&uacute;</samp> <code>&amp;uacute; = &amp;#250;</code> -- latin small letter u with acute, U+00FA ISOlat1. */
241     public static final char _uacute='\u00FA';
242     /** <samp>&ucirc;</samp> <code>&amp;ucirc; = &amp;#251;</code> -- latin small letter u with circumflex, U+00FB ISOlat1. */
243     public static final char _ucirc='\u00FB';
244     /** <samp>&uuml;</samp> <code>&amp;uuml; = &amp;#252;</code> -- latin small letter u with diaeresis, U+00FC ISOlat1. */
245     public static final char _uuml='\u00FC';
246     /** <samp>&yacute;</samp> <code>&amp;yacute; = &amp;#253;</code> -- latin small letter y with acute, U+00FD ISOlat1. */
247     public static final char _yacute='\u00FD';
248     /** <samp>&thorn;</samp> <code>&amp;thorn; = &amp;#254;</code> -- latin small letter thorn, U+00FE ISOlat1. */
249     public static final char _thorn='\u00FE';
250     /** <samp>&yuml;</samp> <code>&amp;yuml; = &amp;#255;</code> -- latin small letter y with diaeresis, U+00FF ISOlat1. */
251     public static final char _yuml='\u00FF';
252     /** <samp>&fnof;</samp> <code>&amp;fnof; = &amp;#402;</code> -- latin small letter f with hook = function = florin, U+0192 ISOtech. */
253     public static final char _fnof='\u0192';
254     /** <samp>&Alpha;</samp> <code>&amp;Alpha; = &amp;#913;</code> -- greek capital letter alpha, U+0391. */
255     public static final char _Alpha='\u0391';
256     /** <samp>&Beta;</samp> <code>&amp;Beta; = &amp;#914;</code> -- greek capital letter beta, U+0392. */
257     public static final char _Beta='\u0392';
258     /** <samp>&Gamma;</samp> <code>&amp;Gamma; = &amp;#915;</code> -- greek capital letter gamma, U+0393 ISOgrk3. */
259     public static final char _Gamma='\u0393';
260     /** <samp>&Delta;</samp> <code>&amp;Delta; = &amp;#916;</code> -- greek capital letter delta, U+0394 ISOgrk3. */
261     public static final char _Delta='\u0394';
262     /** <samp>&Epsilon;</samp> <code>&amp;Epsilon; = &amp;#917;</code> -- greek capital letter epsilon, U+0395. */
263     public static final char _Epsilon='\u0395';
264     /** <samp>&Zeta;</samp> <code>&amp;Zeta; = &amp;#918;</code> -- greek capital letter zeta, U+0396. */
265     public static final char _Zeta='\u0396';
266     /** <samp>&Eta;</samp> <code>&amp;Eta; = &amp;#919;</code> -- greek capital letter eta, U+0397. */
267     public static final char _Eta='\u0397';
268     /** <samp>&Theta;</samp> <code>&amp;Theta; = &amp;#920;</code> -- greek capital letter theta, U+0398 ISOgrk3. */
269     public static final char _Theta='\u0398';
270     /** <samp>&Iota;</samp> <code>&amp;Iota; = &amp;#921;</code> -- greek capital letter iota, U+0399. */
271     public static final char _Iota='\u0399';
272     /** <samp>&Kappa;</samp> <code>&amp;Kappa; = &amp;#922;</code> -- greek capital letter kappa, U+039A. */
273     public static final char _Kappa='\u039A';
274     /** <samp>&Lambda;</samp> <code>&amp;Lambda; = &amp;#923;</code> -- greek capital letter lambda, U+039B ISOgrk3. */
275     public static final char _Lambda='\u039B';
276     /** <samp>&Mu;</samp> <code>&amp;Mu; = &amp;#924;</code> -- greek capital letter mu, U+039C. */
277     public static final char _Mu='\u039C';
278     /** <samp>&Nu;</samp> <code>&amp;Nu; = &amp;#925;</code> -- greek capital letter nu, U+039D. */
279     public static final char _Nu='\u039D';
280     /** <samp>&Xi;</samp> <code>&amp;Xi; = &amp;#926;</code> -- greek capital letter xi, U+039E ISOgrk3. */
281     public static final char _Xi='\u039E';
282     /** <samp>&Omicron;</samp> <code>&amp;Omicron; = &amp;#927;</code> -- greek capital letter omicron, U+039F. */
283     public static final char _Omicron='\u039F';
284     /** <samp>&Pi;</samp> <code>&amp;Pi; = &amp;#928;</code> -- greek capital letter pi, U+03A0 ISOgrk3. */
285     public static final char _Pi='\u03A0';
286     /** <samp>&Rho;</samp> <code>&amp;Rho; = &amp;#929;</code> -- greek capital letter rho, U+03A1. */
287     public static final char _Rho='\u03A1';
288     /** <samp>&Sigma;</samp> <code>&amp;Sigma; = &amp;#931;</code> -- greek capital letter sigma, U+03A3 ISOgrk3. */
289     public static final char _Sigma='\u03A3';
290     /** <samp>&Tau;</samp> <code>&amp;Tau; = &amp;#932;</code> -- greek capital letter tau, U+03A4. */
291     public static final char _Tau='\u03A4';
292     /** <samp>&Upsilon;</samp> <code>&amp;Upsilon; = &amp;#933;</code> -- greek capital letter upsilon, U+03A5 ISOgrk3. */
293     public static final char _Upsilon='\u03A5';
294     /** <samp>&Phi;</samp> <code>&amp;Phi; = &amp;#934;</code> -- greek capital letter phi, U+03A6 ISOgrk3. */
295     public static final char _Phi='\u03A6';
296     /** <samp>&Chi;</samp> <code>&amp;Chi; = &amp;#935;</code> -- greek capital letter chi, U+03A7. */
297     public static final char _Chi='\u03A7';
298     /** <samp>&Psi;</samp> <code>&amp;Psi; = &amp;#936;</code> -- greek capital letter psi, U+03A8 ISOgrk3. */
299     public static final char _Psi='\u03A8';
300     /** <samp>&Omega;</samp> <code>&amp;Omega; = &amp;#937;</code> -- greek capital letter omega, U+03A9 ISOgrk3. */
301     public static final char _Omega='\u03A9';
302     /** <samp>&alpha;</samp> <code>&amp;alpha; = &amp;#945;</code> -- greek small letter alpha, U+03B1 ISOgrk3. */
303     public static final char _alpha='\u03B1';
304     /** <samp>&beta;</samp> <code>&amp;beta; = &amp;#946;</code> -- greek small letter beta, U+03B2 ISOgrk3. */
305     public static final char _beta='\u03B2';
306     /** <samp>&gamma;</samp> <code>&amp;gamma; = &amp;#947;</code> -- greek small letter gamma, U+03B3 ISOgrk3. */
307     public static final char _gamma='\u03B3';
308     /** <samp>&delta;</samp> <code>&amp;delta; = &amp;#948;</code> -- greek small letter delta, U+03B4 ISOgrk3. */
309     public static final char _delta='\u03B4';
310     /** <samp>&epsilon;</samp> <code>&amp;epsilon; = &amp;#949;</code> -- greek small letter epsilon, U+03B5 ISOgrk3. */
311     public static final char _epsilon='\u03B5';
312     /** <samp>&zeta;</samp> <code>&amp;zeta; = &amp;#950;</code> -- greek small letter zeta, U+03B6 ISOgrk3. */
313     public static final char _zeta='\u03B6';
314     /** <samp>&eta;</samp> <code>&amp;eta; = &amp;#951;</code> -- greek small letter eta, U+03B7 ISOgrk3. */
315     public static final char _eta='\u03B7';
316     /** <samp>&theta;</samp> <code>&amp;theta; = &amp;#952;</code> -- greek small letter theta, U+03B8 ISOgrk3. */
317     public static final char _theta='\u03B8';
318     /** <samp>&iota;</samp> <code>&amp;iota; = &amp;#953;</code> -- greek small letter iota, U+03B9 ISOgrk3. */
319     public static final char _iota='\u03B9';
320     /** <samp>&kappa;</samp> <code>&amp;kappa; = &amp;#954;</code> -- greek small letter kappa, U+03BA ISOgrk3. */
321     public static final char _kappa='\u03BA';
322     /** <samp>&lambda;</samp> <code>&amp;lambda; = &amp;#955;</code> -- greek small letter lambda, U+03BB ISOgrk3. */
323     public static final char _lambda='\u03BB';
324     /** <samp>&mu;</samp> <code>&amp;mu; = &amp;#956;</code> -- greek small letter mu, U+03BC ISOgrk3. */
325     public static final char _mu='\u03BC';
326     /** <samp>&nu;</samp> <code>&amp;nu; = &amp;#957;</code> -- greek small letter nu, U+03BD ISOgrk3. */
327     public static final char _nu='\u03BD';
328     /** <samp>&xi;</samp> <code>&amp;xi; = &amp;#958;</code> -- greek small letter xi, U+03BE ISOgrk3. */
329     public static final char _xi='\u03BE';
330     /** <samp>&omicron;</samp> <code>&amp;omicron; = &amp;#959;</code> -- greek small letter omicron, U+03BF NEW. */
331     public static final char _omicron='\u03BF';
332     /** <samp>&pi;</samp> <code>&amp;pi; = &amp;#960;</code> -- greek small letter pi, U+03C0 ISOgrk3. */
333     public static final char _pi='\u03C0';
334     /** <samp>&rho;</samp> <code>&amp;rho; = &amp;#961;</code> -- greek small letter rho, U+03C1 ISOgrk3. */
335     public static final char _rho='\u03C1';
336     /** <samp>&sigmaf;</samp> <code>&amp;sigmaf; = &amp;#962;</code> -- greek small letter final sigma, U+03C2 ISOgrk3. */
337     public static final char _sigmaf='\u03C2';
338     /** <samp>&sigma;</samp> <code>&amp;sigma; = &amp;#963;</code> -- greek small letter sigma, U+03C3 ISOgrk3. */
339     public static final char _sigma='\u03C3';
340     /** <samp>&tau;</samp> <code>&amp;tau; = &amp;#964;</code> -- greek small letter tau, U+03C4 ISOgrk3. */
341     public static final char _tau='\u03C4';
342     /** <samp>&upsilon;</samp> <code>&amp;upsilon; = &amp;#965;</code> -- greek small letter upsilon, U+03C5 ISOgrk3. */
343     public static final char _upsilon='\u03C5';
344     /** <samp>&phi;</samp> <code>&amp;phi; = &amp;#966;</code> -- greek small letter phi, U+03C6 ISOgrk3. */
345     public static final char _phi='\u03C6';
346     /** <samp>&chi;</samp> <code>&amp;chi; = &amp;#967;</code> -- greek small letter chi, U+03C7 ISOgrk3. */
347     public static final char _chi='\u03C7';
348     /** <samp>&psi;</samp> <code>&amp;psi; = &amp;#968;</code> -- greek small letter psi, U+03C8 ISOgrk3. */
349     public static final char _psi='\u03C8';
350     /** <samp>&omega;</samp> <code>&amp;omega; = &amp;#969;</code> -- greek small letter omega, U+03C9 ISOgrk3. */
351     public static final char _omega='\u03C9';
352     /** <samp>&thetasym;</samp> <code>&amp;thetasym; = &amp;#977;</code> -- greek small letter theta symbol, U+03D1 NEW. */
353     public static final char _thetasym='\u03D1';
354     /** <samp>&upsih;</samp> <code>&amp;upsih; = &amp;#978;</code> -- greek upsilon with hook symbol, U+03D2 NEW. */
355     public static final char _upsih='\u03D2';
356     /** <samp>&piv;</samp> <code>&amp;piv; = &amp;#982;</code> -- greek pi symbol, U+03D6 ISOgrk3. */
357     public static final char _piv='\u03D6';
358     /** <samp>&bull;</samp> <code>&amp;bull; = &amp;#8226;</code> -- bullet = black small circle, U+2022 ISOpub<br />(see <a HREF="#_bull">comments</a>).<p>bullet is NOT the same as bullet operator, U+2219</p> */
359     public static final char _bull='\u2022';
360     /** <samp>&hellip;</samp> <code>&amp;hellip; = &amp;#8230;</code> -- horizontal ellipsis = three dot leader, U+2026 ISOpub. */
361     public static final char _hellip='\u2026';
362     /** <samp>&prime;</samp> <code>&amp;prime; = &amp;#8242;</code> -- prime = minutes = feet, U+2032 ISOtech. */
363     public static final char _prime='\u2032';
364     /** <samp>&Prime;</samp> <code>&amp;Prime; = &amp;#8243;</code> -- double prime = seconds = inches, U+2033 ISOtech. */
365     public static final char _Prime='\u2033';
366     /** <samp>&oline;</samp> <code>&amp;oline; = &amp;#8254;</code> -- overline = spacing overscore, U+203E NEW. */
367     public static final char _oline='\u203E';
368     /** <samp>&frasl;</samp> <code>&amp;frasl; = &amp;#8260;</code> -- fraction slash, U+2044 NEW. */
369     public static final char _frasl='\u2044';
370     /** <samp>&weierp;</samp> <code>&amp;weierp; = &amp;#8472;</code> -- script capital P = power set = Weierstrass p, U+2118 ISOamso. */
371     public static final char _weierp='\u2118';
372     /** <samp>&image;</samp> <code>&amp;image; = &amp;#8465;</code> -- black-letter capital I = imaginary part, U+2111 ISOamso. */
373     public static final char _image='\u2111';
374     /** <samp>&real;</samp> <code>&amp;real; = &amp;#8476;</code> -- black-letter capital R = real part symbol, U+211C ISOamso. */
375     public static final char _real='\u211C';
376     /** <samp>&trade;</samp> <code>&amp;trade; = &amp;#8482;</code> -- trade mark sign, U+2122 ISOnum. */
377     public static final char _trade='\u2122';
378     /** <samp>&alefsym;</samp> <code>&amp;alefsym; = &amp;#8501;</code> -- alef symbol = first transfinite cardinal, U+2135 NEW<br />(see <a HREF="#_alefsym">comments</a>).<p>alef symbol is NOT the same as Hebrew letter alef, U+05D0, although the same glyph could be used to depict both characters</p> */
379     public static final char _alefsym='\u2135';
380     /** <samp>&larr;</samp> <code>&amp;larr; = &amp;#8592;</code> -- leftwards arrow, U+2190 ISOnum. */
381     public static final char _larr='\u2190';
382     /** <samp>&uarr;</samp> <code>&amp;uarr; = &amp;#8593;</code> -- upwards arrow, U+2191 ISOnum. */
383     public static final char _uarr='\u2191';
384     /** <samp>&rarr;</samp> <code>&amp;rarr; = &amp;#8594;</code> -- rightwards arrow, U+2192 ISOnum. */
385     public static final char _rarr='\u2192';
386     /** <samp>&darr;</samp> <code>&amp;darr; = &amp;#8595;</code> -- downwards arrow, U+2193 ISOnum. */
387     public static final char _darr='\u2193';
388     /** <samp>&harr;</samp> <code>&amp;harr; = &amp;#8596;</code> -- left right arrow, U+2194 ISOamsa. */
389     public static final char _harr='\u2194';
390     /** <samp>&crarr;</samp> <code>&amp;crarr; = &amp;#8629;</code> -- downwards arrow with corner leftwards = carriage return, U+21B5 NEW. */
391     public static final char _crarr='\u21B5';
392     /** <samp>&lArr;</samp> <code>&amp;lArr; = &amp;#8656;</code> -- leftwards double arrow, U+21D0 ISOtech<br />(see <a HREF="#_lArr">comments</a>).<p>ISO 10646 does not say that lArr is the same as the 'is implied by' arrow but also does not have any other character for that function. So &#63; lArr can be used for 'is implied by' as ISOtech suggests</p> */
393     public static final char _lArr='\u21D0';
394     /** <samp>&uArr;</samp> <code>&amp;uArr; = &amp;#8657;</code> -- upwards double arrow, U+21D1 ISOamsa. */
395     public static final char _uArr='\u21D1';
396     /** <samp>&rArr;</samp> <code>&amp;rArr; = &amp;#8658;</code> -- rightwards double arrow, U+21D2 ISOtech<br />(see <a HREF="#_rArr">comments</a>).<p>ISO 10646 does not say this is the 'implies' character but does not have another character with this function so &#63; rArr can be used for 'implies' as ISOtech suggests</p> */
397     public static final char _rArr='\u21D2';
398     /** <samp>&dArr;</samp> <code>&amp;dArr; = &amp;#8659;</code> -- downwards double arrow, U+21D3 ISOamsa. */
399     public static final char _dArr='\u21D3';
400     /** <samp>&hArr;</samp> <code>&amp;hArr; = &amp;#8660;</code> -- left right double arrow, U+21D4 ISOamsa. */
401     public static final char _hArr='\u21D4';
402     /** <samp>&forall;</samp> <code>&amp;forall; = &amp;#8704;</code> -- for all, U+2200 ISOtech. */
403     public static final char _forall='\u2200';
404     /** <samp>&part;</samp> <code>&amp;part; = &amp;#8706;</code> -- partial differential, U+2202 ISOtech. */
405     public static final char _part='\u2202';
406     /** <samp>&exist;</samp> <code>&amp;exist; = &amp;#8707;</code> -- there exists, U+2203 ISOtech. */
407     public static final char _exist='\u2203';
408     /** <samp>&empty;</samp> <code>&amp;empty; = &amp;#8709;</code> -- empty set = null set = diameter, U+2205 ISOamso. */
409     public static final char _empty='\u2205';
410     /** <samp>&nabla;</samp> <code>&amp;nabla; = &amp;#8711;</code> -- nabla = backward difference, U+2207 ISOtech. */
411     public static final char _nabla='\u2207';
412     /** <samp>&isin;</samp> <code>&amp;isin; = &amp;#8712;</code> -- element of, U+2208 ISOtech. */
413     public static final char _isin='\u2208';
414     /** <samp>&notin;</samp> <code>&amp;notin; = &amp;#8713;</code> -- not an element of, U+2209 ISOtech. */
415     public static final char _notin='\u2209';
416     /** <samp>&ni;</samp> <code>&amp;ni; = &amp;#8715;</code> -- contains as member, U+220B ISOtech<br />(see <a HREF="#_ni">comments</a>).<p>should there be a more memorable name than 'ni'&#63;</p> */
417     public static final char _ni='\u220B';
418     /** <samp>&prod;</samp> <code>&amp;prod; = &amp;#8719;</code> -- n-ary product = product sign, U+220F ISOamsb<br />(see <a HREF="#_prod">comments</a>).<p>prod is NOT the same character as U+03A0 'greek capital letter pi' though the same glyph might be used for both</p> */
419     public static final char _prod='\u220F';
420     /** <samp>&sum;</samp> <code>&amp;sum; = &amp;#8721;</code> -- n-ary summation, U+2211 ISOamsb<br />(see <a HREF="#_sum">comments</a>).<p>sum is NOT the same character as U+03A3 'greek capital letter sigma' though the same glyph might be used for both</p> */
421     public static final char _sum='\u2211';
422     /** <samp>&minus;</samp> <code>&amp;minus; = &amp;#8722;</code> -- minus sign, U+2212 ISOtech. */
423     public static final char _minus='\u2212';
424     /** <samp>&lowast;</samp> <code>&amp;lowast; = &amp;#8727;</code> -- asterisk operator, U+2217 ISOtech. */
425     public static final char _lowast='\u2217';
426     /** <samp>&radic;</samp> <code>&amp;radic; = &amp;#8730;</code> -- square root = radical sign, U+221A ISOtech. */
427     public static final char _radic='\u221A';
428     /** <samp>&prop;</samp> <code>&amp;prop; = &amp;#8733;</code> -- proportional to, U+221D ISOtech. */
429     public static final char _prop='\u221D';
430     /** <samp>&infin;</samp> <code>&amp;infin; = &amp;#8734;</code> -- infinity, U+221E ISOtech. */
431     public static final char _infin='\u221E';
432     /** <samp>&ang;</samp> <code>&amp;ang; = &amp;#8736;</code> -- angle, U+2220 ISOamso. */
433     public static final char _ang='\u2220';
434     /** <samp>&and;</samp> <code>&amp;and; = &amp;#8743;</code> -- logical and = wedge, U+2227 ISOtech. */
435     public static final char _and='\u2227';
436     /** <samp>&or;</samp> <code>&amp;or; = &amp;#8744;</code> -- logical or = vee, U+2228 ISOtech. */
437     public static final char _or='\u2228';
438     /** <samp>&cap;</samp> <code>&amp;cap; = &amp;#8745;</code> -- intersection = cap, U+2229 ISOtech. */
439     public static final char _cap='\u2229';
440     /** <samp>&cup;</samp> <code>&amp;cup; = &amp;#8746;</code> -- union = cup, U+222A ISOtech. */
441     public static final char _cup='\u222A';
442     /** <samp>&int;</samp> <code>&amp;int; = &amp;#8747;</code> -- integral, U+222B ISOtech. */
443     public static final char _int='\u222B';
444     /** <samp>&there4;</samp> <code>&amp;there4; = &amp;#8756;</code> -- therefore, U+2234 ISOtech. */
445     public static final char _there4='\u2234';
446     /** <samp>&sim;</samp> <code>&amp;sim; = &amp;#8764;</code> -- tilde operator = varies with = similar to, U+223C ISOtech<br />(see <a HREF="#_sim">comments</a>).<p>tilde operator is NOT the same character as the tilde, U+007E, although the same glyph might be used to represent both</p> */
447     public static final char _sim='\u223C';
448     /** <samp>&cong;</samp> <code>&amp;cong; = &amp;#8773;</code> -- approximately equal to, U+2245 ISOtech. */
449     public static final char _cong='\u2245';
450     /** <samp>&asymp;</samp> <code>&amp;asymp; = &amp;#8776;</code> -- almost equal to = asymptotic to, U+2248 ISOamsr. */
451     public static final char _asymp='\u2248';
452     /** <samp>&ne;</samp> <code>&amp;ne; = &amp;#8800;</code> -- not equal to, U+2260 ISOtech. */
453     public static final char _ne='\u2260';
454     /** <samp>&equiv;</samp> <code>&amp;equiv; = &amp;#8801;</code> -- identical to, U+2261 ISOtech. */
455     public static final char _equiv='\u2261';
456     /** <samp>&le;</samp> <code>&amp;le; = &amp;#8804;</code> -- less-than or equal to, U+2264 ISOtech. */
457     public static final char _le='\u2264';
458     /** <samp>&ge;</samp> <code>&amp;ge; = &amp;#8805;</code> -- greater-than or equal to, U+2265 ISOtech. */
459     public static final char _ge='\u2265';
460     /** <samp>&sub;</samp> <code>&amp;sub; = &amp;#8834;</code> -- subset of, U+2282 ISOtech. */
461     public static final char _sub='\u2282';
462     /** <samp>&sup;</samp> <code>&amp;sup; = &amp;#8835;</code> -- superset of, U+2283 ISOtech<br />(see <a HREF="#_sup">comments</a>).<p>note that nsup, 'not a superset of, U+2285' is not covered by the Symbol font encoding and is not included. Should it be, for symmetry&#63; It is in ISOamsn</p> */
463     public static final char _sup='\u2283';
464     /** <samp>&nsub;</samp> <code>&amp;nsub; = &amp;#8836;</code> -- not a subset of, U+2284 ISOamsn. */
465     public static final char _nsub='\u2284';
466     /** <samp>&sube;</samp> <code>&amp;sube; = &amp;#8838;</code> -- subset of or equal to, U+2286 ISOtech. */
467     public static final char _sube='\u2286';
468     /** <samp>&supe;</samp> <code>&amp;supe; = &amp;#8839;</code> -- superset of or equal to, U+2287 ISOtech. */
469     public static final char _supe='\u2287';
470     /** <samp>&oplus;</samp> <code>&amp;oplus; = &amp;#8853;</code> -- circled plus = direct sum, U+2295 ISOamsb. */
471     public static final char _oplus='\u2295';
472     /** <samp>&otimes;</samp> <code>&amp;otimes; = &amp;#8855;</code> -- circled times = vector product, U+2297 ISOamsb. */
473     public static final char _otimes='\u2297';
474     /** <samp>&perp;</samp> <code>&amp;perp; = &amp;#8869;</code> -- up tack = orthogonal to = perpendicular, U+22A5 ISOtech. */
475     public static final char _perp='\u22A5';
476     /** <samp>&sdot;</samp> <code>&amp;sdot; = &amp;#8901;</code> -- dot operator, U+22C5 ISOamsb<br />(see <a HREF="#_sdot">comments</a>).<p>dot operator is NOT the same character as U+00B7 middle dot</p> */
477     public static final char _sdot='\u22C5';
478     /** <samp>&lceil;</samp> <code>&amp;lceil; = &amp;#8968;</code> -- left ceiling = APL upstile, U+2308 ISOamsc. */
479     public static final char _lceil='\u2308';
480     /** <samp>&rceil;</samp> <code>&amp;rceil; = &amp;#8969;</code> -- right ceiling, U+2309 ISOamsc. */
481     public static final char _rceil='\u2309';
482     /** <samp>&lfloor;</samp> <code>&amp;lfloor; = &amp;#8970;</code> -- left floor = APL downstile, U+230A ISOamsc. */
483     public static final char _lfloor='\u230A';
484     /** <samp>&rfloor;</samp> <code>&amp;rfloor; = &amp;#8971;</code> -- right floor, U+230B ISOamsc. */
485     public static final char _rfloor='\u230B';
486     /** <samp>&lang;</samp> <code>&amp;lang; = &amp;#9001;</code> -- left-pointing angle bracket = bra, U+2329 ISOtech<br />(see <a HREF="#_lang">comments</a>).<p>lang is NOT the same character as U+003C 'less than' or U+2039 'single left-pointing angle quotation mark'</p> */
487     public static final char _lang='\u2329';
488     /** <samp>&rang;</samp> <code>&amp;rang; = &amp;#9002;</code> -- right-pointing angle bracket = ket, U+232A ISOtech<br />(see <a HREF="#_rang">comments</a>).<p>rang is NOT the same character as U+003E 'greater than' or U+203A 'single right-pointing angle quotation mark'</p> */
489     public static final char _rang='\u232A';
490     /** <samp>&loz;</samp> <code>&amp;loz; = &amp;#9674;</code> -- lozenge, U+25CA ISOpub. */
491     public static final char _loz='\u25CA';
492     /** <samp>&spades;</samp> <code>&amp;spades; = &amp;#9824;</code> -- black spade suit, U+2660 ISOpub<br />(see <a HREF="#_spades">comments</a>).<p>black here seems to mean filled as opposed to hollow</p> */
493     public static final char _spades='\u2660';
494     /** <samp>&clubs;</samp> <code>&amp;clubs; = &amp;#9827;</code> -- black club suit = shamrock, U+2663 ISOpub. */
495     public static final char _clubs='\u2663';
496     /** <samp>&hearts;</samp> <code>&amp;hearts; = &amp;#9829;</code> -- black heart suit = valentine, U+2665 ISOpub. */
497     public static final char _hearts='\u2665';
498     /** <samp>&diams;</samp> <code>&amp;diams; = &amp;#9830;</code> -- black diamond suit, U+2666 ISOpub. */
499     public static final char _diams='\u2666';
500     /** <samp>&quot;</samp> <code>&amp;quot; = &amp;#34;</code> -- quotation mark = APL quote, U+0022 ISOnum. */
501     public static final char _quot='\u0022';
502     /** <samp>&amp;</samp> <code>&amp;amp; = &amp;#38;</code> -- ampersand, U+0026 ISOnum. */
503     public static final char _amp='\u0026';
504     /** <samp>&lt;</samp> <code>&amp;lt; = &amp;#60;</code> -- less-than sign, U+003C ISOnum. */
505     public static final char _lt='\u003C';
506     /** <samp>&gt;</samp> <code>&amp;gt; = &amp;#62;</code> -- greater-than sign, U+003E ISOnum. */
507     public static final char _gt='\u003E';
508     /** <samp>&OElig;</samp> <code>&amp;OElig; = &amp;#338;</code> -- latin capital ligature OE, U+0152 ISOlat2. */
509     public static final char _OElig='\u0152';
510     /** <samp>&oelig;</samp> <code>&amp;oelig; = &amp;#339;</code> -- latin small ligature oe, U+0153 ISOlat2<br />(see <a HREF="#_oelig">comments</a>).<p>ligature is a misnomer, this is a separate character in some languages</p> */
511     public static final char _oelig='\u0153';
512     /** <samp>&Scaron;</samp> <code>&amp;Scaron; = &amp;#352;</code> -- latin capital letter S with caron, U+0160 ISOlat2. */
513     public static final char _Scaron='\u0160';
514     /** <samp>&scaron;</samp> <code>&amp;scaron; = &amp;#353;</code> -- latin small letter s with caron, U+0161 ISOlat2. */
515     public static final char _scaron='\u0161';
516     /** <samp>&Yuml;</samp> <code>&amp;Yuml; = &amp;#376;</code> -- latin capital letter Y with diaeresis, U+0178 ISOlat2. */
517     public static final char _Yuml='\u0178';
518     /** <samp>&circ;</samp> <code>&amp;circ; = &amp;#710;</code> -- modifier letter circumflex accent, U+02C6 ISOpub. */
519     public static final char _circ='\u02C6';
520     /** <samp>&tilde;</samp> <code>&amp;tilde; = &amp;#732;</code> -- small tilde, U+02DC ISOdia. */
521     public static final char _tilde='\u02DC';
522     /** <samp>&ensp;</samp> <code>&amp;ensp; = &amp;#8194;</code> -- en space, U+2002 ISOpub. */
523     public static final char _ensp='\u2002';
524     /** <samp>&emsp;</samp> <code>&amp;emsp; = &amp;#8195;</code> -- em space, U+2003 ISOpub. */
525     public static final char _emsp='\u2003';
526     /** <samp>&thinsp;</samp> <code>&amp;thinsp; = &amp;#8201;</code> -- thin space, U+2009 ISOpub. */
527     public static final char _thinsp='\u2009';
528     /** <samp>&zwnj;</samp> <code>&amp;zwnj; = &amp;#8204;</code> -- zero width non-joiner, U+200C NEW RFC 2070. */
529     public static final char _zwnj='\u200C';
530     /** <samp>&zwj;</samp> <code>&amp;zwj; = &amp;#8205;</code> -- zero width joiner, U+200D NEW RFC 2070. */
531     public static final char _zwj='\u200D';
532     /** <samp>&lrm;</samp> <code>&amp;lrm; = &amp;#8206;</code> -- left-to-right mark, U+200E NEW RFC 2070. */
533     public static final char _lrm='\u200E';
534     /** <samp>&rlm;</samp> <code>&amp;rlm; = &amp;#8207;</code> -- right-to-left mark, U+200F NEW RFC 2070. */
535     public static final char _rlm='\u200F';
536     /** <samp>&ndash;</samp> <code>&amp;ndash; = &amp;#8211;</code> -- en dash, U+2013 ISOpub. */
537     public static final char _ndash='\u2013';
538     /** <samp>&mdash;</samp> <code>&amp;mdash; = &amp;#8212;</code> -- em dash, U+2014 ISOpub. */
539     public static final char _mdash='\u2014';
540     /** <samp>&lsquo;</samp> <code>&amp;lsquo; = &amp;#8216;</code> -- left single quotation mark, U+2018 ISOnum. */
541     public static final char _lsquo='\u2018';
542     /** <samp>&rsquo;</samp> <code>&amp;rsquo; = &amp;#8217;</code> -- right single quotation mark, U+2019 ISOnum. */
543     public static final char _rsquo='\u2019';
544     /** <samp>&sbquo;</samp> <code>&amp;sbquo; = &amp;#8218;</code> -- single low-9 quotation mark, U+201A NEW. */
545     public static final char _sbquo='\u201A';
546     /** <samp>&ldquo;</samp> <code>&amp;ldquo; = &amp;#8220;</code> -- left double quotation mark, U+201C ISOnum. */
547     public static final char _ldquo='\u201C';
548     /** <samp>&rdquo;</samp> <code>&amp;rdquo; = &amp;#8221;</code> -- right double quotation mark, U+201D ISOnum. */
549     public static final char _rdquo='\u201D';
550     /** <samp>&bdquo;</samp> <code>&amp;bdquo; = &amp;#8222;</code> -- double low-9 quotation mark, U+201E NEW. */
551     public static final char _bdquo='\u201E';
552     /** <samp>&dagger;</samp> <code>&amp;dagger; = &amp;#8224;</code> -- dagger, U+2020 ISOpub. */
553     public static final char _dagger='\u2020';
554     /** <samp>&Dagger;</samp> <code>&amp;Dagger; = &amp;#8225;</code> -- double dagger, U+2021 ISOpub. */
555     public static final char _Dagger='\u2021';
556     /** <samp>&permil;</samp> <code>&amp;permil; = &amp;#8240;</code> -- per mille sign, U+2030 ISOtech. */
557     public static final char _permil='\u2030';
558     /** <samp>&lsaquo;</samp> <code>&amp;lsaquo; = &amp;#8249;</code> -- single left-pointing angle quotation mark, U+2039 ISO proposed<br />(see <a HREF="#_lsaquo">comments</a>).<p>lsaquo is proposed but not yet ISO standardized</p> */
559     public static final char _lsaquo='\u2039';
560     /** <samp>&rsaquo;</samp> <code>&amp;rsaquo; = &amp;#8250;</code> -- single right-pointing angle quotation mark, U+203A ISO proposed<br />(see <a HREF="#_rsaquo">comments</a>).<p>rsaquo is proposed but not yet ISO standardized</p> */
561     public static final char _rsaquo='\u203A';
562     /** <samp>&euro;</samp> <code>&amp;euro; = &amp;#8364;</code> -- euro sign, U+20AC NEW. */
563     public static final char _euro='\u20AC';
564     /**
565      * <samp>&apos;</samp> <code>&amp;apos; = &amp;#39;</code> -- apostrophe = APL quote, U+0027 ISOnum<br />(see <a HREF="#_apos">comments</a>).<p>
566      * apos is only defined for use in XHTML
567      * (see the <a target="_blank" HREF="http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters">XHTML Special Characters Entity Set</a>),
568      * but not in HTML.</p>
569      * @see Config#IsApostropheEncoded
570      */

571     public static final char _apos='\''; // written as an escaped literal: a unicode escape for U+0027 is translated before lexing and would terminate the char literal
572
573     private static Map NAME_TO_CODE_POINT_MAP=new HashMap(512,1.0F); // entity name (String) -> code point (Integer); 253 entities in total, so capacity 512 at load factor 1.0 never needs a rehash
574
private static IntStringHashMap CODE_POINT_TO_NAME_MAP; // code point -> entity name; not assigned at declaration, presumably built during static initialisation (confirm)
575
576     private static int MAX_NAME_LENGTH=0; // starts at 0; presumably raised to the length of the longest entity name during static initialisation (confirm)
577
578     static {
579         NAME_TO_CODE_POINT_MAP.put("nbsp",new Integer JavaDoc(_nbsp));
580         NAME_TO_CODE_POINT_MAP.put("iexcl",new Integer JavaDoc(_iexcl));
581         NAME_TO_CODE_POINT_MAP.put("cent",new Integer JavaDoc(_cent));
582         NAME_TO_CODE_POINT_MAP.put("pound",new Integer JavaDoc(_pound));
583         NAME_TO_CODE_POINT_MAP.put("curren",new Integer JavaDoc(_curren));
584         NAME_TO_CODE_POINT_MAP.put("yen",new Integer JavaDoc(_yen));
585         NAME_TO_CODE_POINT_MAP.put("brvbar",new Integer JavaDoc(_brvbar));
586         NAME_TO_CODE_POINT_MAP.put("sect",new Integer JavaDoc(_sect));
587         NAME_TO_CODE_POINT_MAP.put("uml",new Integer JavaDoc(_uml));
588         NAME_TO_CODE_POINT_MAP.put("copy",new Integer JavaDoc(_copy));
589         NAME_TO_CODE_POINT_MAP.put("ordf",new Integer JavaDoc(_ordf));
590         NAME_TO_CODE_POINT_MAP.put("laquo",new Integer JavaDoc(_laquo));
591         NAME_TO_CODE_POINT_MAP.put("not",new Integer JavaDoc(_not));
592         NAME_TO_CODE_POINT_MAP.put("shy",new Integer JavaDoc(_shy));
593         NAME_TO_CODE_POINT_MAP.put("reg",new Integer JavaDoc(_reg));
594         NAME_TO_CODE_POINT_MAP.put("macr",new Integer JavaDoc(_macr));
595         NAME_TO_CODE_POINT_MAP.put("deg",new Integer JavaDoc(_deg));
596         NAME_TO_CODE_POINT_MAP.put("plusmn",new Integer JavaDoc(_plusmn));
597         NAME_TO_CODE_POINT_MAP.put("sup2",new Integer JavaDoc(_sup2));
598         NAME_TO_CODE_POINT_MAP.put("sup3",new Integer JavaDoc(_sup3));
599         NAME_TO_CODE_POINT_MAP.put("acute",new Integer JavaDoc(_acute));
600         NAME_TO_CODE_POINT_MAP.put("micro",new Integer JavaDoc(_micro));
601         NAME_TO_CODE_POINT_MAP.put("para",new Integer JavaDoc(_para));
602         NAME_TO_CODE_POINT_MAP.put("middot",new Integer JavaDoc(_middot));
603         NAME_TO_CODE_POINT_MAP.put("cedil",new Integer JavaDoc(_cedil));
604         NAME_TO_CODE_POINT_MAP.put("sup1",new Integer JavaDoc(_sup1));
605         NAME_TO_CODE_POINT_MAP.put("ordm",new Integer JavaDoc(_ordm));
606         NAME_TO_CODE_POINT_MAP.put("raquo",new Integer JavaDoc(_raquo));
607         NAME_TO_CODE_POINT_MAP.put("frac14",new Integer JavaDoc(_frac14));
608         NAME_TO_CODE_POINT_MAP.put("frac12",new Integer JavaDoc(_frac12));
609         NAME_TO_CODE_POINT_MAP.put("frac34",new Integer JavaDoc(_frac34));
610         NAME_TO_CODE_POINT_MAP.put("iquest",new Integer JavaDoc(_iquest));
611         NAME_TO_CODE_POINT_MAP.put("Agrave",new Integer JavaDoc(_Agrave));
612         NAME_TO_CODE_POINT_MAP.put("Aacute",new Integer JavaDoc(_Aacute));
613         NAME_TO_CODE_POINT_MAP.put("Acirc",new Integer JavaDoc(_Acirc));
614         NAME_TO_CODE_POINT_MAP.put("Atilde",new Integer JavaDoc(_Atilde));
615         NAME_TO_CODE_POINT_MAP.put("Auml",new Integer JavaDoc(_Auml));
616         NAME_TO_CODE_POINT_MAP.put("Aring",new Integer JavaDoc(_Aring));
617         NAME_TO_CODE_POINT_MAP.put("AElig",new Integer JavaDoc(_AElig));
618         NAME_TO_CODE_POINT_MAP.put("Ccedil",new Integer JavaDoc(_Ccedil));
619         NAME_TO_CODE_POINT_MAP.put("Egrave",new Integer JavaDoc(_Egrave));
620         NAME_TO_CODE_POINT_MAP.put("Eacute",new Integer JavaDoc(_Eacute));
621         NAME_TO_CODE_POINT_MAP.put("Ecirc",new Integer JavaDoc(_Ecirc));
622         NAME_TO_CODE_POINT_MAP.put("Euml",new Integer JavaDoc(_Euml));
623         NAME_TO_CODE_POINT_MAP.put("Igrave",new Integer JavaDoc(_Igrave));
624         NAME_TO_CODE_POINT_MAP.put("Iacute",new Integer JavaDoc(_Iacute));
625         NAME_TO_CODE_POINT_MAP.put("Icirc",new Integer JavaDoc(_Icirc));
626         NAME_TO_CODE_POINT_MAP.put("Iuml",new Integer JavaDoc(_Iuml));
627         NAME_TO_CODE_POINT_MAP.put("ETH",new Integer JavaDoc(_ETH));
628         NAME_TO_CODE_POINT_MAP.put("Ntilde",new Integer JavaDoc(_Ntilde));
629         NAME_TO_CODE_POINT_MAP.put("Ograve",new Integer JavaDoc(_Ograve));
630         NAME_TO_CODE_POINT_MAP.put("Oacute",new Integer JavaDoc(_Oacute));
631         NAME_TO_CODE_POINT_MAP.put("Ocirc",new Integer JavaDoc(_Ocirc));
632         NAME_TO_CODE_POINT_MAP.put("Otilde",new Integer JavaDoc(_Otilde));
633         NAME_TO_CODE_POINT_MAP.put("Ouml",new Integer JavaDoc(_Ouml));
634         NAME_TO_CODE_POINT_MAP.put("times",new Integer JavaDoc(_times));
635         NAME_TO_CODE_POINT_MAP.put("Oslash",new Integer JavaDoc(_Oslash));
636         NAME_TO_CODE_POINT_MAP.put("Ugrave",new Integer JavaDoc(_Ugrave));
637         NAME_TO_CODE_POINT_MAP.put("Uacute",new Integer JavaDoc(_Uacute));
638         NAME_TO_CODE_POINT_MAP.put("Ucirc",new Integer JavaDoc(_Ucirc));
639         NAME_TO_CODE_POINT_MAP.put("Uuml",new Integer JavaDoc(_Uuml));
640         NAME_TO_CODE_POINT_MAP.put("Yacute",new Integer JavaDoc(_Yacute));
641         NAME_TO_CODE_POINT_MAP.put("THORN",new Integer JavaDoc(_THORN));
642         NAME_TO_CODE_POINT_MAP.put("szlig",new Integer JavaDoc(_szlig));
643         NAME_TO_CODE_POINT_MAP.put("agrave",new Integer JavaDoc(_agrave));
644         NAME_TO_CODE_POINT_MAP.put("aacute",new Integer JavaDoc(_aacute));
645         NAME_TO_CODE_POINT_MAP.put("acirc",new Integer JavaDoc(_acirc));
646         NAME_TO_CODE_POINT_MAP.put("atilde",new Integer JavaDoc(_atilde));
647         NAME_TO_CODE_POINT_MAP.put("auml",new Integer JavaDoc(_auml));
648         NAME_TO_CODE_POINT_MAP.put("aring",new Integer JavaDoc(_aring));
649         NAME_TO_CODE_POINT_MAP.put("aelig",new Integer JavaDoc(_aelig));
650         NAME_TO_CODE_POINT_MAP.put("ccedil",new Integer JavaDoc(_ccedil));
651         NAME_TO_CODE_POINT_MAP.put("egrave",new Integer JavaDoc(_egrave));
652         NAME_TO_CODE_POINT_MAP.put("eacute",new Integer JavaDoc(_eacute));
653         NAME_TO_CODE_POINT_MAP.put("ecirc",new Integer JavaDoc(_ecirc));
654         NAME_TO_CODE_POINT_MAP.put("euml",new Integer JavaDoc(_euml));
655         NAME_TO_CODE_POINT_MAP.put("igrave",new Integer JavaDoc(_igrave));
656         NAME_TO_CODE_POINT_MAP.put("iacute",new Integer JavaDoc(_iacute));
657         NAME_TO_CODE_POINT_MAP.put("icirc",new Integer JavaDoc(_icirc));
658         NAME_TO_CODE_POINT_MAP.put("iuml",new Integer JavaDoc(_iuml));
659         NAME_TO_CODE_POINT_MAP.put("eth",new Integer JavaDoc(_eth));
660         NAME_TO_CODE_POINT_MAP.put("ntilde",new Integer JavaDoc(_ntilde));
661         NAME_TO_CODE_POINT_MAP.put("ograve",new Integer JavaDoc(_ograve));
662         NAME_TO_CODE_POINT_MAP.put("oacute",new Integer JavaDoc(_oacute));
663         NAME_TO_CODE_POINT_MAP.put("ocirc",new Integer JavaDoc(_ocirc));
664         NAME_TO_CODE_POINT_MAP.put("otilde",new Integer JavaDoc(_otilde));
665         NAME_TO_CODE_POINT_MAP.put("ouml",new Integer JavaDoc(_ouml));
666         NAME_TO_CODE_POINT_MAP.put("divide",new Integer JavaDoc(_divide));
667         NAME_TO_CODE_POINT_MAP.put("oslash",new Integer JavaDoc(_oslash));
668         NAME_TO_CODE_POINT_MAP.put("ugrave",new Integer JavaDoc(_ugrave));
669         NAME_TO_CODE_POINT_MAP.put("uacute",new Integer JavaDoc(_uacute));
670         NAME_TO_CODE_POINT_MAP.put("ucirc",new Integer JavaDoc(_ucirc));
671         NAME_TO_CODE_POINT_MAP.put("uuml",new Integer JavaDoc(_uuml));
672         NAME_TO_CODE_POINT_MAP.put("yacute",new Integer JavaDoc(_yacute));
673         NAME_TO_CODE_POINT_MAP.put("thorn",new Integer JavaDoc(_thorn));
674         NAME_TO_CODE_POINT_MAP.put("yuml",new Integer JavaDoc(_yuml));
675         NAME_TO_CODE_POINT_MAP.put("fnof",new Integer JavaDoc(_fnof));
676         NAME_TO_CODE_POINT_MAP.put("Alpha",new Integer JavaDoc(_Alpha));
677         NAME_TO_CODE_POINT_MAP.put("Beta",new Integer JavaDoc(_Beta));
678         NAME_TO_CODE_POINT_MAP.put("Gamma",new Integer JavaDoc(_Gamma));
679         NAME_TO_CODE_POINT_MAP.put("Delta",new Integer JavaDoc(_Delta));
680         NAME_TO_CODE_POINT_MAP.put("Epsilon",new Integer JavaDoc(_Epsilon));
681         NAME_TO_CODE_POINT_MAP.put("Zeta",new Integer JavaDoc(_Zeta));
682         NAME_TO_CODE_POINT_MAP.put("Eta",new Integer JavaDoc(_Eta));
683         NAME_TO_CODE_POINT_MAP.put("Theta",new Integer JavaDoc(_Theta));
684         NAME_TO_CODE_POINT_MAP.put("Iota",new Integer JavaDoc(_Iota));
685         NAME_TO_CODE_POINT_MAP.put("Kappa",new Integer JavaDoc(_Kappa));
686         NAME_TO_CODE_POINT_MAP.put("Lambda",new Integer JavaDoc(_Lambda));
687         NAME_TO_CODE_POINT_MAP.put("Mu",new Integer JavaDoc(_Mu));
688         NAME_TO_CODE_POINT_MAP.put("Nu",new Integer JavaDoc(_Nu));
689         NAME_TO_CODE_POINT_MAP.put("Xi",new Integer JavaDoc(_Xi));
690         NAME_TO_CODE_POINT_MAP.put("Omicron",new Integer JavaDoc(_Omicron));
691         NAME_TO_CODE_POINT_MAP.put("Pi",new Integer JavaDoc(_Pi));
692         NAME_TO_CODE_POINT_MAP.put("Rho",new Integer JavaDoc(_Rho));
693         NAME_TO_CODE_POINT_MAP.put("Sigma",new Integer JavaDoc(_Sigma));
694         NAME_TO_CODE_POINT_MAP.put("Tau",new Integer JavaDoc(_Tau));
695         NAME_TO_CODE_POINT_MAP.put("Upsilon",new Integer JavaDoc(_Upsilon));
696         NAME_TO_CODE_POINT_MAP.put("Phi",new Integer JavaDoc(_Phi));
697         NAME_TO_CODE_POINT_MAP.put("Chi",new Integer JavaDoc(_Chi));
698         NAME_TO_CODE_POINT_MAP.put("Psi",new Integer JavaDoc(_Psi));
699         NAME_TO_CODE_POINT_MAP.put("Omega",new Integer JavaDoc(_Omega));
700         NAME_TO_CODE_POINT_MAP.put("alpha",new Integer JavaDoc(_alpha));
701         NAME_TO_CODE_POINT_MAP.put("beta",new Integer JavaDoc(_beta));
702         NAME_TO_CODE_POINT_MAP.put("gamma",new Integer JavaDoc(_gamma));
703         NAME_TO_CODE_POINT_MAP.put("delta",new Integer JavaDoc(_delta));
704         NAME_TO_CODE_POINT_MAP.put("epsilon",new Integer JavaDoc(_epsilon));
705         NAME_TO_CODE_POINT_MAP.put("zeta",new Integer JavaDoc(_zeta));
706         NAME_TO_CODE_POINT_MAP.put("eta",new Integer JavaDoc(_eta));
707         NAME_TO_CODE_POINT_MAP.put("theta",new Integer JavaDoc(_theta));
708         NAME_TO_CODE_POINT_MAP.put("iota",new Integer JavaDoc(_iota));
709         NAME_TO_CODE_POINT_MAP.put("kappa",new Integer JavaDoc(_kappa));
710         NAME_TO_CODE_POINT_MAP.put("lambda",new Integer JavaDoc(_lambda));
711         NAME_TO_CODE_POINT_MAP.put("mu",new Integer JavaDoc(_mu));
712         NAME_TO_CODE_POINT_MAP.put("nu",new Integer JavaDoc(_nu));
713         NAME_TO_CODE_POINT_MAP.put("xi",new Integer JavaDoc(_xi));
714         NAME_TO_CODE_POINT_MAP.put("omicron",new Integer JavaDoc(_omicron));
715         NAME_TO_CODE_POINT_MAP.put("pi",new Integer JavaDoc(_pi));
716         NAME_TO_CODE_POINT_MAP.put("rho",new Integer JavaDoc(_rho));
717         NAME_TO_CODE_POINT_MAP.put("sigmaf",new Integer JavaDoc(_sigmaf));
718         NAME_TO_CODE_POINT_MAP.put("sigma",new Integer JavaDoc(_sigma));
719         NAME_TO_CODE_POINT_MAP.put("tau",new Integer JavaDoc(_tau));
720         NAME_TO_CODE_POINT_MAP.put("upsilon",new Integer JavaDoc(_upsilon));
721         NAME_TO_CODE_POINT_MAP.put("phi",new Integer JavaDoc(_phi));
722         NAME_TO_CODE_POINT_MAP.put("chi",new Integer JavaDoc(_chi));
723         NAME_TO_CODE_POINT_MAP.put("psi",new Integer JavaDoc(_psi));
724         NAME_TO_CODE_POINT_MAP.put("omega",new Integer JavaDoc(_omega));
725         NAME_TO_CODE_POINT_MAP.put("thetasym",new Integer JavaDoc(_thetasym));
726         NAME_TO_CODE_POINT_MAP.put("upsih",new Integer JavaDoc(_upsih));
727         NAME_TO_CODE_POINT_MAP.put("piv",new Integer JavaDoc(_piv));
728         NAME_TO_CODE_POINT_MAP.put("bull",new Integer JavaDoc(_bull));
729         NAME_TO_CODE_POINT_MAP.put("hellip",new Integer JavaDoc(_hellip));
730         NAME_TO_CODE_POINT_MAP.put("prime",new Integer JavaDoc(_prime));
731         NAME_TO_CODE_POINT_MAP.put("Prime",new Integer JavaDoc(_Prime));
732         NAME_TO_CODE_POINT_MAP.put("oline",new Integer JavaDoc(_oline));
733         NAME_TO_CODE_POINT_MAP.put("frasl",new Integer JavaDoc(_frasl));
734         NAME_TO_CODE_POINT_MAP.put("weierp",new Integer JavaDoc(_weierp));
735         NAME_TO_CODE_POINT_MAP.put("image",new Integer JavaDoc(_image));
736         NAME_TO_CODE_POINT_MAP.put("real",new Integer JavaDoc(_real));
737         NAME_TO_CODE_POINT_MAP.put("trade",new Integer JavaDoc(_trade));
738         NAME_TO_CODE_POINT_MAP.put("alefsym",new Integer JavaDoc(_alefsym));
739         NAME_TO_CODE_POINT_MAP.put("larr",new Integer JavaDoc(_larr));
740         NAME_TO_CODE_POINT_MAP.put("uarr",new Integer JavaDoc(_uarr));
741         NAME_TO_CODE_POINT_MAP.put("rarr",new Integer JavaDoc(_rarr));
742         NAME_TO_CODE_POINT_MAP.put("darr",new Integer JavaDoc(_darr));
743         NAME_TO_CODE_POINT_MAP.put("harr",new Integer JavaDoc(_harr));
744         NAME_TO_CODE_POINT_MAP.put("crarr",new Integer JavaDoc(_crarr));
745         NAME_TO_CODE_POINT_MAP.put("lArr",new Integer JavaDoc(_lArr));
746         NAME_TO_CODE_POINT_MAP.put("uArr",new Integer JavaDoc(_uArr));
747         NAME_TO_CODE_POINT_MAP.put("rArr",new Integer JavaDoc(_rArr));
748         NAME_TO_CODE_POINT_MAP.put("dArr",new Integer JavaDoc(_dArr));
749         NAME_TO_CODE_POINT_MAP.put("hArr",new Integer JavaDoc(_hArr));
750         NAME_TO_CODE_POINT_MAP.put("forall",new Integer JavaDoc(_forall));
751         NAME_TO_CODE_POINT_MAP.put("part",new Integer JavaDoc(_part));
752         NAME_TO_CODE_POINT_MAP.put("exist",new Integer JavaDoc(_exist));
753         NAME_TO_CODE_POINT_MAP.put("empty",new Integer JavaDoc(_empty));
754         NAME_TO_CODE_POINT_MAP.put("nabla",new Integer JavaDoc(_nabla));
755         NAME_TO_CODE_POINT_MAP.put("isin",new Integer JavaDoc(_isin));
756         NAME_TO_CODE_POINT_MAP.put("notin",new Integer JavaDoc(_notin));
757         NAME_TO_CODE_POINT_MAP.put("ni",new Integer JavaDoc(_ni));
758         NAME_TO_CODE_POINT_MAP.put("prod",new Integer JavaDoc(_prod));
759         NAME_TO_CODE_POINT_MAP.put("sum",new Integer JavaDoc(_sum));
760         NAME_TO_CODE_POINT_MAP.put("minus",new Integer JavaDoc(_minus));
761         NAME_TO_CODE_POINT_MAP.put("lowast",new Integer JavaDoc(_lowast));
762         NAME_TO_CODE_POINT_MAP.put("radic",new Integer JavaDoc(_radic));
763         NAME_TO_CODE_POINT_MAP.put("prop",new Integer JavaDoc(_prop));
764         NAME_TO_CODE_POINT_MAP.put("infin",new Integer JavaDoc(_infin));
765         NAME_TO_CODE_POINT_MAP.put("ang",new Integer JavaDoc(_ang));
766         NAME_TO_CODE_POINT_MAP.put("and",new Integer JavaDoc(_and));
767         NAME_TO_CODE_POINT_MAP.put("or",new Integer JavaDoc(_or));
768         NAME_TO_CODE_POINT_MAP.put("cap",new Integer JavaDoc(_cap));
769         NAME_TO_CODE_POINT_MAP.put("cup",new Integer JavaDoc(_cup));
770         NAME_TO_CODE_POINT_MAP.put("int",new Integer JavaDoc(_int));
771         NAME_TO_CODE_POINT_MAP.put("there4",new Integer JavaDoc(_there4));
772         NAME_TO_CODE_POINT_MAP.put("sim",new Integer JavaDoc(_sim));
773         NAME_TO_CODE_POINT_MAP.put("cong",new Integer JavaDoc(_cong));
774         NAME_TO_CODE_POINT_MAP.put("asymp",new Integer JavaDoc(_asymp));
775         NAME_TO_CODE_POINT_MAP.put("ne",new Integer JavaDoc(_ne));
776         NAME_TO_CODE_POINT_MAP.put("equiv",new Integer JavaDoc(_equiv));
777         NAME_TO_CODE_POINT_MAP.put("le",new Integer JavaDoc(_le));
778         NAME_TO_CODE_POINT_MAP.put("ge",new Integer JavaDoc(_ge));
779         NAME_TO_CODE_POINT_MAP.put("sub",new Integer JavaDoc(_sub));
780         NAME_TO_CODE_POINT_MAP.put("sup",new Integer JavaDoc(_sup));
781         NAME_TO_CODE_POINT_MAP.put("nsub",new Integer JavaDoc(_nsub));
782         NAME_TO_CODE_POINT_MAP.put("sube",new Integer JavaDoc(_sube));
783         NAME_TO_CODE_POINT_MAP.put("supe",new Integer JavaDoc(_supe));
784         NAME_TO_CODE_POINT_MAP.put("oplus",new Integer JavaDoc(_oplus));
785         NAME_TO_CODE_POINT_MAP.put("otimes",new Integer JavaDoc(_otimes));
786         NAME_TO_CODE_POINT_MAP.put("perp",new Integer JavaDoc(_perp));
787         NAME_TO_CODE_POINT_MAP.put("sdot",new Integer JavaDoc(_sdot));
788         NAME_TO_CODE_POINT_MAP.put("lceil",new Integer JavaDoc(_lceil));
789         NAME_TO_CODE_POINT_MAP.put("rceil",new Integer JavaDoc(_rceil));
790         NAME_TO_CODE_POINT_MAP.put("lfloor",new Integer JavaDoc(_lfloor));
791         NAME_TO_CODE_POINT_MAP.put("rfloor",new Integer JavaDoc(_rfloor));
792         NAME_TO_CODE_POINT_MAP.put("lang",new Integer JavaDoc(_lang));
793         NAME_TO_CODE_POINT_MAP.put("rang",new Integer JavaDoc(_rang));
794         NAME_TO_CODE_POINT_MAP.put("loz",new Integer JavaDoc(_loz));
795         NAME_TO_CODE_POINT_MAP.put("spades",new Integer JavaDoc(_spades));
796         NAME_TO_CODE_POINT_MAP.put("clubs",new Integer JavaDoc(_clubs));
797         NAME_TO_CODE_POINT_MAP.put("hearts",new Integer JavaDoc(_hearts));
798         NAME_TO_CODE_POINT_MAP.put("diams",new Integer JavaDoc(_diams));
799         NAME_TO_CODE_POINT_MAP.put("quot",new Integer JavaDoc(_quot));
800         NAME_TO_CODE_POINT_MAP.put("amp",new Integer JavaDoc(_amp));
801         NAME_TO_CODE_POINT_MAP.put("lt",new Integer JavaDoc(_lt));
802         NAME_TO_CODE_POINT_MAP.put("gt",new Integer JavaDoc(_gt));
803         NAME_TO_CODE_POINT_MAP.put("OElig",new Integer JavaDoc(_OElig));
804         NAME_TO_CODE_POINT_MAP.put("oelig",new Integer JavaDoc(_oelig));
805         NAME_TO_CODE_POINT_MAP.put("Scaron",new Integer JavaDoc(_Scaron));
806         NAME_TO_CODE_POINT_MAP.put("scaron",new Integer JavaDoc(_scaron));
807         NAME_TO_CODE_POINT_MAP.put("Yuml",new Integer JavaDoc(_Yuml));
808         NAME_TO_CODE_POINT_MAP.put("circ",new Integer JavaDoc(_circ));
809         NAME_TO_CODE_POINT_MAP.put("tilde",new Integer JavaDoc(_tilde));
810         NAME_TO_CODE_POINT_MAP.put("ensp",new Integer JavaDoc(_ensp));
811         NAME_TO_CODE_POINT_MAP.put("emsp",new Integer JavaDoc(_emsp));
812         NAME_TO_CODE_POINT_MAP.put("thinsp",new Integer JavaDoc(_thinsp));
813         NAME_TO_CODE_POINT_MAP.put("zwnj",new Integer JavaDoc(_zwnj));
814         NAME_TO_CODE_POINT_MAP.put("zwj",new Integer JavaDoc(_zwj));
815         NAME_TO_CODE_POINT_MAP.put("lrm",new Integer JavaDoc(_lrm));
816         NAME_TO_CODE_POINT_MAP.put("rlm",new Integer JavaDoc(_rlm));
817         NAME_TO_CODE_POINT_MAP.put("ndash",new Integer JavaDoc(_ndash));
818         NAME_TO_CODE_POINT_MAP.put("mdash",new Integer JavaDoc(_mdash));
819         NAME_TO_CODE_POINT_MAP.put("lsquo",new Integer JavaDoc(_lsquo));
820         NAME_TO_CODE_POINT_MAP.put("rsquo",new Integer JavaDoc(_rsquo));
821         NAME_TO_CODE_POINT_MAP.put("sbquo",new Integer JavaDoc(_sbquo));
822         NAME_TO_CODE_POINT_MAP.put("ldquo",new Integer JavaDoc(_ldquo));
823         NAME_TO_CODE_POINT_MAP.put("rdquo",new Integer JavaDoc(_rdquo));
824         NAME_TO_CODE_POINT_MAP.put("bdquo",new Integer JavaDoc(_bdquo));
825         NAME_TO_CODE_POINT_MAP.put("dagger",new Integer JavaDoc(_dagger));
826         NAME_TO_CODE_POINT_MAP.put("Dagger",new Integer JavaDoc(_Dagger));
827         NAME_TO_CODE_POINT_MAP.put("permil",new Integer JavaDoc(_permil));
828         NAME_TO_CODE_POINT_MAP.put("lsaquo",new Integer JavaDoc(_lsaquo));
829         NAME_TO_CODE_POINT_MAP.put("rsaquo",new Integer JavaDoc(_rsaquo));
830         NAME_TO_CODE_POINT_MAP.put("euro",new Integer JavaDoc(_euro));
831         NAME_TO_CODE_POINT_MAP.put("apos",new Integer JavaDoc(_apos));
832
833         CODE_POINT_TO_NAME_MAP=new IntStringHashMap((int)(NAME_TO_CODE_POINT_MAP.size()/0.75F),1.0F);
834         for (final Iterator i=NAME_TO_CODE_POINT_MAP.entrySet().iterator(); i.hasNext();) {
835             Map.Entry entry=(Map.Entry)i.next();
836             String JavaDoc name=(String JavaDoc)entry.getKey();
837             if (MAX_NAME_LENGTH<name.length()) MAX_NAME_LENGTH=name.length();
838             CODE_POINT_TO_NAME_MAP.put(((Integer JavaDoc)entry.getValue()).intValue(),name);
839         }
840     }
841
// Passes the source and position arguments straight to the superclass constructor,
// then records the entity name registered for the given code point.
private CharacterEntityReference(final Source source, final int begin, final int end, final int codePoint) {
    super(source,begin,end,codePoint);
    this.name=getName(codePoint);
}
846
847     /**
848      * Returns the name of this character entity reference.
849      * <p>
850      * <dl>
851      * <dt>Example:</dt>
852      * <dd><code>((CharacterEntityReference)CharacterReference.parse("&amp;gt;")).getName()</code> returns "<code>gt</code>"</dd>
853      * </dl>
854      * @return the name of this character entity reference.
855      * @see #getName(int codePoint)
856      */

857     public String JavaDoc getName() {
858         return name;
859     }
860
861     /**
862      * Returns the character entity reference name of the specified character.
863      * <p>
864      * Since all character entity references represent unicode <a target="_blank" HREF="http://www.unicode.org/glossary/#bmp_code_point">BMP</a> code points,
865      * the functionality of this method is identical to that of {@link #getName(int codePoint)}.
866      * <p>
867      * <dl>
868      * <dt>Example:</dt>
869      * <dd><code>CharacterEntityReference.getName('>')</code> returns "<code>gt</code>"</dd>
870      * </dl>
871      * @return the character entity reference name of the specified character, or <code>null</code> if none exists.
872      */

873     public static String JavaDoc getName(final char ch) {
874         return getName((int)ch);
875     }
876
877     /**
878      * Returns the character entity reference name of the specified unicode code point.
879      * <p>
880      * Since all character entity references represent unicode <a target="_blank" HREF="http://www.unicode.org/glossary/#bmp_code_point">BMP</a> code points,
881      * the functionality of this method is identical to that of {@link #getName(char ch)}.
882      * <p>
883      * <dl>
884      * <dt>Example:</dt>
885      * <dd><code>CharacterEntityReference.getName(62)</code> returns "<code>gt</code>"</dd>
886      * </dl>
887      * @return the character entity reference name of the specified unicode code point, or <code>null</code> if none exists.
888      */

889     public static String JavaDoc getName(final int codePoint) {
890         return CODE_POINT_TO_NAME_MAP.get(codePoint);
891     }
892
893     /**
894      * Returns the unicode code point of the specified character entity reference name.
895      * <p>
896      * If the string does not represent a valid character entity reference name, this method returns {@link #INVALID_CODE_POINT INVALID_CODE_POINT}.
897      * <p>
898      * Although character entity reference names are case sensitive, and in some cases differ from other entity references only by their case,
899      * some browsers also recognise them in a case-insensitive way.
900      * For this reason, all decoding methods in this library recognise character entity reference names even if they are in the wrong case.
901      * <p>
902      * <dl>
903      * <dt>Example:</dt>
904      * <dd><code>CharacterEntityReference.getCodePointFromName("gt")</code> returns <code>62</code></dd>
905      * </dl>
906      * @return the unicode code point of the specified character entity reference name, or {@link #INVALID_CODE_POINT INVALID_CODE_POINT} if the string does not represent a valid character entity reference name.
907      */

908     public static int getCodePointFromName(final String JavaDoc name) {
909         Integer JavaDoc codePoint=(Integer JavaDoc)NAME_TO_CODE_POINT_MAP.get(name);
910         if (codePoint==null) {
911             // Most browsers recognise character entity references even if they have the wrong case, so check for this as well:
912
final String JavaDoc lowerCaseName=name.toLowerCase();
913             if (lowerCaseName!=name) codePoint=(Integer JavaDoc)NAME_TO_CODE_POINT_MAP.get(lowerCaseName);
914         }
915         return (codePoint!=null) ? codePoint.intValue() : INVALID_CODE_POINT;
916     }
917
918     /**
919      * Returns the correct encoded form of this character entity reference.
920      * <p>
921      * Note that the returned string is not necessarily the same as the original source text used to create this object.
922      * This library recognises certain invalid forms of character references, as detailed in the {@link #decode(CharSequence) decode(String encodedString)} method.
923      * <p>
924      * To retrieve the original source text, use the {@link #toString() toString()} method instead.
925      * <p>
926      * <dl>
927      * <dt>Example:</dt>
928      * <dd><code>CharacterReference.parse("&amp;GT").getCharacterReferenceString()</code> returns "<code>&amp;gt;</code>"</dd>
929      * </dl>
930      *
931      * @return the correct encoded form of this character entity reference.
932      * @see CharacterReference#getCharacterReferenceString(int codePoint)
933      */

934     public String JavaDoc getCharacterReferenceString() {
935         return getCharacterReferenceString(name);
936     }
937
938     /**
939      * Returns the character entity reference encoded form of the specified unicode code point.
940      * <p>
941      * If the specified unicode code point does not have an equivalent character entity reference, this method returns <code>null</code>.
942      * To get either the entity or numeric reference encoded form, use the {@link CharacterReference#getCharacterReferenceString(int codePoint)} method instead.
943      * <p>
944      * <dl>
945      * <dt>Examples:</dt>
946      * <dd><code>CharacterEntityReference.getCharacterReferenceString(62)</code> returns "<code>&amp;gt;</code>"</dd>
947      * <dd><code>CharacterEntityReference.getCharacterReferenceString(9786)</code> returns <code>null</code></dd>
948      * </dl>
949      *
950      * @return the character entity reference encoded form of the specified unicode code point, or <code>null</code> if none exists.
951      * @see CharacterReference#getCharacterReferenceString(int codePoint)
952      */

953     public static String JavaDoc getCharacterReferenceString(final int codePoint) {
954         if (codePoint>Character.MAX_VALUE) return null;
955         final String JavaDoc name=getName(codePoint);
956         return name!=null ? getCharacterReferenceString(name) : null;
957     }
958
959     /**
960      * Returns a map of character entity reference names (<code>String</code>) to unicode code points (<code>Integer</code>).
961      * @return a map of character entity reference names to unicode code points.
962      */

963     public static Map getNameToCodePointMap() {
964         return NAME_TO_CODE_POINT_MAP;
965     }
966
967     /**
968      * Returns a string representation of this object useful for debugging purposes.
969      * @return a string representation of this object useful for debugging purposes.
970      */

971     public String JavaDoc getDebugInfo() {
972         final StringBuffer JavaDoc sb=new StringBuffer JavaDoc();
973         sb.append('"');
974         appendCharacterReferenceString(sb,name);
975         sb.append("\" ");
976         appendUnicodeText(sb,codePoint);
977         sb.append(' ').append(super.getDebugInfo());
978         return sb.toString();
979     }
980
981     private static String JavaDoc getCharacterReferenceString(final String JavaDoc name) {
982         return appendCharacterReferenceString(new StringBuffer JavaDoc(),name).toString();
983     }
984
985     static final StringBuffer JavaDoc appendCharacterReferenceString(final StringBuffer JavaDoc sb, final String JavaDoc name) {
986         return sb.append('&').append(name).append(';');
987     }
988
989     static CharacterReference construct(final Source source, final int begin, final int unterminatedMaxCodePoint) {
990         // only called from CharacterReference.construct(), so we can assume that first character is '&'
991
String JavaDoc name;
992         final int nameBegin=begin+1;
993         final int maxNameEnd=nameBegin+MAX_NAME_LENGTH;
994         final int maxSourcePos=source.end-1;
995         int end;
996         int x=nameBegin;
997         boolean unterminated=false;
998         while (true) {
999             final char ch=source.charAt(x);
1000            if (ch==';') {
1001                end=x+1;
1002                name=source.subSequence(nameBegin,x).toString();
1003                break;
1004            }
1005            if (!isValidReferenceNameChar(ch)) {
1006                // At this point, ch is determined to be an invalid character, meaning the character reference is unterminated.
1007
unterminated=true;
1008            } else if (x==maxSourcePos) {
1009                // At this point, we have a valid name character but are at the last position in the source text without the terminating semicolon.
1010
unterminated=true;
1011                x++; // include this character in the name
1012
}
1013            if (unterminated) {
1014                // Different browsers react differently to unterminated character entity references.
1015
// The behaviour of this method is determined by the unterminatedMaxCodePoint parameter.
1016
if (unterminatedMaxCodePoint==INVALID_CODE_POINT) {
1017                    // reject:
1018
return null;
1019                } else {
1020                    // accept:
1021
end=x;
1022                    name=source.subSequence(nameBegin,x).toString();
1023                    break;
1024                }
1025            }
1026            if (++x>maxNameEnd) return null;
1027        }
1028        final int codePoint=getCodePointFromName(name);
1029        if (codePoint==INVALID_CODE_POINT || (unterminated && codePoint>unterminatedMaxCodePoint)) return null;
1030        return new CharacterEntityReference(source,begin,end,codePoint);
1031    }
1032
1033    private static final boolean isValidReferenceNameChar(final char ch) {
1034        return ch>='A' && ch<='z' && (ch<='Z' || ch>='a');
1035    }
1036}
1037
1038
Popular Tags