KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > lobobrowser > html > parser > HtmlParser


1 /*
2     GNU LESSER GENERAL PUBLIC LICENSE
3     Copyright (C) 2006 The Lobo Project
4
5     This library is free software; you can redistribute it and/or
6     modify it under the terms of the GNU Lesser General Public
7     License as published by the Free Software Foundation; either
8     version 2.1 of the License, or (at your option) any later version.
9
10     This library is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13     Lesser General Public License for more details.
14
15     You should have received a copy of the GNU Lesser General Public
16     License along with this library; if not, write to the Free Software
17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18
19     Contact info: xamjadmin@users.sourceforge.net
20 */

21 /*
22  * Created on Aug 28, 2005
23  */

24 package org.lobobrowser.html.parser;
25
26 import java.io.*;
27 import java.util.*;
28 import org.w3c.dom.html2.*;
29 import org.w3c.dom.Node JavaDoc;
30 import org.w3c.dom.Element JavaDoc;
31 import org.w3c.dom.Document JavaDoc;
32 import org.xml.sax.ErrorHandler JavaDoc;
33 import org.xml.sax.Locator JavaDoc;
34 import org.xml.sax.SAXException JavaDoc;
35 import org.xml.sax.SAXParseException JavaDoc;
36 import org.lobobrowser.html.*;
37 import org.lobobrowser.html.io.*;
38 import org.mozilla.javascript.*;
39
40 /**
41  * The <code>HtmlParser</code> class is an HTML DOM parser.
42  * This parser provides the functionality for
43  * the standard DOM parser implementation {@link org.lobobrowser.html.parser.DocumentBuilderImpl}.
44  * This parser class may be used directly when a different DOM
45  * implementation is preferred.
46  */

47 public class HtmlParser {
// Document used as the node factory in parseToken (createTextNode,
// createComment, createElement) and as the default parse root.
48     private final HTMLDocument document;
// User agent context; left null by the deprecated constructors. parseToken
// consults isScriptingEnabled() on it when handling a NOSCRIPT-type element.
49     private final UserAgentContext ucontext;
// Error handler and document identifiers exactly as given to the constructor
// (null when the two-argument constructor is used).
50     private final ErrorHandler JavaDoc errorHandler;
51     private final String JavaDoc publicId;
52     private final String JavaDoc systemId;
53     
// Entity name (without '&' and ';') -> Character; filled by the static initializer.
54     private static final Map ENTITIES = new HashMap();
// Upper-case tag name -> ElementInfo; filled by the static initializer.
55     private static final Map ELEMENT_INFOS = new HashMap();
56
57     /**
58      * A node <code>UserData</code> key used to tell
59      * nodes that their content may be about to be
60      * modified. Elements could use this to temporarily
61      * suspend notifications. The value set
62      * will be either <code>Boolean.TRUE</code> or
63      * <code>Boolean.FALSE</code>.
64      */

65     public static final String JavaDoc MODIFYING_KEY = "cobra.suspend";
66     
67     static {
68         Map entities = ENTITIES;
69         entities.put("amp", new Character JavaDoc('&'));
70         entities.put("lt", new Character JavaDoc('<'));
71         entities.put("gt", new Character JavaDoc('>'));
72         entities.put("quot", new Character JavaDoc('"'));
73         entities.put("nbsp", new Character JavaDoc((char) 160));
74
75         entities.put("lsquo", new Character JavaDoc('`'));
76         entities.put("rsquo", new Character JavaDoc('´'));
77
78         entities.put("frasl", new Character JavaDoc((char) 47));
79         entities.put("ndash", new Character JavaDoc((char) 8211));
80         entities.put("mdash", new Character JavaDoc((char) 8212));
81         entities.put("iexcl", new Character JavaDoc((char) 161));
82         entities.put("cent", new Character JavaDoc((char) 162));
83         entities.put("pound", new Character JavaDoc((char) 163));
84         entities.put("curren", new Character JavaDoc((char) 164));
85         entities.put("yen", new Character JavaDoc((char) 165));
86         entities.put("brvbar", new Character JavaDoc((char) 166));
87         entities.put("brkbar", new Character JavaDoc((char) 166));
88         entities.put("sect", new Character JavaDoc((char) 167));
89         entities.put("uml", new Character JavaDoc((char) 168));
90         entities.put("die", new Character JavaDoc((char) 168));
91         entities.put("copy", new Character JavaDoc((char) 169));
92         entities.put("ordf", new Character JavaDoc((char) 170));
93         entities.put("laquo", new Character JavaDoc((char) 171));
94         entities.put("not", new Character JavaDoc((char) 172));
95         entities.put("shy", new Character JavaDoc((char) 173));
96         entities.put("reg", new Character JavaDoc((char) 174));
97         entities.put("macr", new Character JavaDoc((char) 175));
98         entities.put("hibar", new Character JavaDoc((char) 175));
99         entities.put("deg", new Character JavaDoc((char) 176));
100         entities.put("plusmn", new Character JavaDoc((char) 177));
101         entities.put("sup2", new Character JavaDoc((char) 178));
102         entities.put("sup3", new Character JavaDoc((char) 179));
103         entities.put("acute", new Character JavaDoc((char) 180));
104         entities.put("micro", new Character JavaDoc((char) 181));
105         entities.put("para", new Character JavaDoc((char) 182));
106         entities.put("middot", new Character JavaDoc((char) 183));
107         entities.put("cedil", new Character JavaDoc((char) 184));
108         entities.put("sup1", new Character JavaDoc((char) 185));
109         entities.put("ordm", new Character JavaDoc((char) 186));
110         entities.put("raquo", new Character JavaDoc((char) 187));
111         entities.put("frac14", new Character JavaDoc((char) 188));
112         entities.put("frac12", new Character JavaDoc((char) 189));
113         entities.put("frac34", new Character JavaDoc((char) 190));
114         entities.put("iquest", new Character JavaDoc((char) 191));
115         entities.put("Agrave", new Character JavaDoc((char) 192));
116         entities.put("Aacute", new Character JavaDoc((char) 193));
117         entities.put("Acirc", new Character JavaDoc((char) 194));
118         entities.put("Atilde", new Character JavaDoc((char) 195));
119         entities.put("Auml", new Character JavaDoc((char) 196));
120         entities.put("Aring", new Character JavaDoc((char) 197));
121         entities.put("AElig", new Character JavaDoc((char) 198));
122         entities.put("Ccedil", new Character JavaDoc((char) 199));
123         entities.put("Egrave", new Character JavaDoc((char) 200));
124         entities.put("Eacute", new Character JavaDoc((char) 201));
125         entities.put("Ecirc", new Character JavaDoc((char) 202));
126         entities.put("Euml", new Character JavaDoc((char) 203));
127         entities.put("Igrave", new Character JavaDoc((char) 204));
128         entities.put("Iacute", new Character JavaDoc((char) 205));
129         entities.put("Icirc", new Character JavaDoc((char) 206));
130         entities.put("Iuml", new Character JavaDoc((char) 207));
131         entities.put("ETH", new Character JavaDoc((char) 208));
132         entities.put("Ntilde", new Character JavaDoc((char) 209));
133         entities.put("Ograve", new Character JavaDoc((char) 210));
134         entities.put("Oacute", new Character JavaDoc((char) 211));
135         entities.put("Ocirc", new Character JavaDoc((char) 212));
136         entities.put("Otilde", new Character JavaDoc((char) 213));
137         entities.put("Ouml", new Character JavaDoc((char) 214));
138         entities.put("times", new Character JavaDoc((char) 215));
139         entities.put("Oslash", new Character JavaDoc((char) 216));
140         entities.put("Ugrave", new Character JavaDoc((char) 217));
141         entities.put("Uacute", new Character JavaDoc((char) 218));
142         entities.put("Ucirc", new Character JavaDoc((char) 219));
143         entities.put("Uuml", new Character JavaDoc((char) 220));
144         entities.put("Yacute", new Character JavaDoc((char) 221));
145         entities.put("THORN", new Character JavaDoc((char) 222));
146         entities.put("szlig", new Character JavaDoc((char) 223));
147         entities.put("agrave", new Character JavaDoc((char) 224));
148         entities.put("aacute", new Character JavaDoc((char) 225));
149         entities.put("acirc", new Character JavaDoc((char) 226));
150         entities.put("atilde", new Character JavaDoc((char) 227));
151         entities.put("auml", new Character JavaDoc((char) 228));
152         entities.put("aring", new Character JavaDoc((char) 229));
153         entities.put("aelig", new Character JavaDoc((char) 230));
154         entities.put("ccedil", new Character JavaDoc((char) 231));
155         entities.put("egrave", new Character JavaDoc((char) 232));
156         entities.put("eacute", new Character JavaDoc((char) 233));
157         entities.put("ecirc", new Character JavaDoc((char) 234));
158         entities.put("euml", new Character JavaDoc((char) 235));
159         entities.put("igrave", new Character JavaDoc((char) 236));
160         entities.put("iacute", new Character JavaDoc((char) 237));
161         entities.put("icirc", new Character JavaDoc((char) 238));
162         entities.put("iuml", new Character JavaDoc((char) 239));
163         entities.put("eth", new Character JavaDoc((char) 240));
164         entities.put("ntilde", new Character JavaDoc((char) 241));
165         entities.put("ograve", new Character JavaDoc((char) 242));
166         entities.put("oacute", new Character JavaDoc((char) 243));
167         entities.put("ocirc", new Character JavaDoc((char) 244));
168         entities.put("otilde", new Character JavaDoc((char) 245));
169         entities.put("ouml", new Character JavaDoc((char) 246));
170         entities.put("divide", new Character JavaDoc((char) 247));
171         entities.put("oslash", new Character JavaDoc((char) 248));
172         entities.put("ugrave", new Character JavaDoc((char) 249));
173         entities.put("uacute", new Character JavaDoc((char) 250));
174         entities.put("ucirc", new Character JavaDoc((char) 251));
175         entities.put("uuml", new Character JavaDoc((char) 252));
176         entities.put("yacute", new Character JavaDoc((char) 253));
177         entities.put("thorn", new Character JavaDoc((char) 254));
178         entities.put("yuml", new Character JavaDoc((char) 255));
179
180         //TODO: See http://elcursillo0.tripod.com/muestras/caracters.html
181
//TODO: See http://www.koders.com/java/fid0B623B057D114F8A68B54C6060C2B1AEE2D04750.aspx
182

183         Map elementInfos = ELEMENT_INFOS;
184         
185         elementInfos.put("NOSCRIPT", new ElementInfo(true, ElementInfo.END_ELEMENT_REQUIRED, null, true));
186         
187         ElementInfo optionalEndElement = new ElementInfo(true, ElementInfo.END_ELEMENT_OPTIONAL);
188         ElementInfo forbiddenEndElement = new ElementInfo(false, ElementInfo.END_ELEMENT_FORBIDDEN);
189         ElementInfo onlyText = new ElementInfo(false, ElementInfo.END_ELEMENT_REQUIRED);
190         
191         Set tableCellStopElements = new HashSet();
192         tableCellStopElements.add("TH");
193         tableCellStopElements.add("TD");
194         tableCellStopElements.add("TR");
195         ElementInfo tableCellElement = new ElementInfo(true, ElementInfo.END_ELEMENT_OPTIONAL, tableCellStopElements);
196         
197         Set headStopElements = new HashSet();
198         headStopElements.add("BODY");
199         headStopElements.add("DIV");
200         headStopElements.add("SPAN");
201         headStopElements.add("TABLE");
202         ElementInfo headElement = new ElementInfo(true, ElementInfo.END_ELEMENT_OPTIONAL, headStopElements);
203
204         Set optionStopElements = new HashSet();
205         optionStopElements.add("OPTION");
206         optionStopElements.add("SELECT");
207         ElementInfo optionElement = new ElementInfo(true, ElementInfo.END_ELEMENT_OPTIONAL, optionStopElements);
208         
209         Set paragraphStopElements = new HashSet();
210         paragraphStopElements.add("P");
211         paragraphStopElements.add("DIV");
212         paragraphStopElements.add("TABLE");
213         paragraphStopElements.add("PRE");
214         paragraphStopElements.add("UL");
215         paragraphStopElements.add("OL");
216         ElementInfo paragraphElement = new ElementInfo(true, ElementInfo.END_ELEMENT_OPTIONAL, paragraphStopElements);
217
218         Set liStopElements = new HashSet();
219         liStopElements.add("LI");
220         liStopElements.add("UL");
221         liStopElements.add("OL");
222         
223         elementInfos.put("SCRIPT", onlyText);
224         elementInfos.put("STYLE", onlyText);
225         elementInfos.put("TEXTAREA", onlyText);
226         elementInfos.put("IMG", forbiddenEndElement);
227         elementInfos.put("META", forbiddenEndElement);
228         elementInfos.put("LINK", forbiddenEndElement);
229         elementInfos.put("BASE", forbiddenEndElement);
230         elementInfos.put("INPUT", forbiddenEndElement);
231         elementInfos.put("FRAME", forbiddenEndElement);
232         elementInfos.put("BR", forbiddenEndElement);
233         elementInfos.put("HR", forbiddenEndElement);
234         elementInfos.put("EMBED", forbiddenEndElement);
235         elementInfos.put("SPACER", forbiddenEndElement);
236         
237         elementInfos.put("P", paragraphElement);
238         elementInfos.put("LI", optionalEndElement);
239         elementInfos.put("DT", optionalEndElement);
240         elementInfos.put("DD", optionalEndElement);
241         elementInfos.put("TR", optionalEndElement);
242         elementInfos.put("TH", tableCellElement);
243         elementInfos.put("TD", tableCellElement);
244         elementInfos.put("HEAD", headElement);
245         elementInfos.put("OPTION", optionElement);
246         //TODO: Keep adding tags here
247
}
248
/**
 * Creates a parser without a user agent context.
 * The <code>context</code> argument is not used by the parser; it is
 * accepted only so that existing callers keep compiling.
 * @param context Ignored. Historically an instance of
 * {@link org.lobobrowser.html.HtmlRendererContext}, such as
 * {@link org.lobobrowser.html.test.SimpleHtmlRendererContext}.
 * @param document The <code>HTMLDocument</code> used by the parser.
 * @param errorHandler The error handler.
 * @param publicId The public ID of the document.
 * @param systemId The system ID of the document.
 * @deprecated HtmlParserContext is not needed by the parser;
 * a UserAgentContext should be passed instead.
 */
public HtmlParser(HtmlParserContext context, HTMLDocument document, ErrorHandler errorHandler, String publicId, String systemId) {
    // The cast selects the UserAgentContext overload; a bare null would be ambiguous.
    this((UserAgentContext) null, document, errorHandler, publicId, systemId);
}
267     
/**
 * Creates a parser without a user agent context
 * (the context is left <code>null</code>).
 * @param document The <code>HTMLDocument</code> used by the parser.
 * @param errorHandler The error handler.
 * @param publicId The public ID of the document.
 * @param systemId The system ID of the document.
 * @deprecated A UserAgentContext should be passed in the constructor.
 */
public HtmlParser(HTMLDocument document, ErrorHandler errorHandler, String publicId, String systemId) {
    // The cast selects the UserAgentContext overload; a bare null would be ambiguous.
    this((UserAgentContext) null, document, errorHandler, publicId, systemId);
}
283
/**
 * Creates a fully configured <code>HtmlParser</code>.
 * All arguments are stored as given; none of them is validated here.
 * @param ucontext The user agent context.
 * @param document The <code>HTMLDocument</code> used by the parser.
 * @param errorHandler The error handler.
 * @param publicId The public ID of the document.
 * @param systemId The system ID of the document.
 */
public HtmlParser(UserAgentContext ucontext, HTMLDocument document, ErrorHandler errorHandler, String publicId, String systemId) {
    // Target document and user agent settings.
    this.document = document;
    this.ucontext = ucontext;
    // Document identification and error reporting.
    this.publicId = publicId;
    this.systemId = systemId;
    this.errorHandler = errorHandler;
}
299
/**
 * Creates a <code>HtmlParser</code> that has no error handler,
 * public ID or system ID (all three are <code>null</code>).
 * @param ucontext The user agent context.
 * @param document The <code>HTMLDocument</code> used by the parser.
 */
public HtmlParser(UserAgentContext ucontext, HTMLDocument document) {
    this(ucontext, document, null, null, null);
}
312
313     /**
314      * Parses HTML from an input stream, assuming
315      * the character set is ISO-8859-1.
316      * @param in The input stream.
317      * @throws IOException Thrown when there are errors reading the stream.
318      * @throws SAXException Thrown when there are parse errors.
319      */

320     public void parse(InputStream in) throws IOException,SAXException JavaDoc,UnsupportedEncodingException {
321         this.parse(in, "ISO-8859-1");
322     }
323     
324     /**
325      * Parses HTML from an input stream, using the given character set.
326      * @param in The input stream.
327      * @param charset The character set.
328      * @throws IOException Thrown when there's an error reading from the stream.
329      * @throws SAXException Thrown when there is a parser error.
330      * @throws UnsupportedEncodingException Thrown if the character set is not supported.
331      */

332     public void parse(InputStream in, String JavaDoc charset) throws IOException,SAXException JavaDoc,UnsupportedEncodingException {
333         WritableLineReader reader = new WritableLineReader(new InputStreamReader(in, charset));
334         this.parse(reader);
335     }
336
337     /**
338      * Parses HTML given by a <code>Reader</code>. This method appends
339      * nodes to the document provided to the parser.
340      * @param reader An instance of <code>Reader</code>.
341      * @throws IOException Thrown if there are errors reading the input stream.
342      * @throws SAXException Thrown if there are parse errors.
343      */

344     public void parse(Reader reader) throws IOException, SAXException JavaDoc {
345         this.parse(new LineNumberReader(reader));
346     }
347
348     public void parse(LineNumberReader reader) throws IOException, SAXException JavaDoc {
349         Document JavaDoc doc = this.document;
350         this.parse(reader, doc);
351     }
352
353     /**
354      * This method may be used when the DOM should be built under
355      * a given node, such as when <code>innerHTML</code> is used
356      * in Javascript.
357      * @param reader A document reader.
358      * @param parent The root node for the parsed DOM.
359      * @throws IOException
360      * @throws SAXException
361      */

362     public void parse(Reader reader, Node JavaDoc parent) throws IOException, SAXException JavaDoc {
363         this.parse(new LineNumberReader(reader), parent);
364     }
365
366     /**
367      * This method may be used when the DOM should be built under
368      * a given node, such as when <code>innerHTML</code> is used
369      * in Javascript.
370      * @param reader A LineNumberReader for the document.
371      * @param parent The root node for the parsed DOM.
372      * @throws IOException
373      * @throws SAXException
374      */

375     public void parse(LineNumberReader reader, Node JavaDoc parent) throws IOException, SAXException JavaDoc {
376         // Note: Parser does not clear document. It could be used incrementally.
377
try {
378             parent.setUserData(MODIFYING_KEY, Boolean.TRUE, null);
379             try {
380                 while(this.parseToken(parent, reader, null, new LinkedList()) != TOKEN_EOD) {;}
381             } catch(StopException se) {
382                 throw new SAXException JavaDoc("Unexpected flow exception", se);
383             }
384         } finally {
385             parent.setUserData(MODIFYING_KEY, Boolean.FALSE, null);
386         }
387     }
388         
// Result codes returned by parseToken.
// TOKEN_EOD: the end of the input was reached.
389     private static final int TOKEN_EOD = 0;
// TOKEN_COMMENT: a "!--" comment was read and appended as a comment node.
390     private static final int TOKEN_COMMENT = 1;
// TOKEN_TEXT: text was read that was not followed by the start of a tag.
391     private static final int TOKEN_TEXT = 2;
// TOKEN_BEGIN_ELEMENT: an element was opened; no matching end tag was consumed.
392     private static final int TOKEN_BEGIN_ELEMENT = 3;
// TOKEN_END_ELEMENT: an end tag was read; its upper-case name is in normalLastTag.
393     private static final int TOKEN_END_ELEMENT = 4;
// TOKEN_FULL_ELEMENT: an element was read together with its matching end tag.
394     private static final int TOKEN_FULL_ELEMENT = 5;
// TOKEN_BAD: a "!" tag other than a comment (e.g. DOCTYPE) was skipped.
395     private static final int TOKEN_BAD = 6;
396     
// Upper-case name of the tag handled by the most recent parseToken call,
// or null when that call returned TOKEN_TEXT.
397     private String JavaDoc normalLastTag = null;
// Tokenizer flags read by parseToken. They are assigned by helper methods
// that are outside this part of the file -- presumably readUpToTagBegin and
// readTag; TODO confirm.
398     private boolean justReadTagBegin = false;
399     private boolean justReadTagEnd = false;
400     
401     /**
402      * Only set when readAttribute returns false.
403      */

404     private boolean justReadEmptyElement = false;
405     
406     /**
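407      * Parses any text up to the next tag, followed by that tag (for an element, including its content).
408      * @param parent The node that receives the parsed text, comment and element nodes.
409      * @param reader The reader the document is read from.
410      * @param stopTags Upper-case tag names; if a begin tag in this set is encountered, the method throws StopException. May be null.
411      * @param ancestors Upper-case tag names of the elements currently being parsed, innermost first.
412      * @return One of the <code>TOKEN_*</code> constants describing what was parsed.
413      * @throws IOException
414      * @throws StopException
415      * @throws SAXException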
416      */

417     private final int parseToken(Node JavaDoc parent, LineNumberReader reader, Set stopTags, LinkedList ancestors) throws IOException, StopException, SAXException JavaDoc {
        // Overview: (1) read text up to the next tag and append it to parent as
        // a text node; (2) if a tag follows, dispatch on its kind: "!" tags
        // (comment or other), end tags, or begin tags. For a begin tag the
        // element is created, appended to parent, and its content is parsed
        // recursively until a matching end tag, a stop tag or the end of input.
418         Document JavaDoc doc = this.document;
419         StringBuffer JavaDoc textSb = this.readUpToTagBegin(reader);
        // A null buffer is treated as the end of the document.
420         if(textSb == null) {
421             return TOKEN_EOD;
422         }
        // Non-empty text is entity-decoded and appended as a text node.
423         if(textSb.length() != 0) {
424             int textLine = reader.getLineNumber();
425             StringBuffer JavaDoc decText = this.entityDecode(textSb, textLine);
426             Node JavaDoc textNode = doc.createTextNode(decText.toString());
427             parent.appendChild(textNode);
428         }
429         if(this.justReadTagBegin) {
430             String JavaDoc tag = this.readTag(reader);
431             if(tag == null) {
432                 return TOKEN_EOD;
433             }
            // The upper-case form is used for ELEMENT_INFOS and stop-set lookups.
434             String JavaDoc normalTag = tag.toUpperCase();
            // The outer finally records normalTag in this.normalLastTag on every
            // exit path, so the caller can tell which tag was just handled.
435             try {
436                 if(tag.startsWith("!")) {
437                     if("!--".equals(tag)) {
                        // Comment: its text is entity-decoded and appended as a comment node.
438                         int commentLine = reader.getLineNumber();
439                         StringBuffer JavaDoc comment = this.passEndOfComment(reader);
440                         StringBuffer JavaDoc decText = this.entityDecode(comment, commentLine);
441                         parent.appendChild(doc.createComment(decText.toString()));
442                         return TOKEN_COMMENT;
443                     }
444                     else {
                        // Any other "!" tag is skipped without creating a node.
445                         //TODO: DOCTYPE node
446
this.passEndOfTag(reader);
447                         return TOKEN_BAD;
448                     }
449                 }
450                 else if(tag.startsWith("/")) {
                    // End tag: drop the leading "/" so that normalLastTag holds the
                    // bare upper-case name, then skip the rest of the tag.
451                     tag = tag.substring(1);
452                     normalTag = normalTag.substring(1);
453                     this.passEndOfTag(reader);
454                     return TOKEN_END_ELEMENT;
455                 }
456                 else {
                    // Begin tag: create the element and mark it as being modified
                    // while its attributes and children are added.
457                     Element JavaDoc element = doc.createElement(tag);
458                     element.setUserData(MODIFYING_KEY, Boolean.TRUE, null);
459                     try {
                        // Attributes are read only if the tag has not already ended.
460                         if(!this.justReadTagEnd) {
461                             while(this.readAttribute(reader, element)) {;}
462                         }
                        // A begin tag listed in the inherited stop set is handed, still
                        // unattached, to an enclosing parseToken call via StopException.
463                         if(stopTags != null && stopTags.contains(normalTag)) {
464                             // Throw before appending to parent.
465
// After attributes are set.
466
// After MODIFYING_KEY is set.
467
throw new StopException(element);
468                         }
469                         // Add element to parent before children are added.
470
// This is necessary for incremental rendering.
471
parent.appendChild(element);
472                         if(!this.justReadEmptyElement) {
                            // Tags without an ELEMENT_INFOS entry default to: end tag
                            // required, child elements allowed, no stop tags.
473                             ElementInfo einfo = (ElementInfo) ELEMENT_INFOS.get(normalTag);
474                             int endTagType = einfo == null ? ElementInfo.END_ELEMENT_REQUIRED : einfo.endElementType;
475                             if(endTagType != ElementInfo.END_ELEMENT_FORBIDDEN) {
476                                 boolean childrenOk = einfo == null ? true : einfo.childElementOk;
477                                 Set newStopSet = einfo == null ? null : einfo.stopTags;
                                // An optional-end-tag element without an explicit stop set
                                // is stopped by a begin tag of its own name.
478                                 if(newStopSet == null) {
479                                     if(endTagType == ElementInfo.END_ELEMENT_OPTIONAL) {
480                                         newStopSet = Collections.singleton(normalTag);
481                                     }
482                                 }
                                // Combine with the inherited stop set. An element with a
                                // required end tag and no stop set of its own passes no
                                // stop set on to its children (newStopSet stays null).
483                                 if(stopTags != null) {
484                                     if(newStopSet != null) {
485                                         Set newStopSet2 = new HashSet();
486                                         newStopSet2.addAll(stopTags);
487                                         newStopSet2.addAll(newStopSet);
488                                         newStopSet = newStopSet2;
489                                     }
490                                     else {
491                                         newStopSet = endTagType == ElementInfo.END_ELEMENT_REQUIRED ? null : stopTags;
492                                     }
493                                 }
                                // The head of the ancestors list is the element being parsed.
494                                 ancestors.addFirst(normalTag);
495                                 try {
                                    // Parse content tokens until one of the return
                                    // statements below is reached or an exception escapes.
496                                     for(;;) {
497                                         try {
498                                             int token;
499                                             if(einfo != null && einfo.noScriptElement) {
500                                                 UserAgentContext ucontext = this.ucontext;
                                                // NOTE(review): with scripting enabled (or no context)
                                                // parseForEndTag is given parent, not element, and
                                                // false as its last argument -- confirm this is intended.
501                                                 if(ucontext == null || ucontext.isScriptingEnabled()) {
502                                                     token = this.parseForEndTag(parent, reader, tag, false);
503                                                 }
504                                                 else {
505                                                     token = this.parseToken(element, reader, newStopSet, ancestors);
506                                                 }
507                                             }
508                                             else {
                                                // Elements that allow children are parsed recursively;
                                                // the others are handed to parseForEndTag.
509                                                 token = childrenOk ? this.parseToken(element, reader, newStopSet, ancestors) : this.parseForEndTag(element, reader, tag, true);
510                                             }
511                                             if(token == TOKEN_END_ELEMENT) {
512                                                 String JavaDoc normalLastTag = this.normalLastTag;
                                                // The end tag closes this element.
513                                                 if(normalTag.equals(normalLastTag)) {
514                                                     return TOKEN_FULL_ELEMENT;
515                                                 }
516                                                 else {
                                                    // The end tag names a different element. If its end tag is
                                                    // not forbidden and it matches an enclosing element (the
                                                    // first list entry, this element, is skipped), propagate
                                                    // TOKEN_END_ELEMENT. normalTag is overwritten so that the
                                                    // outer finally keeps normalLastTag at the end tag's name.
                                                    // Otherwise the end tag is ignored and the loop continues.
517                                                     ElementInfo closeTagInfo = (ElementInfo) ELEMENT_INFOS.get(normalLastTag);
518                                                     if(closeTagInfo == null || closeTagInfo.endElementType != ElementInfo.END_ELEMENT_FORBIDDEN) {
519                                                         //TODO: Rather inefficient algorithm, but it's probably executed infrequently?
520
Iterator i = ancestors.iterator();
521                                                         if(i.hasNext()) {
522                                                             i.next();
523                                                             while(i.hasNext()) {
524                                                                 String JavaDoc normalAncestorTag = (String JavaDoc) i.next();
525                                                                 if(normalLastTag.equals(normalAncestorTag)) {
526                                                                     normalTag = normalLastTag;
527                                                                     return TOKEN_END_ELEMENT;
528                                                                 }
529                                                             }
530                                                         }
531                                                     }
532                                                     //TODO: Working here
533
}
534                                             }
535                                             else if(token == TOKEN_EOD) {
536                                                 return TOKEN_EOD;
537                                             }
538                                         } catch(StopException se) {
                                            // A nested call met a stop tag: the current element ends
                                            // here and the carried element takes its place as the next
                                            // child of the same parent.
539                                             // newElement does not have a parent.
540
Element JavaDoc newElement = se.getElement();
541                                             tag = newElement.getTagName();
542                                             normalTag = tag.toUpperCase();
543                                             // If a subelement throws StopException with
544
// a tag matching the current stop tag, the exception
545
// is rethrown (e.g. <TR><TD>blah<TR><TD>blah)
546
if(stopTags != null && stopTags.contains(normalTag)) {
547                                                 throw se;
548                                             }
                                            // Recompute the element info and the stop set for the new element.
549                                             einfo = (ElementInfo) ELEMENT_INFOS.get(normalTag);
550                                             endTagType = einfo == null ? ElementInfo.END_ELEMENT_REQUIRED : einfo.endElementType;
551                                             childrenOk = einfo == null ? true : einfo.childElementOk;
552                                             newStopSet = einfo == null ? null : einfo.stopTags;
553                                             if(newStopSet == null) {
554                                                 if(endTagType == ElementInfo.END_ELEMENT_OPTIONAL) {
555                                                     newStopSet = Collections.singleton(normalTag);
556                                                 }
557                                             }
558                                             if(stopTags != null && newStopSet != null) {
559                                                 Set newStopSet2 = new HashSet();
560                                                 newStopSet2.addAll(stopTags);
561                                                 newStopSet2.addAll(newStopSet);
562                                                 newStopSet = newStopSet2;
563                                             }
                                            // Replace the head of the ancestors list with the new tag.
564                                             ancestors.removeFirst();
565                                             ancestors.addFirst(normalTag);
566                                             //Switch element
567
element.setUserData(MODIFYING_KEY, Boolean.FALSE, null);
568                                             //newElement should have been suspended.
569
element = newElement;
570                                             // Add to parent
571
parent.appendChild(element);
                                            // An empty new element has no content to parse.
572                                             if(this.justReadEmptyElement) {
573                                                 return TOKEN_BEGIN_ELEMENT;
574                                             }
575                                         }
576                                     }
577                                 } finally {
                                    // Remove this element's entry from the ancestors list on every exit.
578                                     ancestors.removeFirst();
579                                 }
580                             }
581                         }
582                         return TOKEN_BEGIN_ELEMENT;
583                     } finally {
                        // Runs for whichever element is current at exit (it may have
                        // been switched in the StopException handler above).
584                         //TODO: Element could be null. Currently working on this.
585
// This can inform elements to continue with notifications.
586
// It can also cause Javascript to get processed.
587
element.setUserData(MODIFYING_KEY, Boolean.FALSE, null);
588                     }
589                 }
590             } finally {
591                 this.normalLastTag = normalTag;
592             }
593         }
594         else {
            // No tag followed the text, so there is no "last tag".
595             this.normalLastTag = null;
596             return TOKEN_TEXT;
597         }
598     }
599
600 // private final int parseToken(Node parent, LineNumberReader reader, String stopAtTagUC, Set stopTags) throws IOException, StopException, SAXException {
601
// Document doc = this.document;
602
// StringBuffer textSb = this.readUpToTagBegin(reader);
603
// if(textSb == null) {
604
// return TOKEN_EOD;
605
// }
606
// if(textSb.length() != 0) {
607
// int textLine = reader.getLineNumber();
608
// StringBuffer decText = this.entityDecode(textSb, textLine);
609
// Node textNode = doc.createTextNode(decText.toString());
610
// parent.appendChild(textNode);
611
// }
612
// if(this.justReadTagBegin) {
613
// String tag = this.readTag(reader);
614
// if(tag == null) {
615
// return TOKEN_EOD;
616
// }
617
// try {
618
// if(tag.startsWith("!")) {
619
// if("!--".equals(tag)) {
620
// int commentLine = reader.getLineNumber();
621
// StringBuffer comment = this.passEndOfComment(reader);
622
// StringBuffer decText = this.entityDecode(comment, commentLine);
623
// parent.appendChild(doc.createComment(decText.toString()));
624
// return TOKEN_COMMENT;
625
// }
626
// else {
627
// //TODO: DOCTYPE node
628
// this.passEndOfTag(reader);
629
// return TOKEN_BAD;
630
// }
631
// }
632
// else if(tag.startsWith("/")) {
633
// tag = tag.substring(1);
634
// this.passEndOfTag(reader);
635
// return TOKEN_END_ELEMENT;
636
// }
637
// else {
638
// Element element = doc.createElement(tag);
639
// element.setUserData(MODIFYING_KEY, Boolean.TRUE, null);
640
// try {
641
// if(!this.justReadTagEnd) {
642
// while(this.readAttribute(reader, element)) {;}
643
// }
644
// String normalTag = tag.toUpperCase();
645
// if(stopAtTagUC != null && stopAtTagUC.equals(normalTag)) {
646
// // Throw before appending to parent.
647
// // After attributes are set.
648
// // After MODIFYING_KEY is set.
649
// throw new StopException(element);
650
// }
651
// else if(stopTags != null && stopTags.contains(normalTag)) {
652
// throw new StopException(element);
653
// }
654
// // Add element to parent before children are added.
655
// // This is necessary for incremental rendering.
656
// parent.appendChild(element);
657
// if(!this.justReadEmptyElement) {
658
// ElementInfo einfo = (ElementInfo) ELEMENT_INFOS.get(normalTag);
659
// int endTagType = einfo == null ? ElementInfo.END_ELEMENT_REQUIRED : einfo.endElementType;
660
// if(endTagType != ElementInfo.END_ELEMENT_FORBIDDEN) {
661
// boolean childrenOk = einfo == null ? true : einfo.childElementOk;
662
// String newStopTag = null;
663
// Set newStopSet = einfo == null ? null : einfo.stopTags;
664
// if(newStopSet == null) {
665
// if(endTagType == ElementInfo.END_ELEMENT_OPTIONAL) {
666
// newStopTag = normalTag;
667
// }
668
// }
669
// for(;;) {
670
// try {
671
// int token = childrenOk ? this.parseToken(element, reader, newStopTag, newStopSet) : this.parseForEndTag(element, reader, tag);
672
// if(token == TOKEN_END_ELEMENT) {
673
// String lastTag = this.lastTag;
674
// if(tag.equalsIgnoreCase(lastTag)) {
675
// return TOKEN_FULL_ELEMENT;
676
// }
677
// else if(newStopTag != null) {
678
// if(newStopTag.equalsIgnoreCase(lastTag)) {
679
// return TOKEN_END_ELEMENT;
680
// }
681
// }
682
// else if(newStopSet != null) {
683
// if(newStopSet.contains(lastTag.toUpperCase())) {
684
// return TOKEN_END_ELEMENT;
685
// }
686
// }
687
// }
688
// else if(token == TOKEN_EOD) {
689
// return TOKEN_EOD;
690
// }
691
// } catch(StopException se) {
692
// // newElement does not have a parent.
693
// Element newElement = se.getElement();
694
// tag = newElement.getTagName();
695
// normalTag = tag.toUpperCase();
696
// // If a subelement throws StopException with
697
// // a tag matching the current stop tag, the exception
698
// // is rethrown (e.g. <TR><TD>blah<TR><TD>blah)
699
// if(stopAtTagUC != null && stopAtTagUC.equals(normalTag)) {
700
// throw se;
701
// }
702
// else if(stopTags != null && stopTags.contains(normalTag)) {
703
// throw se;
704
// }
705
// einfo = (ElementInfo) ELEMENT_INFOS.get(normalTag);
706
// endTagType = einfo == null ? ElementInfo.END_ELEMENT_REQUIRED : einfo.endElementType;
707
// childrenOk = einfo == null ? true : einfo.childElementOk;
708
// newStopTag = null;
709
// newStopSet = einfo == null ? null : einfo.stopTags;
710
// if(newStopSet == null) {
711
// if(endTagType == ElementInfo.END_ELEMENT_OPTIONAL) {
712
// newStopTag = normalTag;
713
// }
714
// }
715
// //Switch element
716
// element.setUserData(MODIFYING_KEY, Boolean.FALSE, null);
717
// //newElement should have been suspended.
718
// element = newElement;
719
// // Add to parent
720
// parent.appendChild(element);
721
// if(this.justReadEmptyElement) {
722
// return TOKEN_BEGIN_ELEMENT;
723
// }
724
// }
725
// }
726
// }
727
// }
728
// return TOKEN_BEGIN_ELEMENT;
729
// } finally {
730
// //TODO: Element could be null. Currently working on this.
731
// // This can inform elements to continue with notifications.
732
// // It can also cause Javascript to get processed.
733
// element.setUserData(MODIFYING_KEY, Boolean.FALSE, null);
734
// }
735
// }
736
// } finally {
737
// this.lastTag = tag;
738
// }
739
// }
740
// else {
741
// this.lastTag = null;
742
// return TOKEN_TEXT;
743
// }
744
// }
745

746     /**
747      * Reads text until the beginning of the next tag.
748      * Leaves the reader offset past the opening angle bracket.
749      * Returns null only on EOF.
750      */

751     private final StringBuffer JavaDoc readUpToTagBegin(LineNumberReader reader) throws IOException, SAXException JavaDoc{
752         StringBuffer JavaDoc sb = null;
753         int intCh;
754         while((intCh = reader.read()) != -1) {
755             char ch = (char) intCh;
756             if(ch == '<') {
757                 this.justReadTagBegin = true;
758                 this.justReadTagEnd = false;
759                 this.justReadEmptyElement = false;
760                 if(sb == null) {
761                     sb = new StringBuffer JavaDoc(0);
762                 }
763                 return sb;
764             }
765             if(sb == null) {
766                 sb = new StringBuffer JavaDoc();
767             }
768             sb.append(ch);
769         }
770         this.justReadTagBegin = false;
771         this.justReadTagEnd = false;
772         this.justReadEmptyElement = false;
773         return sb;
774     }
775
776     /**
777      * Assumes that the content is completely made up of text,
778      * and parses until an ending tag is found.
779      * @param parent
780      * @param reader
781      * @param tagName
782      * @return
783      * @throws IOException
784      */

785     private final int parseForEndTag(Node JavaDoc parent, LineNumberReader reader, String JavaDoc tagName, boolean addTextNode) throws IOException {
786         Document JavaDoc doc = this.document;
787         int intCh;
788         StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
789         while((intCh = reader.read()) != -1) {
790             char ch = (char) intCh;
791             if(ch == '<') {
792                 intCh = reader.read();
793                 if(intCh != -1) {
794                     ch = (char) intCh;
795                     if(ch == '/') {
796                         StringBuffer JavaDoc tempBuffer = new StringBuffer JavaDoc();
797                         INNER:
798                         while((intCh = reader.read()) != -1) {
799                             ch = (char) intCh;
800                             if(ch == '>') {
801                                 String JavaDoc thisTag = tempBuffer.toString().trim();
802                                 if(thisTag.equalsIgnoreCase(tagName)) {
803                                     this.justReadTagBegin = false;
804                                     this.justReadTagEnd = true;
805                                     this.justReadEmptyElement = false;
806                                     this.normalLastTag = thisTag.toUpperCase();
807                                     if(addTextNode) {
808                                         String JavaDoc text = sb.toString();
809                                         if(text.length() != 0) {
810                                             Node JavaDoc textNode = doc.createTextNode(text);
811                                             parent.appendChild(textNode);
812                                         }
813                                     }
814                                     return HtmlParser.TOKEN_END_ELEMENT;
815                                 }
816                                 else {
817                                     break INNER;
818                                 }
819                             }
820                             else {
821                                 tempBuffer.append(ch);
822                             }
823                         }
824                         sb.append("</");
825                         sb.append(tempBuffer);
826                     }
827                     else {
828                         sb.append('<');
829                     }
830                 }
831             }
832             sb.append(ch);
833         }
834         this.justReadTagBegin = false;
835         this.justReadTagEnd = false;
836         this.justReadEmptyElement = false;
837         if(addTextNode) {
838             String JavaDoc text = sb.toString();
839             if(text.length() != 0) {
840                 Node JavaDoc textNode = doc.createTextNode(text);
841                 parent.appendChild(textNode);
842             }
843         }
844         return HtmlParser.TOKEN_EOD;
845     }
846     
847         /**
848      * The reader offset should be
849      * @param reader
850      * @return
851      */

852     private final String JavaDoc readTag(LineNumberReader reader) throws IOException {
853         StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
854         int chInt;
855         chInt = reader.read();
856         if(chInt != -1) {
857             boolean cont = true;
858             char ch = (char) chInt;
859             if(ch == '!') {
860                 sb.append('!');
861                 chInt = reader.read();
862                 if(chInt != -1) {
863                     ch = (char) chInt;
864                     if(ch == '-') {
865                         sb.append('-');
866                         chInt = reader.read();
867                         if(chInt != -1) {
868                             ch = (char) chInt;
869                             if(ch == '-') {
870                                 sb.append('-');
871                                 cont = false;
872                             }
873                         }
874                         else {
875                             cont = false;
876                         }
877                     }
878                 }
879                 else {
880                     cont = false;
881                 }
882             }
883             else if(ch == '/') {
884                 sb.append(ch);
885                 chInt = reader.read();
886                 if(chInt != -1) {
887                     ch = (char) chInt;
888                 }
889                 else {
890                     cont = false;
891                 }
892             }
893             if(cont) {
894                 boolean lastCharSlash = false;
895                 for(;;) {
896                     if(Character.isWhitespace(ch)) {
897                         break;
898                     }
899                     else if(ch == '>') {
900                         this.justReadTagEnd = true;
901                         this.justReadTagBegin = false;
902                         this.justReadEmptyElement = lastCharSlash;
903                         String JavaDoc tag = sb.toString();
904                         return tag;
905                     }
906                     else if (ch == '/') {
907                         lastCharSlash = true;
908                     }
909                     else {
910                         if(lastCharSlash) {
911                             sb.append('/');
912                         }
913                         lastCharSlash = false;
914                         sb.append(ch);
915                     }
916                     chInt = reader.read();
917                     if(chInt == -1) {
918                         break;
919                     }
920                     ch = (char) chInt;
921                 }
922             }
923         }
924         if(sb.length() > 0) {
925             this.justReadTagEnd = false;
926             this.justReadTagBegin = false;
927             this.justReadEmptyElement = false;
928         }
929         String JavaDoc tag = sb.toString();
930         return tag;
931     }
932
933     private final StringBuffer JavaDoc passEndOfComment(LineNumberReader reader) throws IOException {
934         if(this.justReadTagEnd) {
935             return new StringBuffer JavaDoc(0);
936         }
937         StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
938         OUTER:
939         for(;;) {
940             int chInt = reader.read();
941             if(chInt == -1) {
942                 break OUTER;
943             }
944             char ch = (char) chInt;
945             if(ch == '-') {
946                 chInt = reader.read();
947                 if(chInt == -1) {
948                     sb.append(ch);
949                     break OUTER;
950                 }
951                 ch = (char) chInt;
952                 if(ch == '-') {
953                     StringBuffer JavaDoc extra = null;
954                     INNER:
955                     for(;;) {
956                         chInt = reader.read();
957                         if(chInt == -1) {
958                             if(extra != null) {
959                                 sb.append(extra.toString());
960                             }
961                             break OUTER;
962                         }
963                         ch = (char) chInt;
964                         if(ch == '>') {
965                             this.justReadTagBegin = false;
966                             this.justReadTagEnd = true;
967                             return sb;
968                         }
969                         else if(Character.isWhitespace(ch)) {
970                             if(extra == null) {
971                                 extra = new StringBuffer JavaDoc();
972                                 extra.append("--");
973                             }
974                             extra.append(ch);
975                         }
976                         else {
977                             if(extra != null) {
978                                 sb.append(extra.toString());
979                             }
980                             sb.append(ch);
981                             break INNER;
982                         }
983                     }
984                 }
985                 else {
986                     sb.append('-');
987                     sb.append(ch);
988                 }
989             }
990             else {
991                 sb.append(ch);
992             }
993         }
994         if(sb.length() > 0) {
995             this.justReadTagBegin = false;
996             this.justReadTagEnd = false;
997         }
998         return sb;
999     }
1000
1001    private final void passEndOfTag(Reader reader) throws IOException {
1002        if(this.justReadTagEnd) {
1003            return;
1004        }
1005        boolean readSomething = false;
1006        for(;;) {
1007            int chInt = reader.read();
1008            if(chInt == -1) {
1009                break;
1010            }
1011            readSomething = true;
1012            char ch = (char) chInt;
1013            if(ch == '>') {
1014                this.justReadTagEnd = true;
1015                this.justReadTagBegin = false;
1016                return;
1017            }
1018        }
1019        if(readSomething) {
1020            this.justReadTagBegin = false;
1021            this.justReadTagEnd = false;
1022        }
1023    }
1024    
1025    private final boolean readAttribute(LineNumberReader reader, Element JavaDoc element) throws IOException, SAXException JavaDoc {
1026        if(this.justReadTagEnd) {
1027            return false;
1028        }
1029
1030        // Read attribute name up to '=' character.
1031
// May read several attribute names without explicit values.
1032

1033        StringBuffer JavaDoc attributeName = null;
1034        boolean blankFound = false;
1035        boolean lastCharSlash = false;
1036        for(;;) {
1037            int chInt = reader.read();
1038            if(chInt == -1) {
1039                if(attributeName != null && attributeName.length() != 0) {
1040                    String JavaDoc attributeNameStr = attributeName.toString();
1041                    element.setAttribute(attributeNameStr, attributeNameStr);
1042                    attributeName.setLength(0);
1043                }
1044                this.justReadTagBegin = false;
1045                this.justReadTagEnd = false;
1046                this.justReadEmptyElement = false;
1047                return false;
1048            }
1049            char ch = (char) chInt;
1050            if(ch == '=') {
1051                lastCharSlash = false;
1052                blankFound = false;
1053                break;
1054            }
1055            else if(ch == '>') {
1056                if(attributeName != null && attributeName.length() != 0) {
1057                    String JavaDoc attributeNameStr = attributeName.toString();
1058                    element.setAttribute(attributeNameStr, attributeNameStr);
1059                }
1060                this.justReadTagBegin = false;
1061                this.justReadTagEnd = true;
1062                this.justReadEmptyElement = lastCharSlash;
1063                return false;
1064            }
1065            else if(ch == '/') {
1066                blankFound = true;
1067                lastCharSlash = true;
1068            }
1069            else if(Character.isWhitespace(ch)) {
1070                lastCharSlash = false;
1071                blankFound = true;
1072            }
1073            else {
1074                lastCharSlash = false;
1075                if(blankFound) {
1076                    blankFound = false;
1077                    if(attributeName != null && attributeName.length() != 0) {
1078                        String JavaDoc attributeNameStr = attributeName.toString();
1079                        element.setAttribute(attributeNameStr, attributeNameStr);
1080                        attributeName.setLength(0);
1081                    }
1082                }
1083                if(attributeName == null) {
1084                    attributeName = new StringBuffer JavaDoc(6);
1085                }
1086                attributeName.append(ch);
1087            }
1088        }
1089        // Read blanks up to open quote or first non-blank.
1090
StringBuffer JavaDoc attributeValue = null;
1091        int openQuote = -1;
1092        for(;;) {
1093            int chInt = reader.read();
1094            if(chInt == -1) {
1095                break;
1096            }
1097            char ch = (char) chInt;
1098            if(ch == '>') {
1099                if(attributeName != null && attributeName.length() != 0) {
1100                    String JavaDoc attributeNameStr = attributeName.toString();
1101                    element.setAttribute(attributeNameStr, attributeNameStr);
1102                }
1103                this.justReadTagBegin = false;
1104                this.justReadTagEnd = true;
1105                this.justReadEmptyElement = lastCharSlash;
1106                return false;
1107            }
1108            else if(ch == '/') {
1109                lastCharSlash = true;
1110            }
1111            else if(Character.isWhitespace(ch)) {
1112                lastCharSlash = false;
1113            }
1114            else {
1115                lastCharSlash = false;
1116                if(ch == '"') {
1117                    openQuote = '"';
1118                }
1119                else if(ch == '\'') {
1120                    openQuote = '\'';
1121                }
1122                else {
1123                    openQuote = -1;
1124                    if(attributeValue == null) {
1125                        attributeValue = new StringBuffer JavaDoc(6);
1126                    }
1127                    attributeValue.append(ch);
1128                }
1129                break;
1130            }
1131        }
1132
1133        // Read attribute value
1134

1135        for(;;) {
1136            int chInt = reader.read();
1137            if(chInt == -1) {
1138                break;
1139            }
1140            char ch = (char) chInt;
1141            if(openQuote != -1 && ch == openQuote) {
1142                lastCharSlash = false;
1143                if(attributeName != null) {
1144                    String JavaDoc attributeNameStr = attributeName.toString();
1145                    if(attributeValue == null) {
1146                        element.setAttribute(attributeNameStr, null);
1147                    }
1148                    else {
1149                        StringBuffer JavaDoc actualAttributeValue = this.entityDecode(attributeValue, reader.getLineNumber());
1150                        element.setAttribute(attributeNameStr, actualAttributeValue.toString());
1151                    }
1152                }
1153                this.justReadTagBegin = false;
1154                this.justReadTagEnd = false;
1155                return true;
1156            }
1157            else if(openQuote == -1 && ch == '>') {
1158                if(attributeName != null) {
1159                    String JavaDoc attributeNameStr = attributeName.toString();
1160                    if(attributeValue == null) {
1161                        element.setAttribute(attributeNameStr, null);
1162                    }
1163                    else {
1164                        StringBuffer JavaDoc actualAttributeValue = this.entityDecode(attributeValue, reader.getLineNumber());
1165                        element.setAttribute(attributeNameStr, actualAttributeValue.toString());
1166                    }
1167                }
1168                this.justReadTagBegin = false;
1169                this.justReadTagEnd = true;
1170                this.justReadEmptyElement = lastCharSlash;
1171                return false;
1172            }
1173            else if(openQuote == -1 && Character.isWhitespace(ch)) {
1174                lastCharSlash = false;
1175                if(attributeName != null) {
1176                    String JavaDoc attributeNameStr = attributeName.toString();
1177                    if(attributeValue == null) {
1178                        element.setAttribute(attributeNameStr, null);
1179                    }
1180                    else {
1181                        StringBuffer JavaDoc actualAttributeValue = this.entityDecode(attributeValue, reader.getLineNumber());
1182                        element.setAttribute(attributeNameStr, actualAttributeValue.toString());
1183                    }
1184                }
1185                this.justReadTagBegin = false;
1186                this.justReadTagEnd = false;
1187                return true;
1188            }
1189            else {
1190                if(attributeValue == null) {
1191                    attributeValue = new StringBuffer JavaDoc(6);
1192                }
1193                if(lastCharSlash) {
1194                    attributeValue.append('/');
1195                }
1196                lastCharSlash = false;
1197                attributeValue.append(ch);
1198            }
1199        }
1200        this.justReadTagBegin = false;
1201        this.justReadTagEnd = false;
1202        if(attributeName != null) {
1203            String JavaDoc attributeNameStr = attributeName.toString();
1204            if(attributeValue == null) {
1205                element.setAttribute(attributeNameStr, null);
1206            }
1207            else {
1208                StringBuffer JavaDoc actualAttributeValue = this.entityDecode(attributeValue, reader.getLineNumber());
1209                element.setAttribute(attributeNameStr, actualAttributeValue.toString());
1210            }
1211        }
1212        return false;
1213    }
1214        
1215    private final StringBuffer JavaDoc entityDecode(StringBuffer JavaDoc rawText, int lineNumber) throws org.xml.sax.SAXException JavaDoc {
1216        int startIdx = 0;
1217        StringBuffer JavaDoc sb = null;
1218        for(;;) {
1219            int ampIdx = rawText.indexOf("&", startIdx);
1220            if(ampIdx == -1) {
1221                if(sb == null) {
1222                    return rawText;
1223                }
1224                else {
1225                    sb.append(rawText.substring(startIdx));
1226                    return sb;
1227                }
1228            }
1229            if(sb == null) {
1230                sb = new StringBuffer JavaDoc();
1231            }
1232            sb.append(rawText.substring(startIdx, ampIdx));
1233            int colonIdx = rawText.indexOf(";", ampIdx);
1234            if(colonIdx == -1) {
1235                sb.append('&');
1236                startIdx = ampIdx+1;
1237                continue;
1238            }
1239            String JavaDoc spec = rawText.substring(ampIdx+1, colonIdx);
1240            if(spec.startsWith("#")) {
1241                String JavaDoc number = spec.substring(1).toLowerCase();
1242                int decimal;
1243                try {
1244                    if(number.startsWith("x")) {
1245                        decimal = Integer.parseInt(number.substring(1), 16);
1246                    }
1247                    else {
1248                        decimal = Integer.parseInt(number);
1249                    }
1250                } catch(NumberFormatException JavaDoc nfe) {
1251                    if(this.errorHandler != null) {
1252                        this.errorHandler.error(new SAXParseException JavaDoc("Bad entity: " + spec, this.getLocator(lineNumber, 0)));
1253                    }
1254                    decimal = 0;
1255                }
1256                sb.append((char) decimal);
1257            }
1258            else {
1259                int chInt = this.getEntityChar(spec);
1260                if(chInt == -1) {
1261                    sb.append('&');
1262                    sb.append(spec);
1263                    sb.append(';');
1264                }
1265                else {
1266                    sb.append((char) chInt);
1267                }
1268            }
1269            startIdx = colonIdx+1;
1270        }
1271    }
1272        
1273    private final Locator JavaDoc getLocator(int lineNumber, int columnNumber) {
1274        return new LocatorImpl(this.publicId, this.systemId, lineNumber, columnNumber);
1275    }
1276    
1277    private final int getEntityChar(String JavaDoc spec) {
1278        //TODO: Declared entities
1279
Character JavaDoc c = (Character JavaDoc) ENTITIES.get(spec);
1280        if(c == null) {
1281            String JavaDoc specTL = spec.toLowerCase();
1282            c = (Character JavaDoc) ENTITIES.get(specTL);
1283            if(c == null) {
1284                return -1;
1285            }
1286        }
1287        return (int) c.charValue();
1288    }
1289}
1290
Popular Tags