KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > xml > xmlc > html > parsers > swing > SwingParser


/*
 * Enhydra Java Application Server Project
 *
 * The contents of this file are subject to the Enhydra Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License on
 * the Enhydra web site ( http://www.enhydra.org/ ).
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific terms governing rights and limitations
 * under the License.
 *
 * The Initial Developer of the Enhydra Application Server is Lutris
 * Technologies, Inc. The Enhydra Application Server and portions created
 * by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
 * All Rights Reserved.
 *
 * Contributor(s):
 *
 * $Id: SwingParser.java,v 1.3 2005/01/26 08:29:24 jkjome Exp $
 */


package org.enhydra.xml.xmlc.html.parsers.swing;

import java.io.IOException;
import java.io.Reader;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Locale;

import javax.swing.text.BadLocationException;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.DocumentParser;

import org.enhydra.xml.io.ErrorReporter;
import org.enhydra.xml.xmlc.XMLCException;
import org.enhydra.xml.xmlc.codegen.JavaLang;
import org.enhydra.xml.xmlc.dom.XMLCDocument;
import org.enhydra.xml.xmlc.html.parsers.HTMLDocBuilder;
import org.enhydra.xml.xmlc.misc.LineNumberMap;
import org.enhydra.xml.xmlc.parsers.ParseTracer;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.html.HTMLBodyElement;
import org.w3c.dom.html.HTMLHtmlElement;
import org.w3c.dom.html.HTMLScriptElement;

50 /**
51  * Interface to the Swing HTML parser. This attempts to correct problem with
52  * using the SWING HTML 3.2 parser to parse HTML 4.0 constructs and well as
53  * general bugginess in the Swing parser.
54  * <P>
55  * The known Swing problems are:
56  * <UL>
57  * <LI> Swing 1.1 didn't allow LINK elements in the header.
58  * <LI> Confusion around end forms (see processEndTagMisMatch),
59  * </UL>
60  */

61 class SwingParser extends HTMLEditorKit.ParserCallback JavaDoc {
62     /**
63      * Map constructed by reader of stream offset to filename
64      * and line number.
65      */

66     private LineNumberMap fLineNumberMap;
67
68     /**
69      * Since the Swing parser saves an exception and continues to parse,
70      * we set this flag to stop the parsing on an error.
71      */

72     private boolean fGotError = false;
73
74     /**
75      * XMLC Document object.
76      */

77     private XMLCDocument fXmlcDoc;
78
79     /**
80      * The document builder.
81      */

82     private HTMLDocBuilder fDocBuilder;
83
84     /**
85      * Error thrown to stop parser. This carries a causing exception
86      * out of the parser. This avoids the swing parser trying to handle
87      * the error and creating cascading errors instead.
88      */

89     class ParserError extends Error JavaDoc {
90         /**
91          * Causing exception.
92          */

93         private Exception JavaDoc fCause;
94
95         /**
96          * Constructor.
97          */

98         public ParserError(Exception JavaDoc cause) {
99             super(cause.getMessage());
100             fCause = cause;
101             fGotError = true;
102         }
103
104         /**
105          * Get the cause.
106          */

107         public Throwable JavaDoc getCause() {
108             return fCause;
109         }
110     }
111
112     /**
113      * Table of prefixes of SWING error messages to ignore.
114      */

115     private static final String JavaDoc[] fSwingIgnoredPrefixes = {
116         "invalid.tagatt", // Allow any attribute name.
117
"javascript.unsupported", // JavaScript should always be ignored :-)
118
"req.att",
119         "tag.unrecognized",
120         "end.unrecognized",
121         "unmatched.endtag",
122         "multi.tagatt",
123         "tag.ignoreimg"
124     };
125     
126     /**
127      * Ignore body tag errors for frame sets.
128      */

129     private static final String JavaDoc fIgnoreBodyPrefix = "tag.ignorebody";
130
131     /**
132      * Table of SWING unrecognized tags that we want to assume have and end
133      * tag.
134      */

135      private static final HashSet JavaDoc fUnrecognizedAssumeEndTag = new HashSet JavaDoc();
136
137     /**
138      * Error reporter.
139      */

140     private ErrorReporter fReporter;
141
142     /**
143      * Verbose tracing.
144      */

145     private ParseTracer fTracer;
146
147     /**
148      * Is tracing enabled?
149      */

150     private boolean fTracingEnabled;
151
152     /**
153      * Static constructor.
154      */

155     static {
156         fUnrecognizedAssumeEndTag.add("noscript");
157         fUnrecognizedAssumeEndTag.add("frameset");
158         fUnrecognizedAssumeEndTag.add("noframes");
159         fUnrecognizedAssumeEndTag.add("span");
160     }
161
162     /**
163      * Parse a file into a DOM.
164      */

165     void parse(HTMLDocBuilder docBuilder,
166                ErrorReporter errorReporter,
167                ParseTracer tracer,
168                Reader JavaDoc input,
169                LineNumberMap lineNumberMap) throws XMLCException, IOException JavaDoc {
170         fDocBuilder = docBuilder;
171         fXmlcDoc = docBuilder.getXMLCDocument();
172         fLineNumberMap = lineNumberMap;
173         fGotError = false;
174         fReporter = errorReporter;
175         fTracer = tracer;
176         fTracingEnabled = ((fTracer != null) && fTracer.enabled());
177
178         try {
179             DocumentParser JavaDoc parser = new ParserAdaptor().getParser();
180             parser.parse(input, this, true);
181         } catch (ParserError err) {
182             Throwable JavaDoc cause = err.getCause();
183             if (cause instanceof XMLCException) {
184                 throw (XMLCException)cause;
185             } else {
186                 throw new XMLCException(cause);
187             }
188         }
189         Document document = fXmlcDoc.getDocument();
190         if (fXmlcDoc.isHtmlFrameSet()) {
191             cleanupFrameSet(document);
192         }
193         checkNodes(document);
194     }
195
196     /**
197      * Generating an error message.
198      */

199     private void reportError(String JavaDoc msg, int pos) {
200         LineNumberMap.Line rec = fLineNumberMap.getLineFromOffset(pos);
201         fReporter.error(msg, rec.getFileName(), rec.getLineNum());
202     }
203
204     /**
205      * Is an HTML.Tag object representing an end-tag,
206      * as indicating by an ENDTAG attribute.
207      */

208      private boolean isEndTag(MutableAttributeSet JavaDoc attrSet) {
209          return attrSet.isDefined(HTML.Attribute.ENDTAG);
210      }
211
212     /**
213      * Determine if this one of the unknown tags that should be assumed
214      * to have an end tag.
215      */

216     private boolean assumeEndTag(HTML.Tag JavaDoc tag) {
217         return fUnrecognizedAssumeEndTag.contains(tag.toString());
218     }
219
220     /*
221      * Output tag trace information. Doesn't start a level of indentation.
222      */

223     private void traceTag(String JavaDoc message,
224                           HTML.Tag JavaDoc tag,
225                           MutableAttributeSet JavaDoc attrSet) {
226         StringBuffer JavaDoc msg = new StringBuffer JavaDoc(message);
227         msg.append(": ");
228         msg.append(tag.toString());
229         
230         if (attrSet != null) {
231             msg.append(":");
232             Enumeration JavaDoc keyes = attrSet.getAttributeNames();
233             while (keyes.hasMoreElements()) {
234                 Object JavaDoc attrKey = keyes.nextElement();
235                 Object JavaDoc attrValue = attrSet.getAttribute(attrKey);
236                 msg.append(" " + attrKey + "=\"" + attrValue + "\"");
237             }
238         }
239         fTracer.trace(msg.toString());
240     }
241
242     /*
243      * Output start tag trace info.
244      */

245     private void traceStartTag(String JavaDoc message,
246                                HTML.Tag JavaDoc tag,
247                                MutableAttributeSet JavaDoc attrSet) {
248         traceTag(message, tag, attrSet);
249         fTracer.enter();
250     }
251
252     /*
253      * Output end tag trace info.
254      */

255     private void traceEndTag(String JavaDoc message,
256                              HTML.Tag JavaDoc tag) {
257         StringBuffer JavaDoc msg = new StringBuffer JavaDoc(message);
258         msg.append(": ");
259         msg.append(tag.toString());
260         fTracer.leave();
261         fTracer.trace(msg.toString());
262     }
263
264     /**
265      * Determine if an error should be ignored based on some crude
266      * assumptions.
267      */

268     private boolean ignore(String JavaDoc errorMsg) {
269         // Swing can't handle <body> in <noframes> section
270
if (fXmlcDoc.isHtmlFrameSet()
271             && errorMsg.startsWith(fIgnoreBodyPrefix)) {
272             return true;
273         }
274         for (int i = 0; i < fSwingIgnoredPrefixes.length; i++) {
275             if (errorMsg.startsWith(fSwingIgnoredPrefixes[i])) {
276                 return true;
277             }
278         }
279         return false;
280     }
281
282     /**
283      * Generate basic close-tag mismatch message.
284      */

285     private String JavaDoc tagMisMatchMsg(HTML.Tag JavaDoc tag) {
286         return "Close tag mismatch, got </"
287             + tag.toString().toUpperCase() + ">, expected </"
288             + fDocBuilder.getCurrentNode().getNodeName() + ">";
289     }
290
291     /**
292      * Handle end tag mismatch, trying to correct the problem.
293      * Swing seems to get very confused by </FORM>, generating
294      * a end-tag event before all other tags are closed.
295      */

296     private void processEndTagMisMatch(HTML.Tag JavaDoc tag, int pos) {
297         if (tag.toString().equalsIgnoreCase("form")
298              || tag.toString().equalsIgnoreCase("span")) {
299             // </FORM> when not expected.
300
if (fTracingEnabled) {
301                 fTracer.trace("IGNORED: " + tagMisMatchMsg(tag)
302                       + ", not modifying node stack");
303             }
304             return;
305         }
306         Node JavaDoc currentNode = fDocBuilder.getCurrentNode();
307         if (currentNode.getNodeName().equalsIgnoreCase("form")
308              || currentNode.getNodeName().equalsIgnoreCase("span")) {
309             // <FORM> still on the stack, lets drop.
310
if (fTracingEnabled) {
311                 fTracer.trace("IGNORED: " + tagMisMatchMsg(tag)
312                       + ", appears to be left on stack from previous error; removing");
313             }
314             fDocBuilder.popCurrentNode();
315
316             // Try again with next node on stack.
317
handleEndTag(tag, pos);
318             return;
319         }
320
321         // Don't have a clue whats going on, maybe a bad page.
322
handleError(tagMisMatchMsg(tag), pos);
323     }
324
325     /**
326      * Process a begin tag.
327      */

328     private void processBeginTag(HTML.Tag JavaDoc tag,
329                                  MutableAttributeSet JavaDoc attrSet) {
330         String JavaDoc tagName = tag.toString();
331
332         fDocBuilder.startElement(tagName);
333         if ((attrSet != null) && (attrSet.getAttributeCount() > 0)) {
334             Enumeration JavaDoc keyes = attrSet.getAttributeNames();
335             while (keyes.hasMoreElements()) {
336                 Object JavaDoc attrKey = keyes.nextElement();
337                 Object JavaDoc attrValue = attrSet.getAttribute(attrKey);
338                 if (attrValue != null) {
339                     String JavaDoc name = attrKey.toString();
340                     if (!name.equals(ParserAdaptor.IMPLIED_PSEUDO_ATTR)) {
341                         fDocBuilder.addAttribute(name, attrValue.toString());
342                     }
343                 }
344             }
345         }
346     }
347
348     /**
349      * Process an end tag (real or synthetic).
350      */

351     private void processEndTag(HTML.Tag JavaDoc tag,
352                                int pos) {
353         Node JavaDoc currentNode = fDocBuilder.getCurrentNode();
354         if ((currentNode != null)
355             && !currentNode.getNodeName().equalsIgnoreCase(tag.toString())) {
356             processEndTagMisMatch(tag, pos);
357         } else {
358             fDocBuilder.finishElement();
359         }
360     }
361
362     /*
363      * Flush method for interface; does nothing.
364      * @see javax.swing.text.html.HTMLEditorKit.ParserCallback#flush
365      */

366     public void flush() throws BadLocationException JavaDoc {
367     }
368
369     /**
370      * Swing text handler.
371      * @see javax.swing.text.html.HTMLEditorKit.ParserCallback#handleText
372      */

373     public void handleText(char[] data, int pos) {
374         if (fGotError) {
375             return;
376         }
377         if (fTracingEnabled) {
378             fTracer.trace("TEXT: " + JavaLang.createStringConst(new String JavaDoc(data)));
379         }
380         fDocBuilder.addTextNode(new String JavaDoc(data));
381     }
382
383     /**
384      * Swing comment handler.
385      * @see javax.swing.text.html.HTMLEditorKit.ParserCallback#handleComment
386      */

387     public void handleComment(char[] data, int pos) {
388         if (fGotError) {
389             return;
390         }
391         if (fTracingEnabled) {
392             fTracer.trace("COMMENT: " + JavaLang.createStringConst(new String JavaDoc(data)));
393         }
394         fDocBuilder.addComment(new String JavaDoc(data));
395     }
396
397     /**
398      * Swing tag start handler.
399      * @see javax.swing.text.html.HTMLEditorKit.ParserCallback#handleStartTag
400      */

401     public void handleStartTag(HTML.Tag JavaDoc tag, MutableAttributeSet JavaDoc attrSet, int pos) {
402         if (fGotError) {
403             return;
404         }
405         if (fTracingEnabled) {
406             traceStartTag("BEGIN TAG", tag, attrSet);
407         }
408         processBeginTag(tag, attrSet);
409     }
410
411     /**
412      * Swing end tag start handler.
413      * @see javax.swing.text.html.HTMLEditorKit.ParserCallback#handleEndTag
414      */

415     public void handleEndTag(HTML.Tag JavaDoc tag, int pos) {
416         if (fGotError) {
417             return;
418         }
419         if (fTracingEnabled) {
420             traceEndTag("END TAG", tag);
421         }
422         processEndTag(tag, pos);
423     }
424
425     /**
426      * Swing simple tag start handler.
427      * @see javax.swing.text.html.HTMLEditorKit.ParserCallback#handleSimpleTag
428      */

429     public void handleSimpleTag(HTML.Tag JavaDoc tag,
430                                 MutableAttributeSet JavaDoc attrSet,
431                                 int pos) {
432         String JavaDoc tagName = tag.toString();
433
434         // Swing 1.3 throws in special tag call at the end.
435
if (fGotError || tagName.equals(ParserAdaptor.MAGIC_END_TAG)) {
436             return;
437         }
438         if (fTracingEnabled) {
439             traceTag("SIMPLE TAG", tag, attrSet);
440         }
441
442         /*
443          * Unknown tags get special handling where we correct the tree when
444          * the end tag is found. This is necessary, as it was treated as a
445          * simple tag when the opening tag was found. SPAN also comes through
446          * here in JDK 1.3.
447          */

448         if (isEndTag(attrSet)) {
449             if ((tag instanceof HTML.UnknownTag JavaDoc) || tagName.equals("span")) {
450                 // unrecognized or span
451
try {
452                     fDocBuilder.fixUnrecognizedTagNesting(tagName);
453                 } catch (XMLCException except) {
454                     reportError(except.toString(), pos);
455                 }
456             }
457         } else {
458             processBeginTag(tag, attrSet);
459             processEndTag(tag, pos);
460         }
461     }
462
463     /**
464      * Swing parser error handler that tries to manage the message
465      * generated by parser that should really be ignored.
466      * @see javax.swing.text.html.HTMLEditorKit.ParserCallback#handleError
467      */

468     public void handleError(String JavaDoc errorMsg,
469                             int pos) {
470         if (fGotError) {
471             return;
472         }
473         boolean ignored = ignore(errorMsg);
474         if (fTracingEnabled) {
475             if (ignored) {
476                 fTracer.trace("IGNORE: " + errorMsg + ": " + pos);
477             } else {
478                 fTracer.trace("ERROR: " + errorMsg + ": " + pos);
479             }
480         }
481         if (!ignored) {
482             reportError(errorMsg, pos);
483         }
484     }
485
486     /*
487      * Make sure a script element contains a comment. This is
488      * necessary because the SWING parser drops the contents
489      * if its not in comments.
490      */

491     private void checkScriptElement(HTMLScriptElement scriptElement) {
492         if (scriptElement.getFirstChild() == null) {
493             fReporter.error("Empty <SCRIPT> element found. The swing HTML parser discards javascript\n"
494                             + " that is not in a comment");
495         }
496     }
497
498     /**
499      * Recursively perform somes checks on a node once complete. This checks
500      * for a script elements without comments, as text in such elements is
501      * dropped.
502      */

503     private void checkNodes(Node JavaDoc node) {
504         if (node instanceof HTMLScriptElement) {
505             checkScriptElement((HTMLScriptElement)node);
506         }
507         for (Node JavaDoc child = node.getFirstChild(); child != null;
508              child = child.getNextSibling()) {
509             checkNodes(child);
510         }
511     }
512
513     /**
514      * Cleanup inserted top-level body in framesets that was added by Swing.
515      */

516     private void cleanupFrameSet(Document doc) {
517         // Find <HTML> node.
518
Node JavaDoc htmlElem = null;
519         for (Node JavaDoc child = doc.getFirstChild(); child != null;
520              child = child.getNextSibling()) {
521             if (child instanceof HTMLHtmlElement) {
522                 htmlElem = child;
523                 break;
524             }
525         }
526         if (htmlElem == null) {
527             return;
528         }
529         
530         // Find <BODY> node
531
Node JavaDoc bodyElem = null;
532         for (Node JavaDoc child = htmlElem.getFirstChild(); child != null;
533              child = child.getNextSibling()) {
534             if (child instanceof HTMLBodyElement) {
535                 bodyElem = child;
536                 break;
537             }
538         }
539         if (bodyElem == null) {
540             return;
541         }
542
543         // Move body children up one level.
544
for (Node JavaDoc child = bodyElem.getFirstChild(); child != null;) {
545             Node JavaDoc move = child;
546             child = child.getNextSibling();
547
548             bodyElem.removeChild(move);
549             htmlElem.insertBefore(move, bodyElem);
550         }
551         
552         // Finally, drop the <BODY>
553
htmlElem.removeChild(bodyElem);
554     }
555 }
556
Popular Tags