KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > cyberneko > html > HTMLTagBalancer


1 /*
2  * (C) Copyright 2002-2005, Andy Clark. All rights reserved.
3  *
4  * This file is distributed under an Apache style license. Please
5  * refer to the LICENSE file for specific details.
6  */

7
8 package org.cyberneko.html;
9
10 import java.lang.reflect.InvocationTargetException JavaDoc;
11 import java.lang.reflect.Method JavaDoc;
12
13 import org.apache.xerces.util.XMLAttributesImpl;
14 import org.apache.xerces.xni.Augmentations;
15 import org.apache.xerces.xni.NamespaceContext;
16 import org.apache.xerces.xni.QName;
17 import org.apache.xerces.xni.XMLAttributes;
18 import org.apache.xerces.xni.XMLDocumentHandler;
19 import org.apache.xerces.xni.XMLLocator;
20 import org.apache.xerces.xni.XMLResourceIdentifier;
21 import org.apache.xerces.xni.XMLString;
22 import org.apache.xerces.xni.XNIException;
23 import org.apache.xerces.xni.parser.XMLComponentManager;
24 import org.apache.xerces.xni.parser.XMLConfigurationException;
25 import org.apache.xerces.xni.parser.XMLDocumentFilter;
26 import org.apache.xerces.xni.parser.XMLDocumentSource;
27                       
28 /**
29  * Balances tags in an HTML document. This component receives document events
30  * and tries to correct many common mistakes that human (and computer) HTML
31  * document authors make. This tag balancer can:
32  * <ul>
33  * <li>add missing parent elements;
34  * <li>automatically close elements with optional end tags; and
35  * <li>handle mis-matched inline element tags.
36  * </ul>
37  * <p>
38  * This component recognizes the following features:
39  * <ul>
40  * <li>http://cyberneko.org/html/features/augmentations
41  * <li>http://cyberneko.org/html/features/report-errors
42  * <li>http://cyberneko.org/html/features/balance-tags/document-fragment
43  * <li>http://cyberneko.org/html/features/balance-tags/ignore-outside-content
44  * </ul>
45  * <p>
46  * This component recognizes the following properties:
47  * <ul>
48  * <li>http://cyberneko.org/html/properties/names/elems
49  * <li>http://cyberneko.org/html/properties/names/attrs
50  * <li>http://cyberneko.org/html/properties/error-reporter
51  * </ul>
52  *
53  * @see HTMLElements
54  *
55  * @author Andy Clark
56  *
57  * @version $Id: HTMLTagBalancer.java,v 1.20 2005/02/14 04:06:22 andyc Exp $
58  */

59 public class HTMLTagBalancer
60     implements XMLDocumentFilter, HTMLComponent {
61
62     //
63
// Constants
64
//
65

66     // features
67

68     /** Namespaces. */
69     protected static final String JavaDoc NAMESPACES = "http://xml.org/sax/features/namespaces";
70
71     /** Include infoset augmentations. */
72     protected static final String JavaDoc AUGMENTATIONS = "http://cyberneko.org/html/features/augmentations";
73
74     /** Report errors. */
75     protected static final String JavaDoc REPORT_ERRORS = "http://cyberneko.org/html/features/report-errors";
76
77     /**
     * Document fragment balancing only (deprecated). This older identifier
     * is still read by <code>reset</code>, which treats fragment balancing
     * as enabled when either this feature or {@link #DOCUMENT_FRAGMENT}
     * is on.
     */
78     protected static final String JavaDoc DOCUMENT_FRAGMENT_DEPRECATED = "http://cyberneko.org/html/features/document-fragment";
79
80     /** Document fragment balancing only. */
81     protected static final String JavaDoc DOCUMENT_FRAGMENT = "http://cyberneko.org/html/features/balance-tags/document-fragment";
82
83     /** Ignore outside content. */
84     protected static final String JavaDoc IGNORE_OUTSIDE_CONTENT = "http://cyberneko.org/html/features/balance-tags/ignore-outside-content";
85
86     /**
     * Recognized features. The order of this array matters: the entry at
     * index <em>i</em> is paired with the default at index <em>i</em> of
     * {@link #RECOGNIZED_FEATURES_DEFAULTS}.
     */
87     private static final String JavaDoc[] RECOGNIZED_FEATURES = {
88         NAMESPACES,
89         AUGMENTATIONS,
90         REPORT_ERRORS,
91         DOCUMENT_FRAGMENT_DEPRECATED,
92         DOCUMENT_FRAGMENT,
93         IGNORE_OUTSIDE_CONTENT,
94     };
95
96     /**
     * Recognized features defaults, positionally parallel to
     * {@link #RECOGNIZED_FEATURES}. A <code>null</code> entry means this
     * component supplies no default for that feature.
     */
97     private static final Boolean JavaDoc[] RECOGNIZED_FEATURES_DEFAULTS = {
98         null, // NAMESPACES
99         null, // AUGMENTATIONS
100         null, // REPORT_ERRORS
101         null, // DOCUMENT_FRAGMENT_DEPRECATED
102         Boolean.FALSE, // DOCUMENT_FRAGMENT
103         Boolean.FALSE, // IGNORE_OUTSIDE_CONTENT
104     };
105
106     // properties
107

108     /** Modify HTML element names: { "upper", "lower", "default" }. */
109     protected static final String JavaDoc NAMES_ELEMS = "http://cyberneko.org/html/properties/names/elems";
110
111     /** Modify HTML attribute names: { "upper", "lower", "default" }. */
112     protected static final String JavaDoc NAMES_ATTRS = "http://cyberneko.org/html/properties/names/attrs";
113     
114     /** Error reporter. */
115     protected static final String JavaDoc ERROR_REPORTER = "http://cyberneko.org/html/properties/error-reporter";
116
117     /**
     * Recognized properties. The entry at index <em>i</em> is paired with
     * the default at index <em>i</em> of
     * {@link #RECOGNIZED_PROPERTIES_DEFAULTS}.
     */
118     private static final String JavaDoc[] RECOGNIZED_PROPERTIES = {
119         NAMES_ELEMS,
120         NAMES_ATTRS,
121         ERROR_REPORTER,
122     };
123
124     /**
     * Recognized properties defaults, positionally parallel to
     * {@link #RECOGNIZED_PROPERTIES}. All entries are <code>null</code>,
     * i.e. this component supplies no property defaults.
     */
125     private static final Object JavaDoc[] RECOGNIZED_PROPERTIES_DEFAULTS = {
126         null, // NAMES_ELEMS
127         null, // NAMES_ATTRS
128         null, // ERROR_REPORTER
129     };
130
131     // modify HTML names
132

133     /** Don't modify HTML names. */
134     protected static final short NAMES_NO_CHANGE = 0;
135
136     /**
     * Match HTML element names.
     * NOTE(review): this constant has the same value (0) as
     * {@link #NAMES_NO_CHANGE}, so the two cannot be told apart at
     * runtime. Presumably an intentional alias -- confirm before
     * assigning it a distinct value.
     */
137     protected static final short NAMES_MATCH = 0;
138
139     /** Uppercase HTML names. */
140     protected static final short NAMES_UPPERCASE = 1;
141
142     /** Lowercase HTML names. */
143     protected static final short NAMES_LOWERCASE = 2;
144
145     // static vars
146

147     /** Synthesized event info item. */
148     protected static final HTMLEventInfo SYNTHESIZED_ITEM =
149         new HTMLEventInfo.SynthesizedItem();
150
151     //
152
// Data
153
//
154

155     // features
156

157     /** Namespaces. */
158     protected boolean fNamespaces;
159
160     /** Include infoset augmentations. */
161     protected boolean fAugmentations;
162     
163     /** Report errors. */
164     protected boolean fReportErrors;
165
166     /** Document fragment balancing only. */
167     protected boolean fDocumentFragment;
168
169     /** Ignore outside content. */
170     protected boolean fIgnoreOutsideContent;
171
172     // properties
173

174     /** Modify HTML element names. */
175     protected short fNamesElems;
176
177     /** Modify HTML attribute names. */
178     protected short fNamesAttrs;
179
180     /** Error reporter. */
181     protected HTMLErrorReporter fErrorReporter;
182
183     // connections
184

185     /** The document source. */
186     protected XMLDocumentSource fDocumentSource;
187
188     /** The document handler. */
189     protected XMLDocumentHandler fDocumentHandler;
190
191     // state
192

193     /** The element stack. */
194     protected final InfoStack fElementStack = new InfoStack();
195
196     /**
     * The inline stack. Scratch stack used while balancing: it is emptied
     * (top reset to 0) before each use and holds the inline elements that
     * were closed early and must be re-opened afterwards.
     */
197     protected final InfoStack fInlineStack = new InfoStack();
198
199     /** True if seen anything. Important for xml declaration. */
200     protected boolean fSeenAnything;
201
202     /** True if a doctype declaration has been seen (and passed on). */
203     protected boolean fSeenDoctype;
204
205     /** True if root element has been seen. */
206     protected boolean fSeenRootElement;
207
208     /**
209      * True if seen the end of the document element. In other words,
210      * this variable is set to false <em>until</em> the end &lt;/HTML&gt;
211      * tag is seen (or synthesized). This is used to ensure that
212      * extraneous events after the end of the document element do not
213      * make the document stream ill-formed.
214      */

215     protected boolean fSeenRootElementEnd;
216
217     /** True if seen &lt;head&gt; element. */
218     protected boolean fSeenHeadElement;
219
220     /** True if seen &lt;body&gt; element. */
221     protected boolean fSeenBodyElement;
222
223     // temp vars
224

225     /**
     * A qualified name. Reused as a scratch object whenever an element
     * event is synthesized, so its contents are only valid until the next
     * <code>setValues</code> call.
     */
226     private final QName fQName = new QName();
227
228     /** Empty attributes. */
229     private final XMLAttributes fEmptyAttrs = new XMLAttributesImpl();
230
231     /** Augmentations. */
232     private final HTMLAugmentations fInfosetAugs = new HTMLAugmentations();
233
234     //
235
// HTMLComponent methods
236
//
237

238     /** Returns the default state for a feature. */
239     public Boolean JavaDoc getFeatureDefault(String JavaDoc featureId) {
240         int length = RECOGNIZED_FEATURES != null ? RECOGNIZED_FEATURES.length : 0;
241         for (int i = 0; i < length; i++) {
242             if (RECOGNIZED_FEATURES[i].equals(featureId)) {
243                 return RECOGNIZED_FEATURES_DEFAULTS[i];
244             }
245         }
246         return null;
247     } // getFeatureDefault(String):Boolean
248

249     /** Returns the default state for a property. */
250     public Object JavaDoc getPropertyDefault(String JavaDoc propertyId) {
251         int length = RECOGNIZED_PROPERTIES != null ? RECOGNIZED_PROPERTIES.length : 0;
252         for (int i = 0; i < length; i++) {
253             if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) {
254                 return RECOGNIZED_PROPERTIES_DEFAULTS[i];
255             }
256         }
257         return null;
258     } // getPropertyDefault(String):Object
259

260     //
261
// XMLComponent methods
262
//
263

264     /** Returns recognized features. */
265     public String JavaDoc[] getRecognizedFeatures() {
266         return RECOGNIZED_FEATURES;
267     } // getRecognizedFeatures():String[]
268

269     /** Returns recognized properties. */
270     public String JavaDoc[] getRecognizedProperties() {
271         return RECOGNIZED_PROPERTIES;
272     } // getRecognizedProperties():String[]
273

274     /** Resets the component. */
275     public void reset(XMLComponentManager manager)
276         throws XMLConfigurationException {
277
278         // get features
279
fNamespaces = manager.getFeature(NAMESPACES);
280         fAugmentations = manager.getFeature(AUGMENTATIONS);
281         fReportErrors = manager.getFeature(REPORT_ERRORS);
282         fDocumentFragment = manager.getFeature(DOCUMENT_FRAGMENT) ||
283                             manager.getFeature(DOCUMENT_FRAGMENT_DEPRECATED);
284         fIgnoreOutsideContent = manager.getFeature(IGNORE_OUTSIDE_CONTENT);
285
286         // get properties
287
fNamesElems = getNamesValue(String.valueOf(manager.getProperty(NAMES_ELEMS)));
288         fNamesAttrs = getNamesValue(String.valueOf(manager.getProperty(NAMES_ATTRS)));
289         fErrorReporter = (HTMLErrorReporter)manager.getProperty(ERROR_REPORTER);
290
291     } // reset(XMLComponentManager)
292

293     /** Sets a feature. */
294     public void setFeature(String JavaDoc featureId, boolean state)
295         throws XMLConfigurationException {
296
297         if (featureId.equals(AUGMENTATIONS)) {
298             fAugmentations = state;
299             return;
300         }
301         if (featureId.equals(REPORT_ERRORS)) {
302             fReportErrors = state;
303             return;
304         }
305         if (featureId.equals(IGNORE_OUTSIDE_CONTENT)) {
306             fIgnoreOutsideContent = state;
307             return;
308         }
309
310     } // setFeature(String,boolean)
311

312     /** Sets a property. */
313     public void setProperty(String JavaDoc propertyId, Object JavaDoc value)
314         throws XMLConfigurationException {
315     
316         if (propertyId.equals(NAMES_ELEMS)) {
317             fNamesElems = getNamesValue(String.valueOf(value));
318             return;
319         }
320
321         if (propertyId.equals(NAMES_ATTRS)) {
322             fNamesAttrs = getNamesValue(String.valueOf(value));
323             return;
324         }
325
326     } // setProperty(String,Object)
327

328     //
329
// XMLDocumentSource methods
330
//
331

332     /** Sets the document handler. */
333     public void setDocumentHandler(XMLDocumentHandler handler) {
334         fDocumentHandler = handler;
335     } // setDocumentHandler(XMLDocumentHandler)
336

337     // @since Xerces 2.1.0
338

339     /** Returns the document handler. */
340     public XMLDocumentHandler getDocumentHandler() {
341         return fDocumentHandler;
342     } // getDocumentHandler():XMLDocumentHandler
343

344     //
345
// XMLDocumentHandler methods
346
//
347

348     // since Xerces-J 2.2.0
349

350     /** Start document. */
351     public void startDocument(XMLLocator locator, String JavaDoc encoding,
352                               NamespaceContext nscontext, Augmentations augs)
353         throws XNIException {
354
355         // reset state
356
fElementStack.top = 0;
357         fSeenAnything = false;
358         fSeenDoctype = false;
359         fSeenRootElement = false;
360         fSeenRootElementEnd = false;
361         fSeenHeadElement = false;
362         fSeenBodyElement = false;
363
364         // pass on event
365
if (fDocumentHandler != null) {
366             try {
367                 // NOTE: Hack to allow the default filter to work with
368
// old and new versions of the XNI document handler
369
// interface. -Ac
370
Class JavaDoc cls = fDocumentHandler.getClass();
371                 Class JavaDoc[] types = {
372                     XMLLocator.class, String JavaDoc.class,
373                     NamespaceContext.class, Augmentations.class
374                 };
375                 Method JavaDoc method = cls.getMethod("startDocument", types);
376                 Object JavaDoc[] params = {
377                     locator, encoding,
378                     nscontext, augs
379                 };
380                 method.invoke(fDocumentHandler, params);
381             }
382             catch (IllegalAccessException JavaDoc e) {
383                 throw new XNIException(e);
384             }
385             catch (InvocationTargetException JavaDoc e) {
386                 throw new XNIException(e);
387             }
388             catch (NoSuchMethodException JavaDoc e) {
389                 try {
390                     // NOTE: Hack to allow the default filter to work with
391
// old and new versions of the XNI document handler
392
// interface. -Ac
393
Class JavaDoc cls = fDocumentHandler.getClass();
394                     Class JavaDoc[] types = {
395                         XMLLocator.class, String JavaDoc.class, Augmentations.class
396                     };
397                     Method JavaDoc method = cls.getMethod("startDocument", types);
398                     Object JavaDoc[] params = {
399                         locator, encoding, augs
400                     };
401                     method.invoke(fDocumentHandler, params);
402                 }
403                 catch (IllegalAccessException JavaDoc ex) {
404                     // NOTE: Should never reach here!
405
throw new XNIException(ex);
406                 }
407                 catch (InvocationTargetException JavaDoc ex) {
408                     // NOTE: Should never reach here!
409
throw new XNIException(ex);
410                 }
411                 catch (NoSuchMethodException JavaDoc ex) {
412                     // NOTE: Should never reach here!
413
throw new XNIException(ex);
414                 }
415             }
416         }
417     
418     } // startDocument(XMLLocator,String,Augmentations)
419

420     // old methods
421

422     /** XML declaration. */
423     public void xmlDecl(String JavaDoc version, String JavaDoc encoding, String JavaDoc standalone,
424                         Augmentations augs) throws XNIException {
425         if (!fSeenAnything && fDocumentHandler != null) {
426             fDocumentHandler.xmlDecl(version, encoding, standalone, augs);
427         }
428     } // xmlDecl(String,String,String,Augmentations)
429

430     /** Doctype declaration. */
431     public void doctypeDecl(String JavaDoc rootElementName, String JavaDoc publicId, String JavaDoc systemId,
432                             Augmentations augs) throws XNIException {
433         fSeenAnything = true;
434         if (fReportErrors) {
435             if (fSeenRootElement) {
436                 fErrorReporter.reportError("HTML2010", null);
437             }
438             else if (fSeenDoctype) {
439                 fErrorReporter.reportError("HTML2011", null);
440             }
441         }
442         if (!fSeenRootElement && !fSeenDoctype) {
443             fSeenDoctype = true;
444             if (fDocumentHandler != null) {
445                 fDocumentHandler.doctypeDecl(rootElementName, publicId, systemId, augs);
446             }
447         }
448     } // doctypeDecl(String,String,String,Augmentations)
449

450     /** End document. */
451     public void endDocument(Augmentations augs) throws XNIException {
452
453         // handle empty document
454
if (!fSeenRootElement && !fDocumentFragment) {
455             if (fReportErrors) {
456                 fErrorReporter.reportError("HTML2000", null);
457             }
458             String JavaDoc ename = modifyName("html", fNamesElems);
459             fQName.setValues(null, ename, ename, null);
460             if (fDocumentHandler != null) {
461                 callStartElement(fQName, emptyAttributes(), synthesizedAugs());
462                 callEndElement(fQName, synthesizedAugs());
463             }
464         }
465
466         // pop all remaining elements
467
else {
468             int length = fElementStack.top;
469             for (int i = 0; i < length; i++) {
470                 Info info = fElementStack.pop();
471                 if (fReportErrors) {
472                     String JavaDoc ename = info.qname.rawname;
473                     fErrorReporter.reportWarning("HTML2001", new Object JavaDoc[]{ename});
474                 }
475                 if (fDocumentHandler != null) {
476                     callEndElement(info.qname, synthesizedAugs());
477                 }
478             }
479         }
480
481         // call handler
482
if (fDocumentHandler != null) {
483             fDocumentHandler.endDocument(augs);
484         }
485
486     } // endDocument(Augmentations)
487

488     /** Comment. */
489     public void comment(XMLString text, Augmentations augs) throws XNIException {
490         fSeenAnything = true;
491         if (fDocumentHandler != null) {
492             fDocumentHandler.comment(text, augs);
493         }
494     } // comment(XMLString,Augmentations)
495

496     /** Processing instruction. */
497     public void processingInstruction(String JavaDoc target, XMLString data,
498                                       Augmentations augs) throws XNIException {
499         fSeenAnything = true;
500         if (fDocumentHandler != null) {
501             fDocumentHandler.processingInstruction(target, data, augs);
502         }
503     } // processingInstruction(String,XMLString,Augmentations)
504

505     /**
     * Start element. This is the core of the balancer; in order, it:
     * <ol>
     * <li>drops the event if the root element has already ended;
     * <li>drops repeated html, head and body start tags;
     * <li>synthesizes a missing parent element by calling itself
     *     recursively with a null attribute list;
     * <li>for elements whose <code>flags</code> are 0, closes the run of
     *     inline elements on top of the element stack and remembers them
     *     on the inline stack;
     * <li>closes any open elements that this element implicitly closes;
     * <li>forwards the element (as an empty element when the element
     *     definition says it is empty, otherwise pushing it onto the
     *     element stack); and
     * <li>re-opens the inline elements remembered in step 4 inside the
     *     new element.
     * </ol>
     *
     * @param elem  the element name
     * @param attrs the element attributes; may be null for synthesized
     *              elements, in which case empty attributes are passed on
     * @param augs  additional information attached to the event
     * @throws XNIException thrown by the handler to signal an error
     */
506     public void startElement(QName elem, XMLAttributes attrs, Augmentations augs)
507         throws XNIException {
508         fSeenAnything = true;
509         
510         // check for end of document
511
if (fSeenRootElementEnd) {
512             return;
513         }
514
515         // get element information
516
HTMLElements.Element element = getElement(elem.rawname);
517
518         // ignore multiple html, head, body elements
519
if (fSeenRootElement && element.code == HTMLElements.HTML) {
520             return;
521         }
522         if (element.code == HTMLElements.HEAD) {
523             if (fSeenHeadElement) {
524                 return;
525             }
526             fSeenHeadElement = true;
527         }
528         if (element.code == HTMLElements.BODY) {
529             if (fSeenBodyElement) {
530                 return;
531             }
532             fSeenBodyElement = true;
533         }
534
535         // check proper parent
536
if (element.parent != null) {
        // nothing opened yet: open the first listed parent; the recursive
        // call in turn opens that parent's own parent, and so on
537             if (!fSeenRootElement && !fDocumentFragment) {
538                 String JavaDoc pname = element.parent[0].name;
539                 pname = modifyName(pname, fNamesElems);
540                 if (fReportErrors) {
541                     String JavaDoc ename = elem.rawname;
542                     fErrorReporter.reportWarning("HTML2002", new Object JavaDoc[]{ename,pname});
543                 }
544                 QName qname = new QName(null, pname, pname, null);
545                 startElement(qname, null, synthesizedAugs());
546             }
547             else {
548                 HTMLElements.Element pelement = element.parent[0];
                // a missing head is only synthesized while no body has
                // been seen and we are not balancing a fragment
549                 if (pelement.code != HTMLElements.HEAD || (!fSeenBodyElement && !fDocumentFragment)) {
550                     int depth = getParentDepth(element.parent, element.bounds);
551                     if (depth == -1) {
552                         String JavaDoc pname = pelement.name;
553                         pname = modifyName(pname, fNamesElems);
                        // only insert the parent if the parent's own
                        // parent is currently open
554                         int pdepth = getParentDepth(pelement.parent, pelement.bounds);
555                         if (pdepth != -1) {
556                             QName qname = new QName(null, pname, pname, null);
557                             if (fReportErrors) {
558                                 String JavaDoc ename = elem.rawname;
559                                 fErrorReporter.reportWarning("HTML2004", new Object JavaDoc[]{ename,pname});
560                             }
561                             startElement(qname, null, synthesizedAugs());
562                         }
563                     }
564                 }
565             }
566         }
567
568         // if block element, save immediate parent inline elements
        // NOTE(review): "block" is tested here as flags == 0 -- presumably
        // an element with no flags set is a block element; confirm against
        // HTMLElements.
569
int depth = 0;
570         if (element.flags == 0) {
571             int length = fElementStack.top;
572             fInlineStack.top = 0;
573             for (int i = length - 1; i >= 0; i--) {
574                 Info info = fElementStack.data[i];
575                 if (!info.element.isInline()) {
576                     break;
577                 }
578                 fInlineStack.push(info);
579                 endElement(info.qname, synthesizedAugs());
580             }
            // number of inline elements to re-open at the end of this method
581             depth = fInlineStack.top;
582         }
583
584         // close previous elements
585
if (element.closes != null) {
586             int length = fElementStack.top;
587             for (int i = length - 1; i >= 0; i--) {
588                 Info info = fElementStack.data[i];
589
590                 // does it close the element we're looking at?
591
if (element.closes(info.element.code)) {
592                     if (fReportErrors) {
593                         String JavaDoc ename = elem.rawname;
594                         String JavaDoc iname = info.qname.rawname;
595                         fErrorReporter.reportWarning("HTML2005", new Object JavaDoc[]{ename,iname});
596                     }
                    // pop everything from the top of the stack down to
                    // and including entry i
597                     for (int j = length - 1; j >= i; j--) {
598                         info = fElementStack.pop();
599                         if (fDocumentHandler != null) {
600                             // PATCH: Marc-André Morissette
601
callEndElement(info.qname, synthesizedAugs());
602                         }
603                     }
                    // entries at and above i are gone; keep scanning below
604                     length = i;
605                     continue;
606                 }
607                 
608                 // should we stop searching?
                // NOTE(review): element.parent is dereferenced here without
                // a null check although the enclosing test is only
                // element.closes != null; this assumes every element with a
                // closes list also has a parent list -- confirm.
609
boolean container = info.element.isContainer();
610                 boolean parent = false;
611                 if (!container) {
612                     for (int j = 0; j < element.parent.length; j++) {
613                         parent = parent || info.element.code == element.parent[j].code;
614                     }
615                 }
616                 if (container || parent) {
617                     break;
618                 }
619             }
620         }
621
622         // call handler
        // NOTE(review): element has already been dereferenced above, so the
        // element != null tests below can only matter if getElement may
        // return null, in which case the earlier accesses would fail first.
623
fSeenRootElement = true;
624         if (element != null && element.isEmpty()) {
625             if (attrs == null) {
626                 attrs = emptyAttributes();
627             }
628             if (fDocumentHandler != null) {
629                 fDocumentHandler.emptyElement(elem, attrs, augs);
630             }
631         }
632         else {
633             boolean inline = element != null && element.isInline();
            // attributes are kept on the stack only for inline elements,
            // which are the ones that may need to be re-opened later
634             fElementStack.push(new Info(element, elem, inline ? attrs : null));
635             if (attrs == null) {
636                 attrs = emptyAttributes();
637             }
638             if (fDocumentHandler != null) {
639                 callStartElement(elem, attrs, augs);
640             }
641         }
642
643         // re-open inline elements
644
for (int i = 0; i < depth; i++) {
645             Info info = fInlineStack.pop();
646             startElement(info.qname, info.attributes, synthesizedAugs());
647         }
648
649     } // startElement(QName,XMLAttributes,Augmentations)
650

651     /** Empty element. */
652     public void emptyElement(QName elem, XMLAttributes attrs, Augmentations augs)
653         throws XNIException {
654         startElement(elem, attrs, augs);
655         endElement(elem, augs);
656     } // emptyElement(QName,XMLAttributes,Augmentations)
657

658     /** Start entity. */
659     public void startGeneralEntity(String JavaDoc name,
660                                    XMLResourceIdentifier id,
661                                    String JavaDoc encoding,
662                                    Augmentations augs) throws XNIException {
663         fSeenAnything = true;
664
665         // check for end of document
666
if (fSeenRootElementEnd) {
667             return;
668         }
669
670         // insert body, if needed
671
if (!fDocumentFragment) {
672             boolean insertBody = !fSeenRootElement;
673             if (!insertBody) {
674                 Info info = fElementStack.peek();
675                 if (info.element.code == HTMLElements.HEAD ||
676                     info.element.code == HTMLElements.HTML) {
677                     String JavaDoc hname = modifyName("head", fNamesElems);
678                     String JavaDoc bname = modifyName("body", fNamesElems);
679                     if (fReportErrors) {
680                         fErrorReporter.reportWarning("HTML2009", new Object JavaDoc[]{hname,bname});
681                     }
682                     fQName.setValues(null, hname, hname, null);
683                     endElement(fQName, synthesizedAugs());
684                     insertBody = true;
685                 }
686             }
687             if (insertBody) {
688                 String JavaDoc ename = modifyName("body", fNamesElems);
689                 fQName.setValues(null, ename, ename, null);
690                 if (fReportErrors) {
691                     fErrorReporter.reportWarning("HTML2006", new Object JavaDoc[]{ename});
692                 }
693                 startElement(fQName, null, synthesizedAugs());
694             }
695         }
696         
697         // call handler
698
if (fDocumentHandler != null) {
699             fDocumentHandler.startGeneralEntity(name, id, encoding, augs);
700         }
701
702     } // startGeneralEntity(String,XMLResourceIdentifier,String,Augmentations)
703

704     /** Text declaration. */
705     public void textDecl(String JavaDoc version, String JavaDoc encoding, Augmentations augs)
706         throws XNIException {
707         fSeenAnything = true;
708         
709         // check for end of document
710
if (fSeenRootElementEnd) {
711             return;
712         }
713
714         // call handler
715
if (fDocumentHandler != null) {
716             fDocumentHandler.textDecl(version, encoding, augs);
717         }
718
719     } // textDecl(String,String,Augmentations)
720

721     /** End entity. */
722     public void endGeneralEntity(String JavaDoc name, Augmentations augs) throws XNIException {
723         
724         // check for end of document
725
if (fSeenRootElementEnd) {
726             return;
727         }
728
729         // call handler
730
if (fDocumentHandler != null) {
731             fDocumentHandler.endGeneralEntity(name, augs);
732         }
733
734     } // endGeneralEntity(String,Augmentations)
735

736     /** Start CDATA section. */
737     public void startCDATA(Augmentations augs) throws XNIException {
738         fSeenAnything = true;
739         
740         // check for end of document
741
if (fSeenRootElementEnd) {
742             return;
743         }
744
745         // call handler
746
if (fDocumentHandler != null) {
747             fDocumentHandler.startCDATA(augs);
748         }
749
750     } // startCDATA(Augmentations)
751

752     /** End CDATA section. */
753     public void endCDATA(Augmentations augs) throws XNIException {
754
755         // check for end of document
756
if (fSeenRootElementEnd) {
757             return;
758         }
759
760         // call handler
761
if (fDocumentHandler != null) {
762             fDocumentHandler.endCDATA(augs);
763         }
764
765     } // endCDATA(Augmentations)
766

767     /** Characters. */
768     public void characters(XMLString text, Augmentations augs) throws XNIException {
769
770         // check for end of document
771
if (fSeenRootElementEnd) {
772             return;
773         }
774
775         // is this text whitespace?
776
boolean whitespace = true;
777         for (int i = 0; i < text.length; i++) {
778             if (!Character.isWhitespace(text.ch[text.offset + i])) {
779                 whitespace = false;
780                 break;
781             }
782         }
783
784         if (!fDocumentFragment) {
785             // handle bare characters
786
if (!fSeenRootElement) {
787                 if (whitespace) {
788                     return;
789                 }
790                 String JavaDoc ename = modifyName("body", fNamesElems);
791                 fQName.setValues(null, ename, ename, null);
792                 if (fReportErrors) {
793                     fErrorReporter.reportWarning("HTML2006", new Object JavaDoc[]{ename});
794                 }
795                 startElement(fQName, null, synthesizedAugs());
796             }
797
798             // handle character content in head
799
// NOTE: This fequently happens when the document looks like:
800
// <title>Title</title>
801
// And here's some text.
802
else if (!whitespace) {
803                 Info info = fElementStack.peek();
804                 if (info.element.code == HTMLElements.HEAD ||
805                     info.element.code == HTMLElements.HTML) {
806                     String JavaDoc hname = modifyName("head", fNamesElems);
807                     String JavaDoc bname = modifyName("body", fNamesElems);
808                     if (fReportErrors) {
809                         fErrorReporter.reportWarning("HTML2009", new Object JavaDoc[]{hname,bname});
810                     }
811                     fQName.setValues(null, hname, hname, null);
812                     endElement(fQName, synthesizedAugs());
813                     fQName.setValues(null, bname, bname, null);
814                     startElement(fQName, null, synthesizedAugs());
815                 }
816             }
817         }
818
819         // call handler
820
if (fDocumentHandler != null) {
821             fDocumentHandler.characters(text, augs);
822         }
823
824     } // characters(XMLString,Augmentations)
825

826     /** Ignorable whitespace. */
827     public void ignorableWhitespace(XMLString text, Augmentations augs)
828         throws XNIException {
829         characters(text, augs);
830     } // ignorableWhitespace(XMLString,Augmentations)
831

832     /** End element. */
833     public void endElement(QName element, Augmentations augs) throws XNIException {
834         
835         // is there anything to do?
836
if (fSeenRootElementEnd) {
837             return;
838         }
839         
840         // get element information
841
HTMLElements.Element elem = getElement(element.rawname);
842
843         // do we ignore outside content?
844
if (!fIgnoreOutsideContent &&
845             (elem.code == HTMLElements.BODY || elem.code == HTMLElements.HTML)) {
846             return;
847         }
848
849         // check for end of document
850
if (elem.code == HTMLElements.HTML) {
851             fSeenRootElementEnd = true;
852         }
853
854         // empty element
855
int depth = getElementDepth(elem);
856         if (depth == -1 && elem.code == HTMLElements.P) {
857             startElement(element, emptyAttributes(), synthesizedAugs());
858             endElement(element, augs);
859             return;
860         }
861
862         // find unbalanced inline elements
863
if (depth > 1 && elem.isInline()) {
864             int size = fElementStack.top;
865             fInlineStack.top = 0;
866             for (int i = 0; i < depth - 1; i++) {
867                 Info info = fElementStack.data[size - i - 1];
868                 HTMLElements.Element pelem = info.element;
869                 if (pelem.isInline()) {
870                     // NOTE: I don't have to make a copy of the info because
871
// it will just be popped off of the element stack
872
// as soon as we close it, anyway.
873
fInlineStack.push(info);
874                 }
875             }
876         }
877
878         // close children up to appropriate element
879
for (int i = 0; i < depth; i++) {
880             Info info = fElementStack.pop();
881             if (fReportErrors && i < depth - 1) {
882                 String JavaDoc ename = modifyName(element.rawname, fNamesElems);
883                 String JavaDoc iname = info.qname.rawname;
884                 fErrorReporter.reportWarning("HTML2007", new Object JavaDoc[]{ename,iname});
885             }
886             if (fDocumentHandler != null) {
887                 // PATCH: Marc-André Morissette
888
callEndElement(info.qname, i < depth - 1 ? synthesizedAugs() : augs);
889             }
890         }
891
892         // re-open inline elements
893
if (depth > 1) {
894             int size = fInlineStack.top;
895             for (int i = 0; i < size; i++) {
896                 Info info = (Info)fInlineStack.pop();
897                 XMLAttributes attributes = info.attributes;
898                 if (fReportErrors) {
899                     String JavaDoc iname = info.qname.rawname;
900                     fErrorReporter.reportWarning("HTML2008", new Object JavaDoc[]{iname});
901                 }
902                 startElement(info.qname, attributes, synthesizedAugs());
903             }
904         }
905
906     } // endElement(QName,Augmentations)
907

908     // @since Xerces 2.1.0
909

910     /** Sets the document source. */
911     public void setDocumentSource(XMLDocumentSource source) {
912         fDocumentSource = source;
913     } // setDocumentSource(XMLDocumentSource)
914

915     /** Returns the document source. */
916     public XMLDocumentSource getDocumentSource() {
917         return fDocumentSource;
918     } // getDocumentSource():XMLDocumentSource
919

920     // removed since Xerces-J 2.3.0
921

922     /** Start document. */
923     public void startDocument(XMLLocator locator, String JavaDoc encoding, Augmentations augs)
924         throws XNIException {
925         startDocument(locator, encoding, null, augs);
926     } // startDocument(XMLLocator,String,Augmentations)
927

928     /** Start prefix mapping. */
929     public void startPrefixMapping(String JavaDoc prefix, String JavaDoc uri, Augmentations augs)
930         throws XNIException {
931         
932         // check for end of document
933
if (fSeenRootElementEnd) {
934             return;
935         }
936
937         // call handler
938
if (fDocumentHandler != null) {
939             Class JavaDoc cls = fDocumentHandler.getClass();
940             Class JavaDoc[] types = { String JavaDoc.class, String JavaDoc.class, Augmentations.class };
941             try {
942                 Method JavaDoc method = cls.getMethod("startPrefixMapping", types);
943                 Object JavaDoc[] args = { prefix, uri, augs };
944                 method.invoke(fDocumentHandler, args);
945             }
946             catch (NoSuchMethodException JavaDoc e) {
947                 // ignore
948
}
949             catch (IllegalAccessException JavaDoc e) {
950                 // ignore
951
}
952             catch (InvocationTargetException JavaDoc e) {
953                 // ignore
954
}
955         }
956     
957     } // startPrefixMapping(String,String,Augmentations)
958

959     /** End prefix mapping. */
960     public void endPrefixMapping(String JavaDoc prefix, Augmentations augs)
961         throws XNIException {
962         
963         // check for end of document
964
if (fSeenRootElementEnd) {
965             return;
966         }
967
968         // call handler
969
if (fDocumentHandler != null) {
970             Class JavaDoc cls = fDocumentHandler.getClass();
971             Class JavaDoc[] types = { String JavaDoc.class, Augmentations.class };
972             try {
973                 Method JavaDoc method = cls.getMethod("endPrefixMapping", types);
974                 Object JavaDoc[] args = { prefix, augs };
975                 method.invoke(fDocumentHandler, args);
976             }
977             catch (NoSuchMethodException JavaDoc e) {
978                 // ignore
979
}
980             catch (IllegalAccessException JavaDoc e) {
981                 // ignore
982
}
983             catch (InvocationTargetException JavaDoc e) {
984                 // ignore
985
}
986         }
987     
988     } // endPrefixMapping(String,Augmentations)
989

990     //
991
// Protected methods
992
//
993

994     /** Returns an HTML element. */
995     protected HTMLElements.Element getElement(String JavaDoc name) {
996         if (fNamespaces) {
997             int index = name.indexOf(':');
998             if (index != -1) {
999                 name = name.substring(index+1);
1000            }
1001        }
1002        return HTMLElements.getElement(name);
1003    } // getElement(String):HTMLElements.Element
1004

1005    /** Call document handler start element. */
1006    protected final void callStartElement(QName element, XMLAttributes attrs,
1007                                          Augmentations augs)
1008        throws XNIException {
1009        fDocumentHandler.startElement(element, attrs, augs);
1010    } // callStartElement(QName,XMLAttributes,Augmentations)
1011

1012    /** Call document handler end element. */
1013    protected final void callEndElement(QName element, Augmentations augs)
1014        throws XNIException {
1015        fDocumentHandler.endElement(element, augs);
1016    } // callEndElement(QName,Augmentations)
1017

1018    /**
1019     * Returns the depth of the open tag associated with the specified
1020     * element name or -1 if no matching element is found.
1021     *
1022     * @param element The element.
1023     */

1024    protected final int getElementDepth(HTMLElements.Element element) {
1025        final boolean container = element.isContainer();
1026        int depth = -1;
1027        for (int i = fElementStack.top - 1; i >= 0; i--) {
1028            Info info = fElementStack.data[i];
1029            if (info.element.code == element.code) {
1030                depth = fElementStack.top - i;
1031                break;
1032            }
1033            if (!container && info.element.isBlock()) {
1034                break;
1035            }
1036        }
1037        return depth;
1038    } // getElementDepth(HTMLElements.Element)
1039

1040    /**
1041     * Returns the depth of the open tag associated with the specified
1042     * element parent names or -1 if no matching element is found.
1043     *
1044     * @param parents The parent elements.
1045     */

1046    protected int getParentDepth(HTMLElements.Element[] parents, short bounds) {
1047        if (parents != null) {
1048            for (int i = fElementStack.top - 1; i >= 0; i--) {
1049                Info info = fElementStack.data[i];
1050                if (info.element.code == bounds) {
1051                    break;
1052                }
1053                for (int j = 0; j < parents.length; j++) {
1054                    if (info.element.code == parents[j].code) {
1055                        return fElementStack.top - i;
1056                    }
1057                }
1058            }
1059        }
1060        return -1;
1061    } // getParentDepth(HTMLElements.Element[],short):int
1062

1063    /** Returns a set of empty attributes. */
1064    protected final XMLAttributes emptyAttributes() {
1065        fEmptyAttrs.removeAllAttributes();
1066        return fEmptyAttrs;
1067    } // emptyAttributes():XMLAttributes
1068

1069    /** Returns an augmentations object with a synthesized item added. */
1070    protected final Augmentations synthesizedAugs() {
1071        HTMLAugmentations augs = null;
1072        if (fAugmentations) {
1073            augs = fInfosetAugs;
1074            augs.removeAllItems();
1075            augs.putItem(AUGMENTATIONS, SYNTHESIZED_ITEM);
1076        }
1077        return augs;
1078    } // synthesizedAugs():Augmentations
1079

1080    //
1081
// Protected static methods
1082
//
1083

1084    /** Modifies the given name based on the specified mode. */
1085    protected static final String JavaDoc modifyName(String JavaDoc name, short mode) {
1086        switch (mode) {
1087            case NAMES_UPPERCASE: return name.toUpperCase();
1088            case NAMES_LOWERCASE: return name.toLowerCase();
1089        }
1090        return name;
1091    } // modifyName(String,short):String
1092

1093    /**
1094     * Converts HTML names string value to constant value.
1095     *
1096     * @see #NAMES_NO_CHANGE
1097     * @see #NAMES_LOWERCASE
1098     * @see #NAMES_UPPERCASE
1099     */

1100    protected static final short getNamesValue(String JavaDoc value) {
1101        if (value.equals("lower")) {
1102            return NAMES_LOWERCASE;
1103        }
1104        if (value.equals("upper")) {
1105            return NAMES_UPPERCASE;
1106        }
1107        return NAMES_NO_CHANGE;
1108    } // getNamesValue(String):short
1109

1110    //
1111
// Classes
1112
//
1113

1114    /**
1115     * Element info for each start element. This information is used when
1116     * closing unbalanced inline elements. For example:
1117     * <pre>
1118     * &lt;i>unbalanced &lt;b>HTML&lt;/i> content&lt;/b>
1119     * </pre>
1120     * <p>
1121     * It seems that it is a waste of processing and memory to copy the
1122     * attributes for every start element even if there are no unbalanced
1123     * inline elements in the document. However, if the attributes are
1124     * <em>not</em> saved, then important attributes such as style
1125     * information would be lost.
1126     *
1127     * @author Andy Clark
1128     */

1129    public static class Info {
1130
1131        //
1132
// Data
1133
//
1134

1135        /** The element. */
1136        public HTMLElements.Element element;
1137
1138        /** The element qualified name. */
1139        public QName qname;
1140
1141        /** The element attributes. */
1142        public XMLAttributes attributes;
1143
1144        //
1145
// Constructors
1146
//
1147

1148        /**
1149         * Creates an element information object.
1150         * <p>
1151         * <strong>Note:</strong>
1152         * This constructor makes a copy of the element information.
1153         *
1154         * @param element The element qualified name.
1155         */

1156        public Info(HTMLElements.Element element, QName qname) {
1157            this(element, qname, null);
1158        } // <init>(HTMLElements.Element,QName)
1159

1160        /**
1161         * Creates an element information object.
1162         * <p>
1163         * <strong>Note:</strong>
1164         * This constructor makes a copy of the element information.
1165         *
1166         * @param element The element qualified name.
1167         * @param attributes The element attributes.
1168         */

1169        public Info(HTMLElements.Element element,
1170                    QName qname, XMLAttributes attributes) {
1171            this.element = element;
1172            this.qname = new QName(qname);
1173            if (attributes != null) {
1174                int length = attributes.getLength();
1175                if (length > 0) {
1176                    QName aqname = new QName();
1177                    XMLAttributes newattrs = new XMLAttributesImpl();
1178                    for (int i = 0; i < length; i++) {
1179                        attributes.getName(i, aqname);
1180                        String JavaDoc type = attributes.getType(i);
1181                        String JavaDoc value = attributes.getValue(i);
1182                        String JavaDoc nonNormalizedValue = attributes.getNonNormalizedValue(i);
1183                        boolean specified = attributes.isSpecified(i);
1184                        newattrs.addAttribute(aqname, type, value);
1185                        newattrs.setNonNormalizedValue(i, nonNormalizedValue);
1186                        newattrs.setSpecified(i, specified);
1187                    }
1188                    this.attributes = newattrs;
1189                }
1190            }
1191        } // <init>(HTMLElements.Element,QName,XMLAttributes)
1192

1193    } // class Info
1194

1195    /** Unsynchronized stack of element information. */
1196    public static class InfoStack {
1197
1198        //
1199
// Data
1200
//
1201

1202        /** The top of the stack. */
1203        public int top;
1204
1205        /** The stack data. */
1206        public Info[] data = new Info[10];
1207
1208        //
1209
// Public methods
1210
//
1211

1212        /** Pushes element information onto the stack. */
1213        public void push(Info info) {
1214            if (top == data.length) {
1215                Info[] newarray = new Info[top + 10];
1216                System.arraycopy(data, 0, newarray, 0, top);
1217                data = newarray;
1218            }
1219            data[top++] = info;
1220        } // push(Info)
1221

1222        /** Peeks at the top of the stack. */
1223        public Info peek() {
1224            return data[top-1];
1225        } // peek():Info
1226

1227        /** Pops the top item off of the stack. */
1228        public Info pop() {
1229            return data[--top];
1230        } // pop():Info
1231

1232    } // class InfoStack
1233

1234} // class HTMLTagBalancer
1235
Popular Tags