KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > ccil > cowan > tagsoup > Parser


// This file is part of TagSoup.
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.  You may also distribute
// and/or modify it under version 2.1 of the Academic Free License.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
//
// The TagSoup parser

16 package org.ccil.cowan.tagsoup;
import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import org.xml.sax.*;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.DefaultHandler;
25
26
27 /**
28 The SAX parser class.
29 **/

30 public class Parser extends DefaultHandler JavaDoc implements ScanHandler, XMLReader, LexicalHandler JavaDoc {
31
32     // XMLReader implementation
33

34     private ContentHandler theContentHandler = this;
35     private LexicalHandler JavaDoc theLexicalHandler = this;
36     private DTDHandler theDTDHandler = this;
37     private ErrorHandler theErrorHandler = this;
38     private EntityResolver theEntityResolver = this;
39     private Schema theSchema;
40     private Scanner theScanner;
41     private AutoDetector theAutoDetector;
42     // Feature flags
43
private boolean namespaces = true;
44     private boolean ignoreBogons = false;
45     private boolean bogonsEmpty = true;
46     private boolean defaultAttributes = true;
47     private boolean translateColons = false;
48     private boolean restartElements = true;
49     private boolean ignorableWhitespace = false;
50
51     /**
52     A value of "true" indicates namespace URIs and unprefixed local
53     names for element and attribute names will be available.
54     **/

55     public final static String JavaDoc namespacesFeature =
56         "http://xml.org/sax/features/namespaces";
57
58     /**
59     A value of "true" indicates that XML qualified names (with prefixes)
60     and attributes (including xmlns* attributes) will be available.
61     We don't support this value.
62     **/

63     public final static String JavaDoc namespacePrefixesFeature =
64         "http://xml.org/sax/features/namespace-prefixes";
65
66     /**
67     Reports whether this parser processes external general entities
68     (it doesn't).
69     **/

70     public final static String JavaDoc externalGeneralEntitiesFeature =
71         "http://xml.org/sax/features/external-general-entities";
72
73     /**
74     Reports whether this parser processes external parameter entities
75     (it doesn't).
76     **/

77     public final static String JavaDoc externalParameterEntitiesFeature =
78         "http://xml.org/sax/features/external-parameter-entities";
79
80     /**
81     May be examined only during a parse, after the startDocument()
82     callback has been completed; read-only. The value is true if
83     the document specified standalone="yes" in its XML declaration,
84     and otherwise is false. (It's always false.)
85     **/

86     public final static String JavaDoc isStandaloneFeature =
87         "http://xml.org/sax/features/is-standalone";
88
89     /**
90     A value of "true" indicates that the LexicalHandler will report
91     the beginning and end of parameter entities (it won't).
92     **/

93     public final static String JavaDoc lexicalHandlerParameterEntitiesFeature =
94         "http://xml.org/sax/features/lexical-handler/parameter-entities";
95
96     /**
97     A value of "true" indicates that system IDs in declarations will
98     be absolutized (relative to their base URIs) before reporting.
99     (This returns true but doesn't actually do anything.)
100     **/

101     public final static String JavaDoc resolveDTDURIsFeature =
102         "http://xml.org/sax/features/resolve-dtd-uris";
103
104     /**
105     Has a value of "true" if all XML names (for elements,
106     prefixes, attributes, entities, notations, and local
107     names), as well as Namespace URIs, will have been interned
108     using java.lang.String.intern. This supports fast testing of
109     equality/inequality against string constants, rather than forcing
110     slower calls to String.equals(). (We always intern.)
111     **/

112     public final static String JavaDoc stringInterningFeature =
113         "http://xml.org/sax/features/string-interning";
114
115     /**
116     Returns "true" if the Attributes objects passed by this
117     parser in ContentHandler.startElement() implement the
118     org.xml.sax.ext.Attributes2 interface. (They don't.)
119     **/

120
121     public final static String JavaDoc useAttributes2Feature =
122         "http://xml.org/sax/features/use-attributes2";
123
124     /**
125     Returns "true" if the Locator objects passed by this parser
126     in ContentHandler.setDocumentLocator() implement the
127     org.xml.sax.ext.Locator2 interface. (They don't.)
128     **/

129     public final static String JavaDoc useLocator2Feature =
130         "http://xml.org/sax/features/use-locator2";
131
132     /**
133     Returns "true" if, when setEntityResolver is given an object
134     implementing the org.xml.sax.ext.EntityResolver2 interface,
135     those new methods will be used. (They won't be.)
136     **/

137     public final static String JavaDoc useEntityResolver2Feature =
138         "http://xml.org/sax/features/use-entity-resolver2";
139
140     /**
141     Controls whether the parser is reporting all validity errors
142     (We don't report any validity errors.)
143     **/

144     public final static String JavaDoc validationFeature =
145         "http://xml.org/sax/features/validation";
146
147     /**
148     Controls whether the parser reports Unicode normalization
149     errors as described in section 2.13 and Appendix B of the XML
150     1.1 Recommendation. (We don't normalize.)
151     **/

152     public final static String JavaDoc unicodeNormalizationCheckingFeature =
153 "http://xml.org/sax/features/unicode-normalization-checking";
154
155     /**
156     Controls whether, when the namespace-prefixes feature is set,
157     the parser treats namespace declaration attributes as being in
158     the http://www.w3.org/2000/xmlns/ namespace. (It doesn't.)
159     **/

160     public final static String JavaDoc xmlnsURIsFeature =
161         "http://xml.org/sax/features/xmlns-uris";
162
163     /**
164     Returns "true" if the parser supports both XML 1.1 and XML 1.0.
165     (Always false.)
166     **/

167     public final static String JavaDoc XML11Feature =
168         "http://xml.org/sax/features/xml-1.1";
169
170     /**
171     A value of "true" indicates that the parser will ignore
172     unknown elements.
173     **/

174     public final static String JavaDoc ignoreBogonsFeature =
175         "http://www.ccil.org/~cowan/tagsoup/features/ignore-bogons";
176
177     /**
178     A value of "true" indicates that the parser will give unknown
179     elements a content model of EMPTY; a value of "false", a
180     content model of ANY.
181     **/

182     public final static String JavaDoc bogonsEmptyFeature =
183         "http://www.ccil.org/~cowan/tagsoup/features/bogons-empty";
184
185     /**
186     A value of "true" indicates that the parser will return default
187     attribute values for missing attributes that have default values.
188     **/

189     public final static String JavaDoc defaultAttributesFeature =
190         "http://www.ccil.org/~cowan/tagsoup/features/default-attributes";
191
192     /**
193     A value of "true" indicates that the parser will
194     translate colons into underscores in names.
195     **/

196     public final static String JavaDoc translateColonsFeature =
197         "http://www.ccil.org/~cowan/tagsoup/features/translate-colons";
198
199     /**
200     A value of "true" indicates that the parser will
201     attempt to restart the restartable elements.
202     **/

203     public final static String JavaDoc restartElementsFeature =
204         "http://www.ccil.org/~cowan/tagsoup/features/restart-elements";
205
206     /**
207     A value of "true" indicates that the parser will
208     transmit whitespace in element-only content via the SAX
209     ignorableWhitespace callback. Normally this is not done,
210     because HTML is an SGML application and SGML suppresses
211     such whitespace.
212     **/

213     public final static String JavaDoc ignorableWhitespaceFeature =
214         "http://www.ccil.org/~cowan/tagsoup/features/ignorable-whitespace";
215
216     /**
217     Used to see some syntax events that are essential in some
218     applications: comments, CDATA delimiters, selected general
219     entity inclusions, and the start and end of the DTD (and
220     declaration of document element name). The Object must implement
221     org.xml.sax.ext.LexicalHandler.
222     **/

223     public final static String JavaDoc lexicalHandlerProperty =
224         "http://xml.org/sax/properties/lexical-handler";
225
226     /**
227     Specifies the Scanner object this Parser uses.
228     **/

229     public final static String JavaDoc scannerProperty =
230         "http://www.ccil.org/~cowan/tagsoup/properties/scanner";
231
232     /**
233     Specifies the Schema object this Parser uses.
234     **/

235     public final static String JavaDoc schemaProperty =
236         "http://www.ccil.org/~cowan/tagsoup/properties/schema";
237
238     /**
239     Specifies the AutoDetector (for encoding detection) this Parser uses.
240     **/

241     public final static String JavaDoc autoDetectorProperty =
242         "http://www.ccil.org/~cowan/tagsoup/properties/auto-detector";
243
244     private HashMap JavaDoc theFeatures = new HashMap JavaDoc();
245     {
246         theFeatures.put(namespacesFeature, Boolean.TRUE);
247         theFeatures.put(namespacePrefixesFeature, Boolean.FALSE);
248         theFeatures.put(externalGeneralEntitiesFeature, Boolean.FALSE);
249         theFeatures.put(externalParameterEntitiesFeature, Boolean.FALSE);
250         theFeatures.put(isStandaloneFeature, Boolean.FALSE);
251         theFeatures.put(lexicalHandlerParameterEntitiesFeature,
252             Boolean.FALSE);
253         theFeatures.put(resolveDTDURIsFeature, Boolean.TRUE);
254         theFeatures.put(stringInterningFeature, Boolean.TRUE);
255         theFeatures.put(useAttributes2Feature, Boolean.FALSE);
256         theFeatures.put(useLocator2Feature, Boolean.FALSE);
257         theFeatures.put(useEntityResolver2Feature, Boolean.FALSE);
258         theFeatures.put(validationFeature, Boolean.FALSE);
259         theFeatures.put(xmlnsURIsFeature, Boolean.FALSE);
260         theFeatures.put(xmlnsURIsFeature, Boolean.FALSE);
261         theFeatures.put(XML11Feature, Boolean.FALSE);
262         theFeatures.put(ignoreBogonsFeature, Boolean.FALSE);
263         theFeatures.put(bogonsEmptyFeature, Boolean.TRUE);
264         theFeatures.put(defaultAttributesFeature, Boolean.TRUE);
265         theFeatures.put(translateColonsFeature, Boolean.FALSE);
266         theFeatures.put(restartElementsFeature, Boolean.TRUE);
267         theFeatures.put(ignorableWhitespaceFeature, Boolean.FALSE);
268         }
269
270
271     public boolean getFeature (String JavaDoc name)
272         throws SAXNotRecognizedException, SAXNotSupportedException {
273         Boolean JavaDoc b = (Boolean JavaDoc)theFeatures.get(name);
274         if (b == null) {
275             throw new SAXNotRecognizedException("Unknown feature " + name);
276             }
277         return b.booleanValue();
278         }
279
280     public void setFeature (String JavaDoc name, boolean value)
281     throws SAXNotRecognizedException, SAXNotSupportedException {
282         Boolean JavaDoc b = (Boolean JavaDoc)theFeatures.get(name);
283         if (b == null) {
284             throw new SAXNotRecognizedException("Unknown feature " + name);
285             }
286         if (value) theFeatures.put(name, Boolean.TRUE);
287         else theFeatures.put(name, Boolean.FALSE);
288
289         if (name.equals(namespacesFeature)) namespaces = value;
290         else if (name.equals(ignoreBogonsFeature)) ignoreBogons = value;
291         else if (name.equals(bogonsEmptyFeature)) bogonsEmpty = value;
292         else if (name.equals(defaultAttributesFeature)) defaultAttributes = value;
293         else if (name.equals(translateColonsFeature)) translateColons = value;
294         else if (name.equals(restartElementsFeature)) restartElements = value;
295         else if (name.equals(ignorableWhitespaceFeature)) ignorableWhitespace = value;
296         }
297
298     public Object JavaDoc getProperty (String JavaDoc name)
299     throws SAXNotRecognizedException, SAXNotSupportedException {
300         if (name.equals(lexicalHandlerProperty)) {
301             return theLexicalHandler == this ? null : theLexicalHandler;
302             }
303         else if (name.equals(scannerProperty)) {
304             return theScanner;
305             }
306         else if (name.equals(schemaProperty)) {
307             return theSchema;
308             }
309         else if (name.equals(autoDetectorProperty)) {
310             return theAutoDetector;
311             }
312         else {
313             throw new SAXNotRecognizedException("Unknown property " + name);
314             }
315         }
316
317     public void setProperty (String JavaDoc name, Object JavaDoc value)
318     throws SAXNotRecognizedException, SAXNotSupportedException {
319         if (name.equals(lexicalHandlerProperty)) {
320             if (value instanceof LexicalHandler JavaDoc) {
321                 theLexicalHandler = (LexicalHandler JavaDoc)value;
322                 }
323             else {
324                 throw new SAXNotSupportedException("Your lexical handler is not a LexicalHandler");
325                 }
326             }
327         else if (name.equals(scannerProperty)) {
328             if (value instanceof Scanner) {
329                 theScanner = (Scanner)value;
330                 }
331             else {
332                 throw new SAXNotSupportedException("Your scanner is not a Scanner");
333                 }
334             }
335         else if (name.equals(schemaProperty)) {
336             if (value instanceof Schema) {
337                 theSchema = (Schema)value;
338                 }
339             else {
340                  throw new SAXNotSupportedException("Your schema is not a Schema");
341                 }
342             }
343         else if (name.equals(autoDetectorProperty)) {
344             if (value instanceof AutoDetector) {
345                 theAutoDetector = (AutoDetector)value;
346                 }
347             else {
348                 throw new SAXNotSupportedException("Your auto-detector is not an AutoDetector");
349                 }
350             }
351         else {
352             throw new SAXNotRecognizedException("Unknown property " + name);
353             }
354         }
355
356     public void setEntityResolver (EntityResolver resolver) {
357         theEntityResolver = resolver;
358         }
359
360     public EntityResolver getEntityResolver () {
361         return (theEntityResolver == this) ? null : theEntityResolver;
362         }
363
364     public void setDTDHandler (DTDHandler handler) {
365         theDTDHandler = handler;
366         }
367
368     public DTDHandler getDTDHandler () {
369         return (theDTDHandler == this) ? null : theDTDHandler;
370         }
371
372     public void setContentHandler (ContentHandler handler) {
373         theContentHandler = handler;
374         }
375
376     public ContentHandler getContentHandler () {
377         return (theContentHandler == this) ? null : theContentHandler;
378         }
379
380     public void setErrorHandler (ErrorHandler handler) {
381         theErrorHandler = handler;
382         }
383
384     public ErrorHandler getErrorHandler () {
385         return (theErrorHandler == this) ? null : theErrorHandler;
386         }
387
388     public void parse (InputSource input) throws IOException, SAXException {
389         setup();
390         Reader r = getReader(input);
391         theContentHandler.startDocument();
392         theScanner.resetDocumentLocator(input.getPublicId(), input.getSystemId());
393         if (theScanner instanceof Locator) {
394             theContentHandler.setDocumentLocator((Locator)theScanner);
395             }
396         if (!(theSchema.getURI().equals("")))
397             theContentHandler.startPrefixMapping(theSchema.getPrefix(),
398                 theSchema.getURI());
399         theScanner.scan(r, this);
400         }
401
402     public void parse (String JavaDoc systemid) throws IOException, SAXException {
403         parse(new InputSource(systemid));
404         }
405
406     // Sets up instance variables that haven't been set by setFeature
407
private void setup() {
408         if (theSchema == null) theSchema = new HTMLSchema();
409         if (theScanner == null) theScanner = new HTMLScanner();
410         if (theAutoDetector == null) {
411             theAutoDetector = new AutoDetector() {
412                 public Reader autoDetectingReader(InputStream i) {
413                     return new InputStreamReader(i);
414                     }
415                 };
416             }
417         theStack = new Element(theSchema.getElementType("<root>"), defaultAttributes);
418         thePCDATA = new Element(theSchema.getElementType("<pcdata>"), defaultAttributes);
419         theNewElement = null;
420         theAttributeName = null;
421         thePITarget = null;
422         theSaved = null;
423         theEntity = 0;
424         virginStack = true;
425                 doctypename = doctypepublicid = doctypesystemid = null;
426         }
427
428     // Return a Reader based on the contents of an InputSource
429
// Buffer both the InputStream and the Reader
430
private Reader getReader(InputSource s) throws SAXException, IOException {
431         Reader r = s.getCharacterStream();
432         InputStream i = s.getByteStream();
433         String JavaDoc encoding = s.getEncoding();
434         String JavaDoc publicid = s.getPublicId();
435         String JavaDoc systemid = s.getSystemId();
436         if (r == null) {
437             if (i == null) i = getInputStream(publicid, systemid);
438 // i = new BufferedInputStream(i);
439
if (encoding == null) {
440                 r = theAutoDetector.autoDetectingReader(i);
441                 }
442             else {
443                 try {
444                     r = new InputStreamReader(i, encoding);
445                     }
446                 catch (UnsupportedEncodingException e) {
447                     r = new InputStreamReader(i);
448                     }
449                 }
450             }
451 // r = new BufferedReader(r);
452
return r;
453         }
454
455     // Get an InputStream based on a publicid and a systemid
456
private InputStream getInputStream(String JavaDoc publicid, String JavaDoc systemid) throws IOException, SAXException {
457         URL JavaDoc basis = new URL JavaDoc("file", "", System.getProperty("user.dir") + "/.");
458         URL JavaDoc url = new URL JavaDoc(basis, systemid);
459         URLConnection JavaDoc c = url.openConnection();
460         return c.getInputStream();
461         }
462         // We don't process publicids (who uses them anyhow?)
463

464     // ScanHandler implementation
465

466     private Element theNewElement = null;
467     private String JavaDoc theAttributeName = null;
468     private String JavaDoc doctypepublicid = null;
469     private String JavaDoc doctypesystemid = null;
470     private String JavaDoc doctypename = null;
471     private String JavaDoc thePITarget = null;
472     private Element theStack = null;
473     private Element theSaved = null;
474     private Element thePCDATA = null;
475     private char theEntity = 0;
476
477     public void adup(char[] buff, int offset, int length) throws SAXException {
478         if (theNewElement == null || theAttributeName == null) return;
479         theNewElement.setAttribute(theAttributeName, null, theAttributeName);
480         theAttributeName = null;
481         }
482
483     public void aname(char[] buff, int offset, int length) throws SAXException {
484         if (theNewElement == null) return;
485         theAttributeName = makeName(buff, offset, length);
486 // System.err.println("%% Attribute name " + theAttributeName);
487
}
488
489     public void aval(char[] buff, int offset, int length) throws SAXException {
490         if (theNewElement == null || theAttributeName == null) return;
491         String JavaDoc value = new String JavaDoc(buff, offset, length);
492 // System.err.println("%% Attribute value [" + value + "]");
493
theNewElement.setAttribute(theAttributeName, null, value);
494         theAttributeName = null;
495 // System.err.println("%% Aval done");
496
}
497
498     public void entity(char[] buff, int offset, int length) throws SAXException {
499         if (length < 1) {
500             theEntity = 0;
501             return;
502             }
503 // System.err.println("%% Entity at " + offset + " " + length);
504
String JavaDoc name = new String JavaDoc(buff, offset, length);
505 // System.err.println("%% Got entity [" + name + "]");
506
theEntity = theSchema.getEntity(name);
507         }
508
509     public void eof(char[] buff, int offset, int length) throws SAXException {
510         if (virginStack) rectify(thePCDATA);
511         while (theStack.next() != null) {
512             pop();
513             }
514         if (!(theSchema.getURI().equals("")))
515             theContentHandler.endPrefixMapping(theSchema.getPrefix());
516         theContentHandler.endDocument();
517         }
518
519     public void etag(char[] buff, int offset, int length) throws SAXException {
520         if (etag_cdata(buff, offset, length)) return;
521         etag_basic(buff, offset, length);
522         }
523
524     private static char[] etagchars = {'<', '/', '>'};
525     public boolean etag_cdata(char[] buff, int offset, int length) throws SAXException {
526         String JavaDoc currentName = theStack.name();
527         // If this is a CDATA element and the tag doesn't match,
528
// or isn't properly formed (junk after the name),
529
// restart CDATA mode and process the tag as characters.
530
if ((theStack.flags() & Schema.F_CDATA) != 0) {
531             boolean realTag = (length == currentName.length());
532             if (realTag) {
533                 for (int i = 0; i < length; i++) {
534                     if (Character.toLowerCase(buff[offset + i]) != Character.toLowerCase(currentName.charAt(i))) {
535                         realTag = false;
536                         break;
537                         }
538                     }
539                 }
540             if (!realTag) {
541                 theContentHandler.characters(etagchars, 0, 2);
542                 theContentHandler.characters(buff, offset, length);
543                 theContentHandler.characters(etagchars, 2, 1);
544                 theScanner.startCDATA();
545                 return true;
546                 }
547             }
548         return false;
549         }
550
551     public void etag_basic(char[] buff, int offset, int length) throws SAXException {
552         theNewElement = null;
553         String JavaDoc name;
554         if (length != 0) name = makeName(buff, offset, length);
555         else name = theStack.name();
556 // System.err.println("%% Got end of " + name);
557

558         Element sp;
559         boolean inNoforce = false;
560         for (sp = theStack; sp != null; sp = sp.next()) {
561             if (sp.name().equals(name)) break;
562             if ((sp.flags() & Schema.F_NOFORCE) != 0) inNoforce = true;
563             }
564
565         if (sp == null) return; // Ignore unknown etags
566
if (sp.next() == null || sp.next().next() == null) return;
567         if (inNoforce) { // inside an F_NOFORCE element?
568
sp.preclose(); // preclose the matching element
569
}
570         else { // restartably pop everything above us
571
while (theStack != sp) {
572                 restartablyPop();
573                 }
574             pop();
575             }
576         // pop any preclosed elements now at the top
577
while (theStack.isPreclosed()) {
578             pop();
579             }
580         restart(null);
581         }
582
583     // Push restartables on the stack if possible
584
// e is the next element to be started, if we know what it is
585
private void restart(Element e) throws SAXException {
586         while (theSaved != null && theStack.canContain(theSaved) &&
587                 (e == null || theSaved.canContain(e))) {
588             Element next = theSaved.next();
589             push(theSaved);
590             theSaved = next;
591             }
592         }
593
594     // Pop the stack irrevocably
595
private void pop() throws SAXException {
596         if (theStack == null) return; // empty stack
597
String JavaDoc name = theStack.name();
598         String JavaDoc localName = theStack.localName();
599         String JavaDoc namespace = theStack.namespace();
600 // System.err.println("%% Popping " + name);
601
if ((theStack.flags() & Schema.F_CDATA) != 0) {
602             theLexicalHandler.endCDATA();
603             }
604         if (!namespaces) namespace = localName = "";
605         theContentHandler.endElement(namespace, localName, name);
606         theStack = theStack.next();
607         }
608
609     // Pop the stack restartably
610
private void restartablyPop() throws SAXException {
611         Element popped = theStack;
612         pop();
613         if (restartElements && (popped.flags() & Schema.F_RESTART) != 0) {
614             popped.anonymize();
615             popped.setNext(theSaved);
616             theSaved = popped;
617             }
618         }
619
620     // Push element onto stack
621
private boolean virginStack = true;
622     private void push(Element e) throws SAXException {
623         String JavaDoc name = e.name();
624         String JavaDoc localName = e.localName();
625         String JavaDoc namespace = e.namespace();
626 // System.err.println("%% Pushing " + name);
627
e.clean();
628         if (!namespaces) namespace = localName = "";
629                 if (virginStack && localName.equalsIgnoreCase(doctypename)) {
630                     try {
631                         theEntityResolver.resolveEntity(doctypepublicid, doctypesystemid);
632                     } catch (IOException ew) { } // Can't be thrown for root I believe.
633
}
634         theContentHandler.startElement(namespace, localName, name, e.atts());
635         e.setNext(theStack);
636         theStack = e;
637         virginStack = false;
638         if ((theStack.flags() & Schema.F_CDATA) != 0) {
639             theScanner.startCDATA();
640             theLexicalHandler.startCDATA();
641             }
642         }
643
644         /**
645          * Parsing the complete XML Document Type Definition is way too complex,
646          * but for many simple cases we can extract something useful from it.
647          *
648          * doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
649          * DeclSep ::= PEReference | S
650          * intSubset ::= (markupdecl | DeclSep)*
651          * markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
652          * ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
653          */

654     public void decl(char[] buff, int offset, int length) throws SAXException {
655         String JavaDoc s = new String JavaDoc(buff, offset, length);
656         String JavaDoc name = null;
657         String JavaDoc systemid = null;
658         String JavaDoc publicid = null;
659         String JavaDoc[] v = split(s);
660         if (v.length > 0 && "DOCTYPE".equals(v[0])) {
661             if (v.length > 1) {
662                 name = v[1];
663                 if (v.length>3 && "SYSTEM".equals(v[2])) {
664                 systemid = v[3];
665                 }
666             else if (v.length > 3 && "PUBLIC".equals(v[2])) {
667                 publicid = v[3];
668                 if (v.length > 4) {
669                     systemid = v[4];
670                     }
671                 else {
672                     systemid = "";
673                     }
674                     }
675                 }
676             }
677         publicid = trimquotes(publicid);
678         systemid = trimquotes(systemid);
679         if (name != null) {
680             publicid = cleanPublicid(publicid);
681             theLexicalHandler.startDTD(name, publicid, systemid);
682             theLexicalHandler.endDTD();
683             doctypename = name;
684             doctypepublicid = publicid;
685         if (theScanner instanceof Locator) { // Must resolve systemid
686
doctypesystemid = ((Locator)theScanner).getSystemId();
687                     try {
688                         doctypesystemid = new URL JavaDoc(new URL JavaDoc(doctypesystemid), systemid).toString();
689                     } catch (Exception JavaDoc e) {}
690                 }
691             }
692         }
693
694     // If the String is quoted, trim the quotes.
695
private static String JavaDoc trimquotes(String JavaDoc in) {
696         if (in == null) return in;
697         int length = in.length();
698         if (length == 0) return in;
699         char s = in.charAt(0);
700         char e = in.charAt(length - 1);
701         if (s == e && (s == '\'' || s == '"')) {
702             in = in.substring(1, in.length() - 1);
703             }
704         return in;
705         }
706
707     // Split the supplied String into words or phrases seperated by spaces.
708
// Recognises quotes around a phrase and doesn't split it.
709
private static String JavaDoc[] split(String JavaDoc val) throws IllegalArgumentException JavaDoc {
710         val = val.trim();
711         if (val.length() == 0) {
712             return new String JavaDoc[0];
713             }
714         else {
715             ArrayList JavaDoc l = new ArrayList JavaDoc();
716             int s = 0;
717             int e = 0;
718             boolean sq = false; // single quote
719
boolean dq = false; // double quote
720
char lastc = 0;
721             int len = val.length();
722             for (e=0; e < len; e++) {
723                 char c = val.charAt(e);
724                 if (!dq && c == '\'' && lastc != '\\') {
725                 sq = !sq;
726                 if (s < 0) s = e;
727                 }
728             else if (!sq && c == '\"' && lastc != '\\') {
729                 dq = !dq;
730                 if (s < 0) s = e;
731                 }
732             else if (!sq && !dq) {
733                 if (Character.isWhitespace(c)) {
734                     if (s >= 0) l.add(val.substring(s, e));
735                     s = -1;
736                     }
737                 else if (s < 0 && c != ' ') {
738                     s = e;
739                     }
740                 }
741             lastc = c;
742             }
743         l.add(val.substring(s, e));
744         return (String JavaDoc[])l.toArray(new String JavaDoc[0]);
745         }
746         }
747
748     // Replace junk in publicids with spaces
749
private static String JavaDoc legal =
750         "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-'()+,./:=?;!*#@$_%";
751
752     private String JavaDoc cleanPublicid(String JavaDoc src) {
753         if (src == null) return null;
754         int len = src.length();
755         StringBuffer JavaDoc dst = new StringBuffer JavaDoc(len);
756         boolean suppressSpace = true;
757         for (int i = 0; i < len; i++) {
758             char ch = src.charAt(i);
759             if (legal.indexOf(ch) != -1) { // legal but not whitespace
760
dst.append(ch);
761                 suppressSpace = false;
762                 }
763             else if (suppressSpace) { // normalizable whitespace or junk
764
;
765                 }
766             else {
767                 dst.append(' ');
768                 suppressSpace = true;
769                 }
770             }
771 // System.err.println("%% Publicid [" + dst.toString().trim() + "]");
772
return dst.toString().trim(); // trim any final junk whitespace
773
}
774
775
776     public void gi(char[] buff, int offset, int length) throws SAXException {
777         if (theNewElement != null) return;
778         String JavaDoc name = makeName(buff, offset, length);
779         if (name == null) return;
780         ElementType type = theSchema.getElementType(name);
781         if (type == null) {
782             // Suppress unknown elements if ignore-bogons is on
783
if (ignoreBogons) return;
784             theSchema.elementType(name, bogonsEmpty ? Schema.M_EMPTY : Schema.M_ANY, Schema.M_ANY, 0);
785             type = theSchema.getElementType(name);
786             }
787
788         theNewElement = new Element(type, defaultAttributes);
789 // System.err.println("%% Got GI " + theNewElement.name());
790
}
791
792     public void pcdata(char[] buff, int offset, int length) throws SAXException {
793         if (length == 0) return;
794         boolean allWhite = true;
795         for (int i = 0; i < length; i++) {
796             if (!Character.isWhitespace(buff[offset+i])) {
797                 allWhite = false;
798                 }
799             }
800         if (allWhite && !theStack.canContain(thePCDATA)) {
801             if (ignorableWhitespace) {
802                 theContentHandler.ignorableWhitespace(buff, offset, length);
803                 }
804             }
805         else {
806             rectify(thePCDATA);
807             theContentHandler.characters(buff, offset, length);
808             }
809         }
810
811     public void pitarget(char[] buff, int offset, int length) throws SAXException {
812         if (theNewElement != null) return;
813         thePITarget = makeName(buff, offset, length);
814         }
815
816     public void pi(char[] buff, int offset, int length) throws SAXException {
817         if (theNewElement != null || thePITarget == null) return;
818         if (thePITarget.toLowerCase().equals("xml")) return;
819 // if (length > 0 && buff[length - 1] == '?') System.out.println("%% Removing ? from PI");
820
if (length > 0 && buff[length - 1] == '?') length--; // remove trailing ?
821
theContentHandler.processingInstruction(thePITarget,
822             new String JavaDoc(buff, offset, length));
823         thePITarget = null;
824         }
825
826     public void stagc(char[] buff, int offset, int length) throws SAXException {
827 // System.err.println("%% Start-tag");
828
if (theNewElement == null) return;
829         rectify(theNewElement);
830         if (theStack.model() == Schema.M_EMPTY) {
831             // Force an immediate end tag
832
etag_basic(buff, offset, length);
833             }
834         }
835
836     public void stage(char[] buff, int offset, int length) throws SAXException {
837 // System.err.println("%% Empty-tag");
838
if (theNewElement == null) return;
839         rectify(theNewElement);
840         // Force an immediate end tag
841
etag_basic(buff, offset, length);
842         }
843
844     // Comment buffer is twice the size of the output buffer
845
private char[] theCommentBuffer = new char[2000];
846     public void cmnt(char[] buff, int offset, int length) throws SAXException {
847         int postOffset = offset + length;
848         if (theCommentBuffer.length < postOffset * 2) {
849             theCommentBuffer = new char[postOffset * 2];
850             }
851         int newSpaces = 0;
852         for (int i = offset, j = offset; i < postOffset; i++, j++) {
853             if (i == offset && buff[i] == '-') {
854                 theCommentBuffer[j++] = ' ';
855                 newSpaces++;
856                 }
857             theCommentBuffer[j] = buff[i];
858             if (buff[i] == '-') {
859                 if (i == postOffset || buff[i+1] == '-') {
860                     theCommentBuffer[j++] = ' ';
861                     newSpaces++;
862                     }
863                 }
864             }
865         theLexicalHandler.comment(theCommentBuffer, offset, length + newSpaces);
866         }
867
868     // Rectify the stack, pushing and popping as needed
869
// so that the argument can be safely pushed
870
private void rectify(Element e) throws SAXException {
871         Element sp;
872         while (true) {
873             for (sp = theStack; sp != null; sp = sp.next()) {
874                 if (sp.canContain(e)) break;
875                 }
876             if (sp != null) break;
877             ElementType parentType = e.parent();
878             if (parentType == null) break;
879             Element parent = new Element(parentType, defaultAttributes);
880 // System.err.println("%% Ascending from " + e.name() + " to " + parent.name());
881
parent.setNext(e);
882             e = parent;
883             }
884         if (sp == null) return; // don't know what to do
885
while (theStack != sp) {
886             if (theStack == null || theStack.next() == null ||
887                 theStack.next().next() == null) break;
888             restartablyPop();
889             }
890         while (e != null) {
891             Element nexte = e.next();
892             if (!e.name().equals("<pcdata>")) push(e);
893             e = nexte;
894             restart(e);
895             }
896         theNewElement = null;
897         }
898
899     public char getEntity() {
900         return theEntity;
901         }
902
903     // Return the argument as a valid XML name, lowercased
904
private String JavaDoc makeName(char[] buff, int offset, int length) {
905         StringBuffer JavaDoc dst = new StringBuffer JavaDoc(length + 2);
906         boolean seenColon = false;
907         boolean start = true;
908 // String src = new String(buff, offset, length); // DEBUG
909
for (; length-- > 0; offset++) {
910             char ch = Character.toLowerCase(buff[offset]);
911             if (Character.isLetter(ch) || ch == '_') {
912                 start = false;
913                 dst.append(ch);
914                 }
915             else if (Character.isDigit(ch) || ch == '-' || ch == '.') {
916                 if (start) dst.append('_');
917                 start = false;
918                 dst.append(ch);
919                 }
920             else if (ch == ':' && !seenColon) {
921                 seenColon = true;
922                 if (start) dst.append('_');
923                 start = true;
924                 dst.append(translateColons ? '_' : ch);
925                 }
926             }
927         int dstLength = dst.length();
928         if (dstLength == 0 || dst.charAt(dstLength - 1) == ':') dst.append('_');
929 // System.err.println("Made name \"" + dst + "\" from \"" + src + "\"");
930
return dst.toString().intern();
931         }
932
933     // Default LexicalHandler implementation
934

935     public void comment(char[] ch, int start, int length) throws SAXException { }
936     public void endCDATA() throws SAXException { }
937     public void endDTD() throws SAXException { }
938     public void endEntity(String JavaDoc name) throws SAXException { }
939     public void startCDATA() throws SAXException { }
940     public void startDTD(String JavaDoc name, String JavaDoc publicid, String JavaDoc systemid) throws SAXException { }
941     public void startEntity(String JavaDoc name) throws SAXException { }
942
943     }
944
Popular Tags