KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > xerces > impl > XMLDocumentScannerImpl


1 /*
2  * Copyright 1999-2005 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16
17 package org.apache.xerces.impl;
18
19 import java.io.CharConversionException JavaDoc;
20 import java.io.EOFException JavaDoc;
21 import java.io.IOException JavaDoc;
22
23 import org.apache.xerces.impl.dtd.XMLDTDDescription;
24 import org.apache.xerces.impl.io.MalformedByteSequenceException;
25 import org.apache.xerces.impl.validation.ValidationManager;
26 import org.apache.xerces.util.NamespaceSupport;
27 import org.apache.xerces.util.XMLChar;
28 import org.apache.xerces.util.XMLStringBuffer;
29 import org.apache.xerces.xni.Augmentations;
30 import org.apache.xerces.xni.NamespaceContext;
31 import org.apache.xerces.xni.XMLResourceIdentifier;
32 import org.apache.xerces.xni.XMLString;
33 import org.apache.xerces.xni.XNIException;
34 import org.apache.xerces.xni.parser.XMLComponentManager;
35 import org.apache.xerces.xni.parser.XMLConfigurationException;
36 import org.apache.xerces.xni.parser.XMLDTDScanner;
37 import org.apache.xerces.xni.parser.XMLInputSource;
38
39 /**
40  * This class is responsible for scanning XML document structure
41  * and content. The scanner acts as the source for the document
42  * information which is communicated to the document handler.
43  * <p>
44  * This component requires the following features and properties from the
45  * component manager that uses it:
46  * <ul>
47  * <li>http://xml.org/sax/features/namespaces</li>
48  * <li>http://xml.org/sax/features/validation</li>
49  * <li>http://apache.org/xml/features/nonvalidating/load-external-dtd</li>
50  * <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
51  * <li>http://apache.org/xml/features/scanner/notify-builtin-refs</li>
52  * <li>http://apache.org/xml/properties/internal/symbol-table</li>
53  * <li>http://apache.org/xml/properties/internal/error-reporter</li>
54  * <li>http://apache.org/xml/properties/internal/entity-manager</li>
55  * <li>http://apache.org/xml/properties/internal/dtd-scanner</li>
56  * </ul>
57  *
58  * @xerces.internal
59  *
60  * @author Glenn Marcy, IBM
61  * @author Andy Clark, IBM
62  * @author Arnaud Le Hors, IBM
63  * @author Eric Ye, IBM
64  *
65  * @version $Id: XMLDocumentScannerImpl.java,v 1.51 2005/02/24 19:03:49 mrglavas Exp $
66  */

67 public class XMLDocumentScannerImpl
68     extends XMLDocumentFragmentScannerImpl {
69
70     //
71
// Constants
72
//
73

74     // scanner states
75

76     /** Scanner state: XML declaration. */
77     protected static final int SCANNER_STATE_XML_DECL = 0;
78
79     /** Scanner state: prolog. */
80     protected static final int SCANNER_STATE_PROLOG = 5;
81
82     /** Scanner state: trailing misc. */
83     protected static final int SCANNER_STATE_TRAILING_MISC = 12;
84
85     /** Scanner state: DTD internal declarations. */
86     protected static final int SCANNER_STATE_DTD_INTERNAL_DECLS = 17;
87
88     /** Scanner state: open DTD external subset. */
89     protected static final int SCANNER_STATE_DTD_EXTERNAL = 18;
90
91     /** Scanner state: DTD external declarations. */
92     protected static final int SCANNER_STATE_DTD_EXTERNAL_DECLS = 19;
93
94     // feature identifiers
95

96     /** Feature identifier: load external DTD. */
97     protected static final String JavaDoc LOAD_EXTERNAL_DTD =
98         Constants.XERCES_FEATURE_PREFIX + Constants.LOAD_EXTERNAL_DTD_FEATURE;
99
100     /** Feature identifier: disallow doctype declaration. */
101     protected static final String JavaDoc DISALLOW_DOCTYPE_DECL_FEATURE =
102         Constants.XERCES_FEATURE_PREFIX + Constants.DISALLOW_DOCTYPE_DECL_FEATURE;
103
104     // property identifiers
105

106     /** Property identifier: DTD scanner. */
107     protected static final String JavaDoc DTD_SCANNER =
108         Constants.XERCES_PROPERTY_PREFIX + Constants.DTD_SCANNER_PROPERTY;
109
110     /** property identifier: ValidationManager */
111     protected static final String JavaDoc VALIDATION_MANAGER =
112         Constants.XERCES_PROPERTY_PREFIX + Constants.VALIDATION_MANAGER_PROPERTY;
113
114     /** property identifier: NamespaceContext */
115     protected static final String JavaDoc NAMESPACE_CONTEXT =
116         Constants.XERCES_PROPERTY_PREFIX + Constants.NAMESPACE_CONTEXT_PROPERTY;
117         
118
119
120     // recognized features and properties
121

122     /** Recognized features. */
123     private static final String JavaDoc[] RECOGNIZED_FEATURES = {
124         LOAD_EXTERNAL_DTD,
125         DISALLOW_DOCTYPE_DECL_FEATURE,
126     };
127
128     /** Feature defaults. */
129     private static final Boolean JavaDoc[] FEATURE_DEFAULTS = {
130         Boolean.TRUE,
131         Boolean.FALSE,
132     };
133
134     /** Recognized properties. */
135     private static final String JavaDoc[] RECOGNIZED_PROPERTIES = {
136         DTD_SCANNER,
137         VALIDATION_MANAGER,
138         NAMESPACE_CONTEXT,
139     };
140
141     /** Property defaults. */
142     private static final Object JavaDoc[] PROPERTY_DEFAULTS = {
143         null,
144         null,
145         null,
146     };
147
148     //
149
// Data
150
//
151

152     // properties
153

154     /** DTD scanner. */
155     protected XMLDTDScanner fDTDScanner;
156     /** Validation manager . */
157     protected ValidationManager fValidationManager;
158
159     // protected data
160

161     /** Scanning DTD. */
162     protected boolean fScanningDTD;
163
164     // other info
165

166     /** Doctype name. */
167     protected String JavaDoc fDoctypeName;
168
169     /** Doctype declaration public identifier. */
170     protected String JavaDoc fDoctypePublicId;
171
172     /** Doctype declaration system identifier. */
173     protected String JavaDoc fDoctypeSystemId;
174
175     /** Namespace support. */
176     protected NamespaceContext fNamespaceContext = new NamespaceSupport();
177
178     // features
179

180     /** Load external DTD. */
181     protected boolean fLoadExternalDTD = true;
182
183     /** Disallow doctype declaration. */
184     protected boolean fDisallowDoctype = false;
185
186     // state
187

188     /** Seen doctype declaration. */
189     protected boolean fSeenDoctypeDecl;
190
191     // dispatchers
192

193     /** XML declaration dispatcher. */
194     protected Dispatcher fXMLDeclDispatcher = new XMLDeclDispatcher();
195
196     /** Prolog dispatcher. */
197     protected Dispatcher fPrologDispatcher = new PrologDispatcher();
198
199     /** DTD dispatcher. */
200     protected Dispatcher fDTDDispatcher = new DTDDispatcher();
201
202     /** Trailing miscellaneous section dispatcher. */
203     protected Dispatcher fTrailingMiscDispatcher = new TrailingMiscDispatcher();
204
205     // temporary variables
206

207     /** Array of 3 strings. */
208     private final String JavaDoc[] fStrings = new String JavaDoc[3];
209
210     /** String. */
211     private final XMLString fString = new XMLString();
212
213     /** String buffer. */
214     private final XMLStringBuffer fStringBuffer = new XMLStringBuffer();
215     
216     /** External subset source. */
217     private XMLInputSource fExternalSubsetSource = null;
218     
219     /** A DTD Description. */
220     private final XMLDTDDescription fDTDDescription = new XMLDTDDescription(null, null, null, null, null);
221
222     //
223
// Constructors
224
//
225

226     /** Default constructor. */
227     public XMLDocumentScannerImpl() {} // <init>()
228

229     //
230
// XMLDocumentScanner methods
231
//
232

233     /**
234      * Sets the input source.
235      *
236      * @param inputSource The input source.
237      *
238      * @throws IOException Thrown on i/o error.
239      */

240     public void setInputSource(XMLInputSource inputSource) throws IOException JavaDoc {
241         fEntityManager.setEntityHandler(this);
242         fEntityManager.startDocumentEntity(inputSource);
243         //fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId());
244
} // setInputSource(XMLInputSource)
245

246     //
247
// XMLComponent methods
248
//
249

250     /**
251      * Resets the component. The component can query the component manager
252      * about any features and properties that affect the operation of the
253      * component.
254      *
255      * @param componentManager The component manager.
256      *
257      * @throws XMLConfigurationException Thrown by component on
258      * initialization error. For example, if a feature or property is
259      * required for the operation of the component, the
260      * component manager may throw an
261      * XMLConfigurationException.
263      */

264     public void reset(XMLComponentManager componentManager)
265         throws XMLConfigurationException {
266
267         super.reset(componentManager);
268
269         // other settings
270
fDoctypeName = null;
271         fDoctypePublicId = null;
272         fDoctypeSystemId = null;
273         fSeenDoctypeDecl = false;
274         fScanningDTD = false;
275         fExternalSubsetSource = null;
276
277         if (!fParserSettings) {
278             // parser settings have not been changed
279
fNamespaceContext.reset();
280             // setup dispatcher
281
setScannerState(SCANNER_STATE_XML_DECL);
282             setDispatcher(fXMLDeclDispatcher);
283             return;
284         }
285
286         // xerces features
287
try {
288             fLoadExternalDTD = componentManager.getFeature(LOAD_EXTERNAL_DTD);
289         }
290         catch (XMLConfigurationException e) {
291             fLoadExternalDTD = true;
292         }
293         try {
294             fDisallowDoctype = componentManager.getFeature(DISALLOW_DOCTYPE_DECL_FEATURE);
295         }
296         catch (XMLConfigurationException e) {
297             fDisallowDoctype = false;
298         }
299
300         // xerces properties
301
fDTDScanner = (XMLDTDScanner)componentManager.getProperty(DTD_SCANNER);
302         try {
303             fValidationManager = (ValidationManager)componentManager.getProperty(VALIDATION_MANAGER);
304         }
305         catch (XMLConfigurationException e) {
306             fValidationManager = null;
307         }
308
309         try {
310             fNamespaceContext = (NamespaceContext)componentManager.getProperty(NAMESPACE_CONTEXT);
311         }
312         catch (XMLConfigurationException e) { }
313         if (fNamespaceContext == null) {
314             fNamespaceContext = new NamespaceSupport();
315         }
316         fNamespaceContext.reset();
317         
318         // setup dispatcher
319
setScannerState(SCANNER_STATE_XML_DECL);
320         setDispatcher(fXMLDeclDispatcher);
321
322     } // reset(XMLComponentManager)
323

324     /**
325      * Returns a list of feature identifiers that are recognized by
326      * this component. This method may return null if no features
327      * are recognized by this component.
328      */

329     public String JavaDoc[] getRecognizedFeatures() {
330         String JavaDoc[] featureIds = super.getRecognizedFeatures();
331         int length = featureIds != null ? featureIds.length : 0;
332         String JavaDoc[] combinedFeatureIds = new String JavaDoc[length + RECOGNIZED_FEATURES.length];
333         if (featureIds != null) {
334             System.arraycopy(featureIds, 0, combinedFeatureIds, 0, featureIds.length);
335         }
336         System.arraycopy(RECOGNIZED_FEATURES, 0, combinedFeatureIds, length, RECOGNIZED_FEATURES.length);
337         return combinedFeatureIds;
338     } // getRecognizedFeatures():String[]
339

340     /**
341      * Sets the state of a feature. This method is called by the component
342      * manager any time after reset when a feature changes state.
343      * <p>
344      * <strong>Note:</strong> Components should silently ignore features
345      * that do not affect the operation of the component.
346      *
347      * @param featureId The feature identifier.
348      * @param state The state of the feature.
349      *
350      * @throws XMLConfigurationException The component should not throw
351      * this exception.
354      */

355     public void setFeature(String JavaDoc featureId, boolean state)
356         throws XMLConfigurationException {
357
358         super.setFeature(featureId, state);
359
360         // Xerces properties
361
if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) {
362             final int suffixLength = featureId.length() - Constants.XERCES_FEATURE_PREFIX.length();
363             
364             if (suffixLength == Constants.LOAD_EXTERNAL_DTD_FEATURE.length() &&
365                 featureId.endsWith(Constants.LOAD_EXTERNAL_DTD_FEATURE)) {
366                 fLoadExternalDTD = state;
367                 return;
368             }
369             else if (suffixLength == Constants.DISALLOW_DOCTYPE_DECL_FEATURE.length() &&
370                 featureId.endsWith(Constants.DISALLOW_DOCTYPE_DECL_FEATURE)) {
371                 fDisallowDoctype = state;
372                 return;
373             }
374         }
375
376     } // setFeature(String,boolean)
377

378     /**
379      * Returns a list of property identifiers that are recognized by
380      * this component. This method may return null if no properties
381      * are recognized by this component.
382      */

383     public String JavaDoc[] getRecognizedProperties() {
384         String JavaDoc[] propertyIds = super.getRecognizedProperties();
385         int length = propertyIds != null ? propertyIds.length : 0;
386         String JavaDoc[] combinedPropertyIds = new String JavaDoc[length + RECOGNIZED_PROPERTIES.length];
387         if (propertyIds != null) {
388             System.arraycopy(propertyIds, 0, combinedPropertyIds, 0, propertyIds.length);
389         }
390         System.arraycopy(RECOGNIZED_PROPERTIES, 0, combinedPropertyIds, length, RECOGNIZED_PROPERTIES.length);
391         return combinedPropertyIds;
392     } // getRecognizedProperties():String[]
393

394     /**
395      * Sets the value of a property. This method is called by the component
396      * manager any time after reset when a property changes value.
397      * <p>
398      * <strong>Note:</strong> Components should silently ignore properties
399      * that do not affect the operation of the component.
400      *
401      * @param propertyId The property identifier.
402      * @param value The value of the property.
403      *
404      * @throws XMLConfigurationException The component should not throw
405      * this exception.
408      */

409     public void setProperty(String JavaDoc propertyId, Object JavaDoc value)
410         throws XMLConfigurationException {
411
412         super.setProperty(propertyId, value);
413
414         // Xerces properties
415
if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
416             final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length();
417             
418             if (suffixLength == Constants.DTD_SCANNER_PROPERTY.length() &&
419                 propertyId.endsWith(Constants.DTD_SCANNER_PROPERTY)) {
420                 fDTDScanner = (XMLDTDScanner)value;
421             }
422             if (suffixLength == Constants.NAMESPACE_CONTEXT_PROPERTY.length() &&
423                 propertyId.endsWith(Constants.NAMESPACE_CONTEXT_PROPERTY)) {
424                 if (value != null) {
425                     fNamespaceContext = (NamespaceContext)value;
426                 }
427             }
428
429             return;
430         }
431
432     } // setProperty(String,Object)
433

434     /**
435      * Returns the default state for a feature, or null if this
436      * component does not want to report a default value for this
437      * feature.
438      *
439      * @param featureId The feature identifier.
440      *
441      * @since Xerces 2.2.0
442      */

443     public Boolean JavaDoc getFeatureDefault(String JavaDoc featureId) {
444
445         for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) {
446             if (RECOGNIZED_FEATURES[i].equals(featureId)) {
447                 return FEATURE_DEFAULTS[i];
448             }
449         }
450         return super.getFeatureDefault(featureId);
451     } // getFeatureDefault(String):Boolean
452

453     /**
454      * Returns the default state for a property, or null if this
455      * component does not want to report a default value for this
456      * property.
457      *
458      * @param propertyId The property identifier.
459      *
460      * @since Xerces 2.2.0
461      */

462     public Object JavaDoc getPropertyDefault(String JavaDoc propertyId) {
463         for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) {
464             if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) {
465                 return PROPERTY_DEFAULTS[i];
466             }
467         }
468         return super.getPropertyDefault(propertyId);
469     } // getPropertyDefault(String):Object
470

471     //
472
// XMLEntityHandler methods
473
//
474

475     /**
476      * This method notifies of the start of an entity. The DTD has the
477      * pseudo-name of "[dtd]" parameter entity names start with '%'; and
478      * general entities are just specified by their name.
479      *
480      * @param name The name of the entity.
481      * @param identifier The resource identifier.
482      * @param encoding The auto-detected IANA encoding name of the entity
483      * stream. This value will be null in those situations
484      * where the entity encoding is not auto-detected (e.g.
485      * internal entities or a document entity that is
486      * parsed from a java.io.Reader).
487      *
488      * @throws XNIException Thrown by handler to signal an error.
489      */

490     public void startEntity(String JavaDoc name,
491                             XMLResourceIdentifier identifier,
492                             String JavaDoc encoding, Augmentations augs) throws XNIException {
493
494         super.startEntity(name, identifier, encoding, augs);
495
496         // prepare to look for a TextDecl if external general entity
497
if (!name.equals("[xml]") && fEntityScanner.isExternal()) {
498             setScannerState(SCANNER_STATE_TEXT_DECL);
499         }
500
501         // call handler
502
if (fDocumentHandler != null && name.equals("[xml]")) {
503             fDocumentHandler.startDocument(fEntityScanner, encoding, fNamespaceContext, null);
504         }
505
506     } // startEntity(String,identifier,String)
507

508     /**
509      * This method notifies the end of an entity. The DTD has the pseudo-name
510      * of "[dtd]" parameter entity names start with '%'; and general entities
511      * are just specified by their name.
512      *
513      * @param name The name of the entity.
514      *
515      * @throws XNIException Thrown by handler to signal an error.
516      */

517     public void endEntity(String JavaDoc name, Augmentations augs) throws XNIException {
518
519         super.endEntity(name, augs);
520
521         // call handler
522
if (fDocumentHandler != null && name.equals("[xml]")) {
523             fDocumentHandler.endDocument(null);
524         }
525
526     } // endEntity(String)
527

528     //
529
// Protected methods
530
//
531

532     // dispatcher factory methods
533

534     /** Creates a content dispatcher. */
535     protected Dispatcher createContentDispatcher() {
536         return new ContentDispatcher();
537     } // createContentDispatcher():Dispatcher
538

539     // scanning methods
540

541     /** Scans a doctype declaration. */
542     protected boolean scanDoctypeDecl() throws IOException JavaDoc, XNIException {
543
544         // spaces
545
if (!fEntityScanner.skipSpaces()) {
546             reportFatalError("MSG_SPACE_REQUIRED_BEFORE_ROOT_ELEMENT_TYPE_IN_DOCTYPEDECL",
547                              null);
548         }
549
550         // root element name
551
fDoctypeName = fEntityScanner.scanName();
552         if (fDoctypeName == null) {
553             reportFatalError("MSG_ROOT_ELEMENT_TYPE_REQUIRED", null);
554         }
555
556         // external id
557
if (fEntityScanner.skipSpaces()) {
558             scanExternalID(fStrings, false);
559             fDoctypeSystemId = fStrings[0];
560             fDoctypePublicId = fStrings[1];
561             fEntityScanner.skipSpaces();
562         }
563
564         fHasExternalDTD = fDoctypeSystemId != null;
565         
566         // Attempt to locate an external subset with an external subset resolver.
567
if (!fHasExternalDTD && fExternalSubsetResolver != null) {
568             fDTDDescription.setValues(null, null, fEntityManager.getCurrentResourceIdentifier().getExpandedSystemId(), null);
569             fDTDDescription.setRootName(fDoctypeName);
570             fExternalSubsetSource = fExternalSubsetResolver.getExternalSubset(fDTDDescription);
571             fHasExternalDTD = fExternalSubsetSource != null;
572         }
573
574         // call handler
575
if (fDocumentHandler != null) {
576             // NOTE: I don't like calling the doctypeDecl callback until
577
// end of the *full* doctype line (including internal
578
// subset) is parsed correctly but SAX2 requires that
579
// it knows the root element name and public and system
580
// identifier for the startDTD call. -Ac
581
if (fExternalSubsetSource == null) {
582                 fDocumentHandler.doctypeDecl(fDoctypeName, fDoctypePublicId, fDoctypeSystemId, null);
583             }
584             else {
585                 fDocumentHandler.doctypeDecl(fDoctypeName, fExternalSubsetSource.getPublicId(), fExternalSubsetSource.getSystemId(), null);
586             }
587         }
588
589         // is there an internal subset?
590
boolean internalSubset = true;
591         if (!fEntityScanner.skipChar('[')) {
592             internalSubset = false;
593             fEntityScanner.skipSpaces();
594             if (!fEntityScanner.skipChar('>')) {
595                 reportFatalError("DoctypedeclUnterminated", new Object JavaDoc[]{fDoctypeName});
596             }
597             fMarkupDepth--;
598         }
599
600         return internalSubset;
601
602     } // scanDoctypeDecl():boolean
603

604     //
605
// Private methods
606
//
607

608     /** Returns the scanner state name. */
609     protected String JavaDoc getScannerStateName(int state) {
610
611         switch (state) {
612             case SCANNER_STATE_XML_DECL: return "SCANNER_STATE_XML_DECL";
613             case SCANNER_STATE_PROLOG: return "SCANNER_STATE_PROLOG";
614             case SCANNER_STATE_TRAILING_MISC: return "SCANNER_STATE_TRAILING_MISC";
615             case SCANNER_STATE_DTD_INTERNAL_DECLS: return "SCANNER_STATE_DTD_INTERNAL_DECLS";
616             case SCANNER_STATE_DTD_EXTERNAL: return "SCANNER_STATE_DTD_EXTERNAL";
617             case SCANNER_STATE_DTD_EXTERNAL_DECLS: return "SCANNER_STATE_DTD_EXTERNAL_DECLS";
618         }
619         return super.getScannerStateName(state);
620
621     } // getScannerStateName(int):String
622

623     //
624
// Classes
625
//
626

627     /**
628      * Dispatcher to handle XMLDecl scanning.
629      *
630      * @author Andy Clark, IBM
631      */

632     protected final class XMLDeclDispatcher
633         implements Dispatcher {
634
635         //
636
// Dispatcher methods
637
//
638

639         /**
640          * Dispatch an XML "event".
641          *
642          * @param complete True if this dispatcher is intended to scan
643          * and dispatch as much as possible.
644          *
645          * @return True if there is more to dispatch either from this
646          * or a another dispatcher.
647          *
648          * @throws IOException Thrown on i/o error.
649          * @throws XNIException Thrown on parse error.
650          */

651         public boolean dispatch(boolean complete)
652             throws IOException JavaDoc, XNIException {
653
654             // next dispatcher is prolog regardless of whether there
655
// is an XMLDecl in this document
656
setScannerState(SCANNER_STATE_PROLOG);
657             setDispatcher(fPrologDispatcher);
658
659             // scan XMLDecl
660
try {
661                 if (fEntityScanner.skipString("<?xml")) {
662                     fMarkupDepth++;
663                     // NOTE: special case where document starts with a PI
664
// whose name starts with "xml" (e.g. "xmlfoo")
665
if (XMLChar.isName(fEntityScanner.peekChar())) {
666                         fStringBuffer.clear();
667                         fStringBuffer.append("xml");
668                         if (fNamespaces) {
669                             while (XMLChar.isNCName(fEntityScanner.peekChar())) {
670                                 fStringBuffer.append((char)fEntityScanner.scanChar());
671                             }
672                         }
673                         else {
674                             while (XMLChar.isName(fEntityScanner.peekChar())) {
675                                 fStringBuffer.append((char)fEntityScanner.scanChar());
676                             }
677                         }
678                         String JavaDoc target = fSymbolTable.addSymbol(fStringBuffer.ch, fStringBuffer.offset, fStringBuffer.length);
679                         scanPIData(target, fString);
680                     }
681
682                     // standard XML declaration
683
else {
684                         scanXMLDeclOrTextDecl(false);
685                     }
686                 }
687                 fEntityManager.fCurrentEntity.mayReadChunks = true;
688
689                 // if no XMLDecl, then scan piece of prolog
690
return true;
691             }
692             // encoding errors
693
catch (MalformedByteSequenceException e) {
694                 fErrorReporter.reportError(e.getDomain(), e.getKey(),
695                     e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR);
696                 return false;
697             }
698             catch (CharConversionException JavaDoc e) {
699                 reportFatalError("CharConversionFailure", null);
700                 return false;
701             }
702             // premature end of file
703
catch (EOFException JavaDoc e) {
704                 reportFatalError("PrematureEOF", null);
705                 return false;
706                 //throw e;
707
}
708
709
710         } // dispatch(boolean):boolean
711

712     } // class XMLDeclDispatcher
713

714     /**
715      * Dispatcher to handle prolog scanning.
716      *
717      * @author Andy Clark, IBM
718      */

719     protected final class PrologDispatcher
720         implements Dispatcher {
721
722         //
723
// Dispatcher methods
724
//
725

726         /**
727          * Dispatch an XML "event".
728          *
729          * @param complete True if this dispatcher is intended to scan
730          * and dispatch as much as possible.
731          *
732          * @return True if there is more to dispatch either from this
733          * or another dispatcher.
734          *
735          * @throws IOException Thrown on i/o error.
736          * @throws XNIException Thrown on parse error.
737          */

738         public boolean dispatch(boolean complete)
739             throws IOException JavaDoc, XNIException {
740
741             try {
742                 boolean again;
743                 do {
744                     again = false;
745                     switch (fScannerState) {
746                         case SCANNER_STATE_PROLOG: {
747                             fEntityScanner.skipSpaces();
748                             if (fEntityScanner.skipChar('<')) {
749                                 setScannerState(SCANNER_STATE_START_OF_MARKUP);
750                                 again = true;
751                             }
752                             else if (fEntityScanner.skipChar('&')) {
753                                 setScannerState(SCANNER_STATE_REFERENCE);
754                                 again = true;
755                             }
756                             else {
757                                 setScannerState(SCANNER_STATE_CONTENT);
758                                 again = true;
759                             }
760                             break;
761                         }
762                         case SCANNER_STATE_START_OF_MARKUP: {
763                             fMarkupDepth++;
764                             if (fEntityScanner.skipChar('!')) {
765                                 if (fEntityScanner.skipChar('-')) {
766                                     if (!fEntityScanner.skipChar('-')) {
767                                         reportFatalError("InvalidCommentStart",
768                                                          null);
769                                     }
770                                     setScannerState(SCANNER_STATE_COMMENT);
771                                     again = true;
772                                 }
773                                 else if (fEntityScanner.skipString("DOCTYPE")) {
774                                     setScannerState(SCANNER_STATE_DOCTYPE);
775                                     again = true;
776                                 }
777                                 else {
778                                     reportFatalError("MarkupNotRecognizedInProlog",
779                                                      null);
780                                 }
781                             }
782                             else if (isValidNameStartChar(fEntityScanner.peekChar())) {
783                                 setScannerState(SCANNER_STATE_ROOT_ELEMENT);
784                                 setDispatcher(fContentDispatcher);
785                                 return true;
786                             }
787                             else if (fEntityScanner.skipChar('?')) {
788                                 setScannerState(SCANNER_STATE_PI);
789                                 again = true;
790                             }
791                             else if (isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
792                                 setScannerState(SCANNER_STATE_ROOT_ELEMENT);
793                                 setDispatcher(fContentDispatcher);
794                                 return true;
795                             }
796                             else {
797                                 reportFatalError("MarkupNotRecognizedInProlog",
798                                                  null);
799                             }
800                             break;
801                         }
802                         case SCANNER_STATE_COMMENT: {
803                             scanComment();
804                             setScannerState(SCANNER_STATE_PROLOG);
805                             break;
806                         }
807                         case SCANNER_STATE_PI: {
808                             scanPI();
809                             setScannerState(SCANNER_STATE_PROLOG);
810                             break;
811                         }
812                         case SCANNER_STATE_DOCTYPE: {
813                             if (fDisallowDoctype) {
814                                 reportFatalError("DoctypeNotAllowed", null);
815                             }
816                             if (fSeenDoctypeDecl) {
817                                 reportFatalError("AlreadySeenDoctype", null);
818                             }
819                             fSeenDoctypeDecl = true;
820
821                             // scanDoctypeDecl() sends XNI doctypeDecl event that
822
// in SAX is converted to startDTD() event.
823
if (scanDoctypeDecl()) {
824                                 setScannerState(SCANNER_STATE_DTD_INTERNAL_DECLS);
825                                 setDispatcher(fDTDDispatcher);
826                                 return true;
827                             }
828                             
829                             // handle external subset
830
if (fDoctypeSystemId != null) {
831                                 if (((fValidation || fLoadExternalDTD)
832                                     && (fValidationManager == null || !fValidationManager.isCachedDTD()))) {
833                                     setScannerState(SCANNER_STATE_DTD_EXTERNAL);
834                                     setDispatcher(fDTDDispatcher);
835                                     return true;
836                                 }
837                             }
838                             else if (fExternalSubsetSource != null) {
839                                 if (((fValidation || fLoadExternalDTD)
840                                     && (fValidationManager == null || !fValidationManager.isCachedDTD()))) {
841                                     // This handles the case of a DOCTYPE that had neither an internal subset or an external subset.
842
fDTDScanner.setInputSource(fExternalSubsetSource);
843                                     fExternalSubsetSource = null;
844                                     setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS);
845                                     setDispatcher(fDTDDispatcher);
846                                     return true;
847                                 }
848                             }
849                             
850                             // Send endDTD() call if:
851
// a) systemId is null or if an external subset resolver could not locate an external subset.
852
// b) "load-external-dtd" and validation are false
853
// c) DTD grammar is cached
854

855                             // in XNI this results in 3 events: doctypeDecl, startDTD, endDTD
856
// in SAX this results in 2 events: startDTD, endDTD
857
fDTDScanner.setInputSource(null);
858                             setScannerState(SCANNER_STATE_PROLOG);
859                             break;
860                         }
861                         case SCANNER_STATE_CONTENT: {
862                             reportFatalError("ContentIllegalInProlog", null);
863                             fEntityScanner.scanChar();
864                         }
865                         case SCANNER_STATE_REFERENCE: {
866                             reportFatalError("ReferenceIllegalInProlog", null);
867                         }
868                     }
869                 } while (complete || again);
870
871                 if (complete) {
872                     if (fEntityScanner.scanChar() != '<') {
873                         reportFatalError("RootElementRequired", null);
874                     }
875                     setScannerState(SCANNER_STATE_ROOT_ELEMENT);
876                     setDispatcher(fContentDispatcher);
877                 }
878             }
879             // encoding errors
880
catch (MalformedByteSequenceException e) {
881                 fErrorReporter.reportError(e.getDomain(), e.getKey(),
882                     e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR);
883                 return false;
884             }
885             catch (CharConversionException JavaDoc e) {
886                 reportFatalError("CharConversionFailure", null);
887                 return false;
888             }
889             // premature end of file
890
catch (EOFException JavaDoc e) {
891                 reportFatalError("PrematureEOF", null);
892                 return false;
893                 //throw e;
894
}
895
896             return true;
897
898         } // dispatch(boolean):boolean
899

900     } // class PrologDispatcher
901

902     /**
903      * Dispatcher to handle the internal and external DTD subsets.
904      *
905      * @author Andy Clark, IBM
906      */

907     protected final class DTDDispatcher
908         implements Dispatcher {
909
910         //
911
// Dispatcher methods
912
//
913

914         /**
915          * Dispatch an XML "event".
916          *
917          * @param complete True if this dispatcher is intended to scan
918          * and dispatch as much as possible.
919          *
920          * @return True if there is more to dispatch either from this
921          * or a another dispatcher.
922          *
923          * @throws IOException Thrown on i/o error.
924          * @throws XNIException Thrown on parse error.
925          */

926         public boolean dispatch(boolean complete)
927             throws IOException JavaDoc, XNIException {
928             fEntityManager.setEntityHandler(null);
929             try {
930                 boolean again;
931                 do {
932                     again = false;
933                     switch (fScannerState) {
934                         case SCANNER_STATE_DTD_INTERNAL_DECLS: {
935                             // REVISIT: Should there be a feature for
936
// the "complete" parameter?
937
boolean completeDTD = true;
938
939                             boolean moreToScan = fDTDScanner.scanDTDInternalSubset(completeDTD, fStandalone, fHasExternalDTD && fLoadExternalDTD);
940                             if (!moreToScan) {
941                                 // end doctype declaration
942
if (!fEntityScanner.skipChar(']')) {
943                                     reportFatalError("EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET",
944                                                      null);
945                                 }
946                                 fEntityScanner.skipSpaces();
947                                 if (!fEntityScanner.skipChar('>')) {
948                                     reportFatalError("DoctypedeclUnterminated", new Object JavaDoc[]{fDoctypeName});
949                                 }
950                                 fMarkupDepth--;
951
952                                 // scan external subset next
953
if (fDoctypeSystemId != null) {
954                                     if ((fValidation || fLoadExternalDTD)
955                                         && (fValidationManager == null || !fValidationManager.isCachedDTD())) {
956                                         setScannerState(SCANNER_STATE_DTD_EXTERNAL);
957                                         break;
958                                     }
959                                 }
960                                 else if (fExternalSubsetSource != null) {
961                                     if ((fValidation || fLoadExternalDTD)
962                                         && (fValidationManager == null || !fValidationManager.isCachedDTD())) {
963                                         // This handles the case of a DOCTYPE that only had an internal subset.
964
fDTDScanner.setInputSource(fExternalSubsetSource);
965                                         fExternalSubsetSource = null;
966                                         setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS);
967                                         break;
968                                     }
969                                 }
970                                 
971                                 // break out of this dispatcher.
972
setScannerState(SCANNER_STATE_PROLOG);
973                                 setDispatcher(fPrologDispatcher);
974                                 fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this);
975                                 return true;
976                             }
977                             break;
978                         }
979                         case SCANNER_STATE_DTD_EXTERNAL: {
980                             fDTDDescription.setValues(fDoctypePublicId, fDoctypeSystemId, null, null);
981                             fDTDDescription.setRootName(fDoctypeName);
982                             XMLInputSource xmlInputSource =
983                                 fEntityManager.resolveEntity(fDTDDescription);
984                             fDTDScanner.setInputSource(xmlInputSource);
985                             setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS);
986                             again = true;
987                             break;
988                         }
989                         case SCANNER_STATE_DTD_EXTERNAL_DECLS: {
990                             // REVISIT: Should there be a feature for
991
// the "complete" parameter?
992
boolean completeDTD = true;
993                             boolean moreToScan = fDTDScanner.scanDTDExternalSubset(completeDTD);
994                             if (!moreToScan) {
995                                 setScannerState(SCANNER_STATE_PROLOG);
996                                 setDispatcher(fPrologDispatcher);
997                                 fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this);
998                                 return true;
999                             }
1000                            break;
1001                        }
1002                        default: {
1003                            throw new XNIException("DTDDispatcher#dispatch: scanner state="+fScannerState+" ("+getScannerStateName(fScannerState)+')');
1004                        }
1005                    }
1006                } while (complete || again);
1007            }
1008            // encoding errors
1009
catch (MalformedByteSequenceException e) {
1010                fErrorReporter.reportError(e.getDomain(), e.getKey(),
1011                    e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR);
1012                return false;
1013            }
1014            catch (CharConversionException JavaDoc e) {
1015                reportFatalError("CharConversionFailure", null);
1016                return false;
1017            }
1018            // premature end of file
1019
catch (EOFException JavaDoc e) {
1020                reportFatalError("PrematureEOF", null);
1021                return false;
1022                //throw e;
1023
}
1024
1025            // cleanup
1026
finally {
1027                fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this);
1028            }
1029
1030            return true;
1031
1032        } // dispatch(boolean):boolean
1033

1034    } // class DTDDispatcher
1035

1036    /**
1037     * Dispatcher to handle content scanning.
1038     *
1039     * @author Andy Clark, IBM
1040     * @author Eric Ye, IBM
1041     */

1042    protected class ContentDispatcher
1043        extends FragmentContentDispatcher {
1044
1045        //
1046
// Protected methods
1047
//
1048

1049        // hooks
1050

1051        // NOTE: These hook methods are added so that the full document
1052
// scanner can share the majority of code with this class.
1053

1054        /**
1055         * Scan for DOCTYPE hook. This method is a hook for subclasses
1056         * to add code to handle scanning for a the "DOCTYPE" string
1057         * after the string "<!" has been scanned.
1058         *
1059         * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE"
1060         * was not scanned.
1061         */

1062        protected boolean scanForDoctypeHook()
1063            throws IOException JavaDoc, XNIException {
1064
1065            if (fEntityScanner.skipString("DOCTYPE")) {
1066                setScannerState(SCANNER_STATE_DOCTYPE);
1067                return true;
1068            }
1069            return false;
1070
1071        } // scanForDoctypeHook():boolean
1072

1073        /**
1074         * Element depth iz zero. This methos is a hook for subclasses
1075         * to add code to handle when the element depth hits zero. When
1076         * scanning a document fragment, an element depth of zero is
1077         * normal. However, when scanning a full XML document, the
1078         * scanner must handle the trailing miscellanous section of
1079         * the document after the end of the document's root element.
1080         *
1081         * @return True if the caller should stop and return true which
1082         * allows the scanner to switch to a new scanning
1083         * dispatcher. A return value of false indicates that
1084         * the content dispatcher should continue as normal.
1085         */

1086        protected boolean elementDepthIsZeroHook()
1087            throws IOException JavaDoc, XNIException {
1088
1089            setScannerState(SCANNER_STATE_TRAILING_MISC);
1090            setDispatcher(fTrailingMiscDispatcher);
1091            return true;
1092
1093        } // elementDepthIsZeroHook():boolean
1094

1095        /**
1096         * Scan for root element hook. This method is a hook for
1097         * subclasses to add code that handles scanning for the root
1098         * element. When scanning a document fragment, there is no
1099         * "root" element. However, when scanning a full XML document,
1100         * the scanner must handle the root element specially.
1101         *
1102         * @return True if the caller should stop and return true which
1103         * allows the scanner to switch to a new scanning
1104         * dispatcher. A return value of false indicates that
1105         * the content dispatcher should continue as normal.
1106         */

1107        protected boolean scanRootElementHook()
1108            throws IOException JavaDoc, XNIException {
1109
1110            if (fExternalSubsetResolver != null && !fSeenDoctypeDecl
1111                && !fDisallowDoctype && (fValidation || fLoadExternalDTD)) {
1112                scanStartElementName();
1113                resolveExternalSubsetAndRead();
1114                if (scanStartElementAfterName()) {
1115                    setScannerState(SCANNER_STATE_TRAILING_MISC);
1116                    setDispatcher(fTrailingMiscDispatcher);
1117                    return true;
1118                }
1119            }
1120            else if (scanStartElement()) {
1121                setScannerState(SCANNER_STATE_TRAILING_MISC);
1122                setDispatcher(fTrailingMiscDispatcher);
1123                return true;
1124            }
1125            return false;
1126
1127        } // scanRootElementHook():boolean
1128

1129        /**
1130         * End of file hook. This method is a hook for subclasses to
1131         * add code that handles the end of file. The end of file in
1132         * a document fragment is OK if the markup depth is zero.
1133         * However, when scanning a full XML document, an end of file
1134         * is always premature.
1135         */

1136        protected void endOfFileHook(EOFException JavaDoc e)
1137            throws IOException JavaDoc, XNIException {
1138
1139            reportFatalError("PrematureEOF", null);
1140            // in case continue-after-fatal-error set, should not do this...
1141
//throw e;
1142

1143        } // endOfFileHook()
1144

1145        /**
1146         * <p>Attempt to locate an external subset for a document that does not otherwise
1147         * have one. If an external subset is located, then it is scanned.</p>
1148         */

1149        protected void resolveExternalSubsetAndRead()
1150            throws IOException JavaDoc, XNIException {
1151            
1152            fDTDDescription.setValues(null, null, fEntityManager.getCurrentResourceIdentifier().getExpandedSystemId(), null);
1153            fDTDDescription.setRootName(fElementQName.rawname);
1154            XMLInputSource src = fExternalSubsetResolver.getExternalSubset(fDTDDescription);
1155            
1156            if (src != null) {
1157                fDoctypeName = fElementQName.rawname;
1158                fDoctypePublicId = src.getPublicId();
1159                fDoctypeSystemId = src.getSystemId();
1160                // call document handler
1161
if (fDocumentHandler != null) {
1162                    // This inserts a doctypeDecl event into the stream though no
1163
// DOCTYPE existed in the instance document.
1164
fDocumentHandler.doctypeDecl(fDoctypeName, fDoctypePublicId, fDoctypeSystemId, null);
1165                }
1166                try {
1167                    if (fValidationManager == null || !fValidationManager.isCachedDTD()) {
1168                        fDTDScanner.setInputSource(src);
1169                        while (fDTDScanner.scanDTDExternalSubset(true));
1170                    }
1171                    else {
1172                        // This sends startDTD and endDTD calls down the pipeline.
1173
fDTDScanner.setInputSource(null);
1174                    }
1175                }
1176                finally {
1177                    fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this);
1178                }
1179            }
1180        } // resolveExternalSubsetAndRead()
1181

1182    } // class ContentDispatcher
1183

1184    /**
1185     * Dispatcher to handle trailing miscellaneous section scanning.
1186     *
1187     * @author Andy Clark, IBM
1188     * @author Eric Ye, IBM
1189     */

1190    protected final class TrailingMiscDispatcher
1191        implements Dispatcher {
1192
1193        //
1194
// Dispatcher methods
1195
//
1196

1197        /**
1198         * Dispatch an XML "event".
1199         *
1200         * @param complete True if this dispatcher is intended to scan
1201         * and dispatch as much as possible.
1202         *
1203         * @return True if there is more to dispatch either from this
1204         * or a another dispatcher.
1205         *
1206         * @throws IOException Thrown on i/o error.
1207         * @throws XNIException Thrown on parse error.
1208         */

1209        public boolean dispatch(boolean complete)
1210            throws IOException JavaDoc, XNIException {
1211
1212            try {
1213                boolean again;
1214                do {
1215                    again = false;
1216                    switch (fScannerState) {
1217                        case SCANNER_STATE_TRAILING_MISC: {
1218                            fEntityScanner.skipSpaces();
1219                            if (fEntityScanner.skipChar('<')) {
1220                                setScannerState(SCANNER_STATE_START_OF_MARKUP);
1221                                again = true;
1222                            }
1223                            else {
1224                                setScannerState(SCANNER_STATE_CONTENT);
1225                                again = true;
1226                            }
1227                            break;
1228                        }
1229                        case SCANNER_STATE_START_OF_MARKUP: {
1230                            fMarkupDepth++;
1231                            if (fEntityScanner.skipChar('?')) {
1232                                setScannerState(SCANNER_STATE_PI);
1233                                again = true;
1234                            }
1235                            else if (fEntityScanner.skipChar('!')) {
1236                                setScannerState(SCANNER_STATE_COMMENT);
1237                                again = true;
1238                            }
1239                            else if (fEntityScanner.skipChar('/')) {
1240                                reportFatalError("MarkupNotRecognizedInMisc",
1241                                                 null);
1242                                again = true;
1243                            }
1244                            else if (isValidNameStartChar(fEntityScanner.peekChar())) {
1245                                reportFatalError("MarkupNotRecognizedInMisc",
1246                                                 null);
1247                                scanStartElement();
1248                                setScannerState(SCANNER_STATE_CONTENT);
1249                            }
1250                            else if (isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
1251                                reportFatalError("MarkupNotRecognizedInMisc",
1252                                                 null);
1253                                scanStartElement();
1254                                setScannerState(SCANNER_STATE_CONTENT);
1255                            }
1256                            else {
1257                                reportFatalError("MarkupNotRecognizedInMisc",
1258                                                 null);
1259                            }
1260                            break;
1261                        }
1262                        case SCANNER_STATE_PI: {
1263                            scanPI();
1264                            setScannerState(SCANNER_STATE_TRAILING_MISC);
1265                            break;
1266                        }
1267                        case SCANNER_STATE_COMMENT: {
1268                            if (!fEntityScanner.skipString("--")) {
1269                                reportFatalError("InvalidCommentStart", null);
1270                            }
1271                            scanComment();
1272                            setScannerState(SCANNER_STATE_TRAILING_MISC);
1273                            break;
1274                        }
1275                        case SCANNER_STATE_CONTENT: {
1276                            int ch = fEntityScanner.peekChar();
1277                            if (ch == -1) {
1278                                setScannerState(SCANNER_STATE_TERMINATED);
1279                                return false;
1280                            }
1281                            reportFatalError("ContentIllegalInTrailingMisc",
1282                                             null);
1283                            fEntityScanner.scanChar();
1284                            setScannerState(SCANNER_STATE_TRAILING_MISC);
1285                            break;
1286                        }
1287                        case SCANNER_STATE_REFERENCE: {
1288                            reportFatalError("ReferenceIllegalInTrailingMisc",
1289                                             null);
1290                            setScannerState(SCANNER_STATE_TRAILING_MISC);
1291                            break;
1292                        }
1293                        case SCANNER_STATE_TERMINATED: {
1294                            return false;
1295                        }
1296                    }
1297                } while (complete || again);
1298            }
1299            // encoding errors
1300
catch (MalformedByteSequenceException e) {
1301                fErrorReporter.reportError(e.getDomain(), e.getKey(),
1302                    e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR);
1303                return false;
1304            }
1305            catch (CharConversionException JavaDoc e) {
1306                reportFatalError("CharConversionFailure", null);
1307                return false;
1308            }
1309            catch (EOFException JavaDoc e) {
1310                // NOTE: This is the only place we're allowed to reach
1311
// the real end of the document stream. Unless the
1312
// end of file was reached prematurely.
1313
if (fMarkupDepth != 0) {
1314                    reportFatalError("PrematureEOF", null);
1315                    return false;
1316                    //throw e;
1317
}
1318
1319                setScannerState(SCANNER_STATE_TERMINATED);
1320                return false;
1321            }
1322
1323            return true;
1324
1325        } // dispatch(boolean):boolean
1326

1327    } // class TrailingMiscDispatcher
1328

1329} // class XMLDocumentScannerImpl
1330
Popular Tags