KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > apache > xerces > framework > XMLDocumentScanner


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 1999,2000 The Apache Software Foundation. All rights
6  * reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

57
58 package org.enhydra.apache.xerces.framework;
59
60 import org.enhydra.apache.xerces.readers.DefaultEntityHandler;
61 import org.enhydra.apache.xerces.readers.XMLEntityHandler;
62 import org.enhydra.apache.xerces.utils.ChunkyCharArray;
63 import org.enhydra.apache.xerces.utils.QName;
64 import org.enhydra.apache.xerces.utils.StringPool;
65 import org.enhydra.apache.xerces.utils.XMLCharacterProperties;
66 import org.enhydra.apache.xerces.utils.XMLMessages;
67 import org.enhydra.apache.xerces.validators.common.GrammarResolver;
68
69 /**
70  * This class recognizes most of the grammar for an XML processor.
71  * Additional support is provided by the XMLEntityHandler, via the
72  * XMLEntityReader instances it creates, which are used to process
73  * simple constructs like string literals and character data between
74  * markup. The XMLDTDScanner class contains the remaining support
75  * for the grammar of DTD declarations. When a &lt;!DOCTYPE ...&gt; is
76  * found in the document, the scanDoctypeDecl method will then be
77  * called and the XMLDocumentScanner subclass is responsible for
78  * "connecting" that method to the corresponding method provided
79  * by the XMLDTDScanner class.
80  *
81  * @version $Id: XMLDocumentScanner.java,v 1.2 2005/01/26 08:28:44 jkjome Exp $
82  */

83 public final class XMLDocumentScanner {
84     //
85
// Constants
86
//
87

88     //
89
// These character arrays are used as parameters for calls to the
90
// XMLEntityHandler.EntityReader skippedString() method. Some have
91
// package access for use by the inner dispatcher classes.
92
//
93

94     //
95
// [19] CDStart ::= '<![CDATA['
96
//
97
static final char[] cdata_string = { '[','C','D','A','T','A','[' };
98     //
99
// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
100
// [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
101
//
102
static final char[] xml_string = { 'x','m','l' };
103     //
104
// [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
105
//
106
private static final char[] version_string = { 'v','e','r','s','i','o','n' };
107     //
108
// [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
109
// ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
110
//
111
static final char[] doctype_string = { 'D','O','C','T','Y','P','E' };
112     //
113
// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
114
// | ('"' ('yes' | 'no') '"'))
115
//
116
private static final char[] standalone_string = { 's','t','a','n','d','a','l','o','n','e' };
117     //
118
// [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
119
//
120
private static final char[] encoding_string = { 'e','n','c','o','d','i','n','g' };
121
122     /*
123      * Return values for the EventHandler scanAttValue method.
124      */

125     public static final int
126         RESULT_SUCCESS = 0,
127         RESULT_FAILURE = -1,
128         RESULT_DUPLICATE_ATTR = -2;
129
130     /** Scanner states */
131     static final int
132         SCANNER_STATE_XML_DECL = 0,
133         SCANNER_STATE_START_OF_MARKUP = 1,
134         SCANNER_STATE_COMMENT = 2,
135         SCANNER_STATE_PI = 3,
136         SCANNER_STATE_DOCTYPE = 4,
137         SCANNER_STATE_PROLOG = 5,
138         SCANNER_STATE_ROOT_ELEMENT = 6,
139         SCANNER_STATE_CONTENT = 7,
140         SCANNER_STATE_REFERENCE = 8,
141         SCANNER_STATE_ATTRIBUTE_LIST = 9,
142         SCANNER_STATE_ATTRIBUTE_NAME = 10,
143         SCANNER_STATE_ATTRIBUTE_VALUE = 11,
144         SCANNER_STATE_TRAILING_MISC = 12,
145         SCANNER_STATE_END_OF_INPUT = 13,
146         SCANNER_STATE_TERMINATED = 14;
147
148     //
149
// Instance Variables
150
//
151
/***/
152     // NOTE: Used by old implementation of scanElementType method. -Ac
153
private StringPool.CharArrayRange fCurrentElementCharArrayRange = null;
154     /***/
155     int fAttrListHandle = -1;
156     XMLAttrList fAttrList = null;
157     GrammarResolver fGrammarResolver = null;
158     XMLDTDScanner fDTDScanner = null;
159     boolean fNamespacesEnabled = false;
160     boolean fValidationEnabled = false;
161     boolean fLoadExternalDTD = true;
162     QName fElementQName = new QName();
163     QName fAttributeQName = new QName();
164     QName fCurrentElementQName = new QName();
165     ScannerDispatcher fDispatcher = null;
166     EventHandler fEventHandler = null;
167     XMLDocumentHandler.DTDHandler fDTDHandler = null;
168     StringPool fStringPool = null;
169     XMLErrorReporter fErrorReporter = null;
170     XMLEntityHandler fEntityHandler = null;
171     XMLEntityHandler.EntityReader fEntityReader = null;
172     XMLEntityHandler.CharBuffer fLiteralData = null;
173     boolean fSeenRootElement = false;
174     boolean fSeenDoctypeDecl = false;
175     boolean fStandalone = false;
176     boolean fParseTextDecl = false;
177     boolean fScanningDTD = false;
178     int fScannerState = SCANNER_STATE_XML_DECL;
179     int fReaderId = -1;
180     int fAttValueReader = -1;
181     int fAttValueElementType = -1;
182     int fAttValueAttrName = -1;
183     int fAttValueOffset = -1;
184     int fAttValueMark = -1;
185     int fScannerMarkupDepth = 0;
186
187     //
188
// Interfaces
189
//
190

191     /**
192      * This interface must be implemented by the users of the XMLDocumentScanner class.
193      * These methods form the abstraction between the implementation semantics and the
194      * more generic task of scanning the XML non-DTD grammar.
195      */

196     public interface EventHandler {
197         /**
198          * Signal standalone = "yes"
199          *
200          * @exception java.lang.Exception
201          */

202         public void callStandaloneIsYes() throws Exception JavaDoc;
203
204         /**
205          * Signal the start of a document
206          *
207          * @exception java.lang.Exception
208          */

209         public void callStartDocument() throws Exception JavaDoc;
210         /**
211          * Signal the end of a document
212          *
213          * @exception java.lang.Exception
214          */

215         public void callEndDocument() throws Exception JavaDoc;
216         /**
217          * Signal the XML declaration of a document
218          *
219          * @param version the handle in the string pool for the version number
220          * @param encoding the handle in the string pool for the encoding
221          * @param standalong the handle in the string pool for the standalone value
222          * @exception java.lang.Exception
223          */

224         public void callXMLDecl(int version, int encoding, int standalone) throws Exception JavaDoc;
225         /**
226          * Signal the Text declaration of an external entity.
227          *
228          * @param version the handle in the string pool for the version number
229          * @param encoding the handle in the string pool for the encoding
230          * @exception java.lang.Exception
231          */

232         public void callTextDecl(int version, int encoding) throws Exception JavaDoc;
233         /**
234          * signal the scanning of a start element tag
235          *
236          * @param element Element name scanned.
237          * @exception java.lang.Exception
238          */

239         public void callStartElement(QName element) throws Exception JavaDoc;
240         /**
241          * Signal the scanning of an element name in a start element tag.
242          *
243          * @param element Element name scanned.
244          */

245         public void element(QName element) throws Exception JavaDoc;
246         /**
247          * Signal the scanning of an attribute associated to the previous
248          * start element tag.
249          *
250          * @param element Element name scanned.
251          * @param attrName Attribute name scanned.
252          * @param attrValue The string pool index of the attribute value.
253          */

254         public boolean attribute(QName element, QName attrName, int attrValue) throws Exception JavaDoc;
255         /**
256          * signal the scanning of an end element tag
257          *
258          * @param readerId the Id of the reader being used to scan the end tag.
259          * @exception java.lang.Exception
260          */

261         public void callEndElement(int readerId) throws Exception JavaDoc;
262         /**
263          * Signal the start of a CDATA section
264          * @exception java.lang.Exception
265          */

266         public void callStartCDATA() throws Exception JavaDoc;
267         /**
268          * Signal the end of a CDATA section
269          * @exception java.lang.Exception
270          */

271         public void callEndCDATA() throws Exception JavaDoc;
272         /**
273          * Report the scanning of character data
274          *
275          * @param ch the handle in the string pool of the character data that was scanned
276          * @exception java.lang.Exception
277          */

278         public void callCharacters(int ch) throws Exception JavaDoc;
279         /**
280          * Report the scanning of a processing instruction
281          *
282          * @param piTarget the handle in the string pool of the processing instruction targe
283          * @param piData the handle in the string pool of the processing instruction data
284          * @exception java.lang.Exception
285          */

286         public void callProcessingInstruction(int piTarget, int piData) throws Exception JavaDoc;
287         /**
288          * Report the scanning of a comment
289          *
290          * @param data the handle in the string pool of the comment text
291          * @exception java.lang.Exception
292          */

293         public void callComment(int data) throws Exception JavaDoc;
294     }
295
/**
 * Constructor. Stores the collaborators, installs an XMLDeclDispatcher as
 * the initial dispatcher and creates a fresh attribute list bound to the
 * given string pool.
 *
 * @param stringPool    the string pool used by this scanner
 * @param errorReporter the reporter used by the reportFatalXMLError methods
 * @param entityHandler the entity handler used when entity references are scanned
 * @param literalData   the buffer in which literal data is accumulated
 */
public XMLDocumentScanner(StringPool stringPool,
                          XMLErrorReporter errorReporter,
                          XMLEntityHandler entityHandler,
                          XMLEntityHandler.CharBuffer literalData) {
    fStringPool = stringPool;
    fErrorReporter = errorReporter;
    fEntityHandler = entityHandler;
    fLiteralData = literalData;
    fDispatcher = new XMLDeclDispatcher();
    fAttrList = new XMLAttrList(fStringPool);
}
310
311     /**
312      * Set the event handler
313      *
314      * @param eventHandler The place to send our callbacks.
315      */

316     public void setEventHandler(XMLDocumentScanner.EventHandler eventHandler) {
317         fEventHandler = eventHandler;
318     }
319
320     /** Set the DTD handler. */
321     public void setDTDHandler(XMLDocumentHandler.DTDHandler dtdHandler) {
322         fDTDHandler = dtdHandler;
323     }
324
325     /** Sets the grammar resolver. */
326     public void setGrammarResolver(GrammarResolver resolver) {
327         fGrammarResolver = resolver;
328     }
329
330     /**
331      * reset the parser so that the instance can be reused
332      *
333      * @param stringPool the string pool instance to be used by the reset parser
334      */

335     public void reset(StringPool stringPool, XMLEntityHandler.CharBuffer literalData) {
336         fStringPool = stringPool;
337         fLiteralData = literalData;
338         fParseTextDecl = false;
339         fSeenRootElement = false;
340         fSeenDoctypeDecl = false;
341         fStandalone = false;
342         fScanningDTD = false;
343         fDispatcher = new XMLDeclDispatcher();
344         fScannerState = SCANNER_STATE_XML_DECL;
345         fScannerMarkupDepth = 0;
346         fAttrList = new XMLAttrList(fStringPool);
347     }
348
349     //
350
// From the standard:
351
//
352
// [1] document ::= prolog element Misc*
353
//
354
// [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
355
// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
356
// [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
357
//
358
// The beginning of XMLDecl simplifies to:
359
// '<?xml' S ...
360
//
361
// [27] Misc ::= Comment | PI | S
362
// [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
363
// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
364
// [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
365
//
366
// [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
367
// ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
368
//
369
/**
370      * Entry point for parsing
371      *
372      * @param doItAll if true the entire document is parsed otherwise just
373      * the next segment of the document is parsed
374      */

375     public boolean parseSome(boolean doItAll) throws Exception JavaDoc
376     {
377         do {
378             if (!fDispatcher.dispatch(doItAll))
379                 return false;
380         } while (doItAll);
381         return true;
382     }
383
384     /**
385      * Change readers
386      *
387      * @param nextReader the new reader that the scanner will use
388      * @param nextReaderId id of the reader to change to
389      * @exception throws java.lang.Exception
390      */

391     public void readerChange(XMLEntityHandler.EntityReader nextReader, int nextReaderId) throws Exception JavaDoc {
392         fEntityReader = nextReader;
393         fReaderId = nextReaderId;
394         if (fScannerState == SCANNER_STATE_ATTRIBUTE_VALUE) {
395             fAttValueOffset = fEntityReader.currentOffset();
396             fAttValueMark = fAttValueOffset;
397         }
398
399         //also propagate the change to DTDScanner if there is one
400
if (fDTDScanner != null && fScanningDTD)
401             fDTDScanner.readerChange(nextReader, nextReaderId);
402     }
403
404     /**
405      * Handle the end of input
406      *
407      * @param entityName the handle in the string pool of the name of the entity which has reached end of input
408      * @param moreToFollow if true, there is still input left to process in other readers
409      * @exception java.lang.Exception
410      */

411     public void endOfInput(int entityName, boolean moreToFollow) throws Exception JavaDoc {
412         if (fDTDScanner != null && fScanningDTD){
413             fDTDScanner.endOfInput(entityName, moreToFollow);
414         }
415         fDispatcher.endOfInput(entityName, moreToFollow);
416     }
417
418     /**
419      * Tell if scanner has reached end of input
420      * @return true if scanner has reached end of input.
421      */

422     public boolean atEndOfInput() {
423         return fScannerState == SCANNER_STATE_END_OF_INPUT;
424     }
425
426     //
427
// [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
428
//
429
/**
430      * Scan an attribute value
431      *
432      * @param elementType handle to the element whose attribute value is being scanned
433      * @param attrName handle in the string pool of the name of attribute being scanned
434      * @param asSymbol controls whether the value is a string (duplicates allowed) or a symbol (duplicates not allowed)
435      * @return handle in the string pool of the scanned value
436      * @exception java.lang.Exception
437      */

438     public int scanAttValue(QName element, QName attribute, boolean asSymbol) throws Exception JavaDoc {
439         boolean single;
440         if (!(single = fEntityReader.lookingAtChar('\'', true)) && !fEntityReader.lookingAtChar('\"', true)) {
441             reportFatalXMLError(XMLMessages.MSG_QUOTE_REQUIRED_IN_ATTVALUE,
442                                 XMLMessages.P10_QUOTE_REQUIRED,
443                                 element.rawname,
444                                 attribute.rawname);
445             return -1;
446         }
447         char qchar = single ? '\'' : '\"';
448         fAttValueMark = fEntityReader.currentOffset();
449         int attValue = fEntityReader.scanAttValue(qchar, asSymbol);
450         if (attValue >= 0)
451             return attValue;
452         int previousState = setScannerState(SCANNER_STATE_ATTRIBUTE_VALUE);
453         fAttValueReader = fReaderId;
454         // REVISIT: What should this be?
455
fAttValueElementType = element.rawname;
456         // REVISIT: What should this be?
457
fAttValueAttrName = attribute.rawname;
458         fAttValueOffset = fEntityReader.currentOffset();
459         int dataOffset = fLiteralData.length();
460         if (fAttValueOffset - fAttValueMark > 0)
461             fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark);
462         fAttValueMark = fAttValueOffset;
463         boolean setMark = false;
464         boolean skippedCR;
465         while (true) {
466             if (fEntityReader.lookingAtChar(qchar, true)) {
467                 if (fReaderId == fAttValueReader)
468                     break;
469             } else if (fEntityReader.lookingAtChar(' ', true)) {
470                 //
471
// no action required
472
//
473
} else if ((skippedCR = fEntityReader.lookingAtChar((char)0x0D, true)) || fEntityReader.lookingAtSpace(true)) {
474                 if (fAttValueOffset - fAttValueMark > 0)
475                     fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark);
476                 setMark = true;
477                 fLiteralData.append(' ');
478                 if (skippedCR) {
479                     //
480
// REVISIT - HACK !!! code changed to pass incorrect OASIS test 'valid-sa-110'
481
// Uncomment the next line to conform to the spec...
482
//
483
//fEntityReader.lookingAtChar((char)0x0A, true);
484
}
485             } else if (fEntityReader.lookingAtChar('&', true)) {
486                 if (fAttValueOffset - fAttValueMark > 0)
487                     fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark);
488                 setMark = true;
489                 //
490
// Check for character reference first.
491
//
492
if (fEntityReader.lookingAtChar('#', true)) {
493                     int ch = scanCharRef();
494                     if (ch != -1) {
495                         if (ch < 0x10000)
496                             fLiteralData.append((char)ch);
497                         else {
498                             fLiteralData.append((char)(((ch-0x00010000)>>10)+0xd800));
499                             fLiteralData.append((char)(((ch-0x00010000)&0x3ff)+0xdc00));
500                         }
501                     }
502                 } else {
503                     //
504
// Entity reference
505
//
506
int nameOffset = fEntityReader.currentOffset();
507                     fEntityReader.skipPastName(';');
508                     int nameLength = fEntityReader.currentOffset() - nameOffset;
509                     if (nameLength == 0) {
510                         reportFatalXMLError(XMLMessages.MSG_NAME_REQUIRED_IN_REFERENCE,
511                                             XMLMessages.P68_NAME_REQUIRED);
512                     } else if (!fEntityReader.lookingAtChar(';', true)) {
513                         reportFatalXMLError(XMLMessages.MSG_SEMICOLON_REQUIRED_IN_REFERENCE,
514                                             XMLMessages.P68_SEMICOLON_REQUIRED,
515                                             fEntityReader.addString(nameOffset, nameLength));
516                     } else {
517                         int entityName = fEntityReader.addSymbol(nameOffset, nameLength);
518                         fEntityHandler.startReadingFromEntity(entityName, fScannerMarkupDepth, XMLEntityHandler.ENTITYREF_IN_ATTVALUE);
519                     }
520                 }
521             } else if (fEntityReader.lookingAtChar('<', true)) {
522                 if (fAttValueOffset - fAttValueMark > 0)
523                     fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark);
524                 setMark = true;
525                 reportFatalXMLError(XMLMessages.MSG_LESSTHAN_IN_ATTVALUE,
526                                     XMLMessages.WFC_NO_LESSTHAN_IN_ATTVALUE,
527                                     element.rawname,
528                                     attribute.rawname);
529             } else if (!fEntityReader.lookingAtValidChar(true)) {
530                 if (fAttValueOffset - fAttValueMark > 0)
531                     fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark);
532                 setMark = true;
533                 int invChar = fEntityReader.scanInvalidChar();
534                 if (fScannerState == SCANNER_STATE_END_OF_INPUT)
535                     return -1;
536                 if (invChar >= 0) {
537                     reportFatalXMLError(XMLMessages.MSG_INVALID_CHAR_IN_ATTVALUE,
538                                         XMLMessages.P10_INVALID_CHARACTER,
539                                         fStringPool.toString(element.rawname),
540                                         fStringPool.toString(attribute.rawname),
541                                         Integer.toHexString(invChar));
542                 }
543             }
544             fAttValueOffset = fEntityReader.currentOffset();
545             if (setMark) {
546                 fAttValueMark = fAttValueOffset;
547                 setMark = false;
548             }
549         }
550         restoreScannerState(previousState);
551         int dataLength = fLiteralData.length() - dataOffset;
552         if (dataLength == 0) {
553             return fEntityReader.addString(fAttValueMark, fAttValueOffset - fAttValueMark);
554         }
555         if (fAttValueOffset - fAttValueMark > 0) {
556             fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark);
557             dataLength = fLiteralData.length() - dataOffset;
558         }
559         int value = fLiteralData.addString(dataOffset, dataLength);
560         return value;
561     }
562
563     //
564
//
565
//
566
void reportFatalXMLError(int majorCode, int minorCode) throws Exception JavaDoc {
567         fErrorReporter.reportError(fErrorReporter.getLocator(),
568                                    XMLMessages.XML_DOMAIN,
569                                    majorCode,
570                                    minorCode,
571                                    null,
572                                    XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
573     }
574     void reportFatalXMLError(int majorCode, int minorCode, int stringIndex1) throws Exception JavaDoc {
575         Object JavaDoc[] args = { fStringPool.toString(stringIndex1) };
576         fErrorReporter.reportError(fErrorReporter.getLocator(),
577                                    XMLMessages.XML_DOMAIN,
578                                    majorCode,
579                                    minorCode,
580                                    args,
581                                    XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
582     }
583     void reportFatalXMLError(int majorCode, int minorCode, String JavaDoc string1) throws Exception JavaDoc {
584         Object JavaDoc[] args = { string1 };
585         fErrorReporter.reportError(fErrorReporter.getLocator(),
586                                    XMLMessages.XML_DOMAIN,
587                                    majorCode,
588                                    minorCode,
589                                    args,
590                                    XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
591     }
592     void reportFatalXMLError(int majorCode, int minorCode, int stringIndex1, int stringIndex2) throws Exception JavaDoc {
593         Object JavaDoc[] args = { fStringPool.toString(stringIndex1),
594                           fStringPool.toString(stringIndex2) };
595         fErrorReporter.reportError(fErrorReporter.getLocator(),
596                                    XMLMessages.XML_DOMAIN,
597                                    majorCode,
598                                    minorCode,
599                                    args,
600                                    XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
601     }
602     void reportFatalXMLError(int majorCode, int minorCode, String JavaDoc string1, String JavaDoc string2) throws Exception JavaDoc {
603         Object JavaDoc[] args = { string1, string2 };
604         fErrorReporter.reportError(fErrorReporter.getLocator(),
605                                    XMLMessages.XML_DOMAIN,
606                                    majorCode,
607                                    minorCode,
608                                    args,
609                                    XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
610     }
611     void reportFatalXMLError(int majorCode, int minorCode, String JavaDoc string1, String JavaDoc string2, String JavaDoc string3) throws Exception JavaDoc {
612         Object JavaDoc[] args = { string1, string2, string3 };
613         fErrorReporter.reportError(fErrorReporter.getLocator(),
614                                    XMLMessages.XML_DOMAIN,
615                                    majorCode,
616                                    minorCode,
617                                    args,
618                                    XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
619     }
620     void abortMarkup(int majorCode, int minorCode) throws Exception JavaDoc {
621         reportFatalXMLError(majorCode, minorCode);
622         skipPastEndOfCurrentMarkup();
623     }
624     void abortMarkup(int majorCode, int minorCode, int stringIndex1) throws Exception JavaDoc {
625         reportFatalXMLError(majorCode, minorCode, stringIndex1);
626         skipPastEndOfCurrentMarkup();
627     }
628     void abortMarkup(int majorCode, int minorCode, String JavaDoc string1) throws Exception JavaDoc {
629         reportFatalXMLError(majorCode, minorCode, string1);
630         skipPastEndOfCurrentMarkup();
631     }
632     void abortMarkup(int majorCode, int minorCode, int stringIndex1, int stringIndex2) throws Exception JavaDoc {
633         reportFatalXMLError(majorCode, minorCode, stringIndex1, stringIndex2);
634         skipPastEndOfCurrentMarkup();
635     }
636     void skipPastEndOfCurrentMarkup() throws Exception JavaDoc {
637         fEntityReader.skipToChar('>');
638         if (fEntityReader.lookingAtChar('>', true))
639             fScannerMarkupDepth--;
640     }
641     //
642
//
643
//
644
int setScannerState(int state) {
645         int oldState = fScannerState;
646         fScannerState = state;
647         return oldState;
648     }
649     void restoreScannerState(int state) {
650         if (fScannerState != SCANNER_STATE_END_OF_INPUT)
651             fScannerState = state;
652     }
//
// Dispatchers
//
/**
 * The main loop of the scanner is implemented by calling the dispatch method
 * of ScannerDispatcher with a flag which tells the dispatcher whether to continue
 * or return. The scanner logic is split up into dispatchers for various syntactic
 * components of XML. //REVISIT more rationale needed
 */
interface ScannerDispatcher {
    /**
     * scan an XML syntactic component
     *
     * @param keepgoing if true continue on to the next dispatcher, otherwise return
     * @return true if scanning was successful //REVISIT - does it ever return false or does it just throw?
     * @exception java.lang.Exception
     */
    boolean dispatch(boolean keepgoing) throws Exception;

    /**
     * endOfInput encapsulates the end of entity handling for each dispatcher
     *
     * @param entityName StringPool handle of the entity that has reached the end
     * @param moreToFollow true if there is more input to be read
     * @exception java.lang.Exception
     */
    void endOfInput(int entityName, boolean moreToFollow) throws Exception;
}
680     final class XMLDeclDispatcher implements ScannerDispatcher {
681         public boolean dispatch(boolean keepgoing) throws Exception JavaDoc {
682             fEventHandler.callStartDocument();
683             if (fEntityReader.lookingAtChar('<', true)) {
684                 fScannerMarkupDepth++;
685                 setScannerState(SCANNER_STATE_START_OF_MARKUP);
686                 if (fEntityReader.lookingAtChar('?', true)) {
687                     int piTarget = fEntityReader.scanName(' ');
688                     if (piTarget == -1) {
689                         abortMarkup(XMLMessages.MSG_PITARGET_REQUIRED,
690                                     XMLMessages.P16_PITARGET_REQUIRED);
691                     } else if ("xml".equals(fStringPool.toString(piTarget))) {
692                         if (fEntityReader.lookingAtSpace(true)) { // an XMLDecl looks like a PI with the target 'xml'
693
scanXMLDeclOrTextDecl(false);
694                         } else { // a PI target matching 'xml'
695
abortMarkup(XMLMessages.MSG_RESERVED_PITARGET,
696                                         XMLMessages.P17_RESERVED_PITARGET);
697                         }
698                     } else { // PI
699
scanPI(piTarget);
700                     }
701                     fDispatcher = new PrologDispatcher();
702                     restoreScannerState(SCANNER_STATE_PROLOG);
703                     return true;
704                 }
705                 if (fEntityReader.lookingAtChar('!', true)) {
706                     if (fEntityReader.lookingAtChar('-', true)) { // comment ?
707
if (fEntityReader.lookingAtChar('-', true)) {
708                             scanComment(); // scan through the closing '-->'
709
} else {
710                             abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG,
711                                         XMLMessages.P22_NOT_RECOGNIZED);
712                         }
713                     } else {
714                         if (fEntityReader.skippedString(doctype_string)) {
715                             setScannerState(SCANNER_STATE_DOCTYPE);
716                             fSeenDoctypeDecl = true;
717                             scanDoctypeDecl(fStandalone); // scan through the closing '>'
718
fScannerMarkupDepth--;
719                             fDispatcher = new PrologDispatcher();
720                             restoreScannerState(SCANNER_STATE_PROLOG);
721                             return true;
722                         } else {
723                             abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG,
724                                         XMLMessages.P22_NOT_RECOGNIZED);
725                         }
726                     }
727                 } else {
728                     fDispatcher = new ContentDispatcher();
729                     restoreScannerState(SCANNER_STATE_ROOT_ELEMENT);
730                     return true;
731                 }
732             } else {
733                 if (fEntityReader.lookingAtSpace(true)) {
734                     fEntityReader.skipPastSpaces();
735                 } else if (!fEntityReader.lookingAtValidChar(false)) {
736                     int invChar = fEntityReader.scanInvalidChar();
737     &nb