KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > xerces > impl > XMLScanner


1 /*
2  * Copyright 1999-2004 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16
17 package org.apache.xerces.impl;
18
19 import java.io.IOException JavaDoc;
20
21 import org.apache.xerces.impl.msg.XMLMessageFormatter;
22 import org.apache.xerces.util.SymbolTable;
23 import org.apache.xerces.util.XMLChar;
24 import org.apache.xerces.util.XMLResourceIdentifierImpl;
25 import org.apache.xerces.util.XMLStringBuffer;
26 import org.apache.xerces.xni.Augmentations;
27 import org.apache.xerces.xni.XMLResourceIdentifier;
28 import org.apache.xerces.xni.XMLString;
29 import org.apache.xerces.xni.XNIException;
30 import org.apache.xerces.xni.parser.XMLComponent;
31 import org.apache.xerces.xni.parser.XMLComponentManager;
32 import org.apache.xerces.xni.parser.XMLConfigurationException;
33
34 /**
35  * This class is responsible for holding scanning methods common to
36  * scanning the XML document structure and content as well as the DTD
37  * structure and content. Both XMLDocumentScanner and XMLDTDScanner inherit
38  * from this base class.
39  *
40  * <p>
41  * This component requires the following features and properties from the
42  * component manager that uses it:
43  * <ul>
44  * <li>http://xml.org/sax/features/validation</li>
45  * <li>http://xml.org/sax/features/namespaces</li>
46  * <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
47  * <li>http://apache.org/xml/properties/internal/symbol-table</li>
48  * <li>http://apache.org/xml/properties/internal/error-reporter</li>
49  * <li>http://apache.org/xml/properties/internal/entity-manager</li>
50  * </ul>
51  *
52  * @xerces.internal
53  *
54  * @author Andy Clark, IBM
55  * @author Arnaud Le Hors, IBM
56  * @author Eric Ye, IBM
57  *
58  * @version $Id: XMLScanner.java,v 1.51 2004/10/04 21:45:48 mrglavas Exp $
59  */

60 public abstract class XMLScanner
61     implements XMLComponent {
62
63     //
64
// Constants
65
//
66

67     // feature identifiers
68

69     /** Feature identifier: validation. */
70     protected static final String JavaDoc VALIDATION =
71         Constants.SAX_FEATURE_PREFIX + Constants.VALIDATION_FEATURE;
72
73     /** Feature identifier: namespaces. */
74     protected static final String JavaDoc NAMESPACES =
75         Constants.SAX_FEATURE_PREFIX + Constants.NAMESPACES_FEATURE;
76
77     /** Feature identifier: notify character references. */
78     protected static final String JavaDoc NOTIFY_CHAR_REFS =
79         Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_CHAR_REFS_FEATURE;
80     
81     protected static final String JavaDoc PARSER_SETTINGS =
82                 Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS;
83
84     // property identifiers
85

86     /** Property identifier: symbol table. */
87     protected static final String JavaDoc SYMBOL_TABLE =
88         Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;
89
90     /** Property identifier: error reporter. */
91     protected static final String JavaDoc ERROR_REPORTER =
92         Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;
93
94     /** Property identifier: entity manager. */
95     protected static final String JavaDoc ENTITY_MANAGER =
96         Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_MANAGER_PROPERTY;
97
98     // debugging
99

100     /** Debug attribute normalization. */
101     protected static final boolean DEBUG_ATTR_NORMALIZATION = false;
102
103     //
104
// Data
105
//
106

107
108     // features
109

110     /**
111      * Validation. This feature identifier is:
112      * http://xml.org/sax/features/validation
113      */

114     protected boolean fValidation = false;
115     
116     /** Namespaces. */
117     protected boolean fNamespaces;
118
119     /** Character references notification. */
120     protected boolean fNotifyCharRefs = false;
121     
122     /** Internal parser-settings feature */
123     protected boolean fParserSettings = true;
124     
125     // properties
126

127     /** Symbol table. */
128     protected SymbolTable fSymbolTable;
129
130     /** Error reporter. */
131     protected XMLErrorReporter fErrorReporter;
132
133     /** Entity manager. */
134     protected XMLEntityManager fEntityManager;
135
136     // protected data
137

138     /** Entity scanner. */
139     protected XMLEntityScanner fEntityScanner;
140
141     /** Entity depth. */
142     protected int fEntityDepth;
143
144     /** Literal value of the last character refence scanned. */
145     protected String JavaDoc fCharRefLiteral = null;
146
147     /** Scanning attribute. */
148     protected boolean fScanningAttribute;
149
150     /** Report entity boundary. */
151     protected boolean fReportEntity;
152
153     // symbols
154

155     /** Symbol: "version". */
156     protected final static String JavaDoc fVersionSymbol = "version".intern();
157
158     /** Symbol: "encoding". */
159     protected final static String JavaDoc fEncodingSymbol = "encoding".intern();
160
161     /** Symbol: "standalone". */
162     protected final static String JavaDoc fStandaloneSymbol = "standalone".intern();
163
164     /** Symbol: "amp". */
165     protected final static String JavaDoc fAmpSymbol = "amp".intern();
166
167     /** Symbol: "lt". */
168     protected final static String JavaDoc fLtSymbol = "lt".intern();
169
170     /** Symbol: "gt". */
171     protected final static String JavaDoc fGtSymbol = "gt".intern();
172
173     /** Symbol: "quot". */
174     protected final static String JavaDoc fQuotSymbol = "quot".intern();
175
176     /** Symbol: "apos". */
177     protected final static String JavaDoc fAposSymbol = "apos".intern();
178
179     // temporary variables
180

181     // NOTE: These objects are private to help prevent accidental modification
182
// of values by a subclass. If there were protected *and* the sub-
183
// modified the values, it would be difficult to track down the real
184
// cause of the bug. By making these private, we avoid this
185
// possibility.
186

187     /** String. */
188     private XMLString fString = new XMLString();
189
190     /** String buffer. */
191     private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
192
193     /** String buffer. */
194     private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
195
196     /** String buffer. */
197     private XMLStringBuffer fStringBuffer3 = new XMLStringBuffer();
198
199     // temporary location for Resource identification information.
200
protected XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();
201
202     //
203
// XMLComponent methods
204
//
205

206     /**
207      *
208      *
209      * @param componentManager The component manager.
210      *
211      * @throws SAXException Throws exception if required features and
212      * properties cannot be found.
213      */

214     public void reset(XMLComponentManager componentManager)
215         throws XMLConfigurationException {
216
217         try {
218             fParserSettings = componentManager.getFeature(PARSER_SETTINGS);
219         } catch (XMLConfigurationException e) {
220             fParserSettings = true;
221         }
222
223         if (!fParserSettings) {
224             // parser settings have not been changed
225
init();
226             return;
227         }
228
229         // Xerces properties
230
fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
231         fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
232         fEntityManager = (XMLEntityManager)componentManager.getProperty(ENTITY_MANAGER);
233
234         // sax features
235
try {
236             fValidation = componentManager.getFeature(VALIDATION);
237         }
238         catch (XMLConfigurationException e) {
239             fValidation = false;
240         }
241         try {
242             fNamespaces = componentManager.getFeature(NAMESPACES);
243         }
244         catch (XMLConfigurationException e) {
245             fNamespaces = true;
246         }
247         try {
248             fNotifyCharRefs = componentManager.getFeature(NOTIFY_CHAR_REFS);
249         }
250         catch (XMLConfigurationException e) {
251             fNotifyCharRefs = false;
252         }
253         
254         init();
255
256     } // reset(XMLComponentManager)
257

258     /**
259      * Sets the value of a property during parsing.
260      *
261      * @param propertyId
262      * @param value
263      */

264     public void setProperty(String JavaDoc propertyId, Object JavaDoc value)
265         throws XMLConfigurationException {
266         
267         // Xerces properties
268
if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
269             final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length();
270             
271             if (suffixLength == Constants.SYMBOL_TABLE_PROPERTY.length() &&
272                 propertyId.endsWith(Constants.SYMBOL_TABLE_PROPERTY)) {
273                 fSymbolTable = (SymbolTable)value;
274             }
275             else if (suffixLength == Constants.ERROR_REPORTER_PROPERTY.length() &&
276                 propertyId.endsWith(Constants.ERROR_REPORTER_PROPERTY)) {
277                 fErrorReporter = (XMLErrorReporter)value;
278             }
279             else if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() &&
280                 propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) {
281                 fEntityManager = (XMLEntityManager)value;
282             }
283         }
284
285     } // setProperty(String,Object)
286

287     /*
288      * Sets the feature of the scanner.
289      */

290     public void setFeature(String JavaDoc featureId, boolean value)
291         throws XMLConfigurationException {
292             
293         if (VALIDATION.equals(featureId)) {
294             fValidation = value;
295         } else if (NOTIFY_CHAR_REFS.equals(featureId)) {
296             fNotifyCharRefs = value;
297         }
298     }
299     
300     /*
301      * Gets the state of the feature of the scanner.
302      */

303     public boolean getFeature(String JavaDoc featureId)
304         throws XMLConfigurationException {
305             
306         if (VALIDATION.equals(featureId)) {
307             return fValidation;
308         } else if (NOTIFY_CHAR_REFS.equals(featureId)) {
309             return fNotifyCharRefs;
310         }
311         throw new XMLConfigurationException(XMLConfigurationException.NOT_RECOGNIZED, featureId);
312     }
313     
314     //
315
// Protected methods
316
//
317

318     // anybody calling this had better have set Symtoltable!
319
protected void reset() {
320         init();
321
322         // DTD preparsing defaults:
323
fValidation = true;
324         fNotifyCharRefs = false;
325
326     }
327
328     // common scanning methods
329

330     /**
331      * Scans an XML or text declaration.
332      * <p>
333      * <pre>
334      * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
335      * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
336      * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
337      * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
338      * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
339      * | ('"' ('yes' | 'no') '"'))
340      *
341      * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
342      * </pre>
343      *
344      * @param scanningTextDecl True if a text declaration is to
345      * be scanned instead of an XML
346      * declaration.
347      * @param pseudoAttributeValues An array of size 3 to return the version,
348      * encoding and standalone pseudo attribute values
349      * (in that order).
350      *
351      * <strong>Note:</strong> This method uses fString, anything in it
352      * at the time of calling is lost.
353      */

354     protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
355                                          String JavaDoc[] pseudoAttributeValues)
356         throws IOException JavaDoc, XNIException {
357
358         // pseudo-attribute values
359
String JavaDoc version = null;
360         String JavaDoc encoding = null;
361         String JavaDoc standalone = null;
362
363         // scan pseudo-attributes
364
final int STATE_VERSION = 0;
365         final int STATE_ENCODING = 1;
366         final int STATE_STANDALONE = 2;
367         final int STATE_DONE = 3;
368         int state = STATE_VERSION;
369
370         boolean dataFoundForTarget = false;
371         boolean sawSpace = fEntityScanner.skipDeclSpaces();
372         // since pseudoattributes are *not* attributes,
373
// their quotes don't need to be preserved in external parameter entities.
374
// the XMLEntityScanner#scanLiteral method will continue to
375
// emit -1 in such cases when it finds a quote; this is
376
// fine for other methods that parse scanned entities,
377
// but not for the scanning of pseudoattributes. So,
378
// temporarily, we must mark the current entity as not being "literal"
379
XMLEntityManager.ScannedEntity currEnt = fEntityManager.getCurrentEntity();
380         boolean currLiteral = currEnt.literal;
381         currEnt.literal = false;
382         while (fEntityScanner.peekChar() != '?') {
383             dataFoundForTarget = true;
384             String JavaDoc name = scanPseudoAttribute(scanningTextDecl, fString);
385             switch (state) {
386                 case STATE_VERSION: {
387                     if (name == fVersionSymbol) {
388                         if (!sawSpace) {
389                             reportFatalError(scanningTextDecl
390                                        ? "SpaceRequiredBeforeVersionInTextDecl"
391                                        : "SpaceRequiredBeforeVersionInXMLDecl",
392                                              null);
393                         }
394                         version = fString.toString();
395                         state = STATE_ENCODING;
396                         if (!versionSupported(version)) {
397                             reportFatalError(getVersionNotSupportedKey(),
398                                              new Object JavaDoc[]{version});
399                         }
400                     }
401                     else if (name == fEncodingSymbol) {
402                         if (!scanningTextDecl) {
403                             reportFatalError("VersionInfoRequired", null);
404                         }
405                         if (!sawSpace) {
406                             reportFatalError(scanningTextDecl
407                                       ? "SpaceRequiredBeforeEncodingInTextDecl"
408                                       : "SpaceRequiredBeforeEncodingInXMLDecl",
409                                              null);
410                         }
411                         encoding = fString.toString();
412                         state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
413                     }
414                     else {
415                         if (scanningTextDecl) {
416                             reportFatalError("EncodingDeclRequired", null);
417                         }
418                         else {
419                             reportFatalError("VersionInfoRequired", null);
420                         }
421                     }
422                     break;
423                 }
424                 case STATE_ENCODING: {
425                     if (name == fEncodingSymbol) {
426                         if (!sawSpace) {
427                             reportFatalError(scanningTextDecl
428                                       ? "SpaceRequiredBeforeEncodingInTextDecl"
429                                       : "SpaceRequiredBeforeEncodingInXMLDecl",
430                                              null);
431                         }
432                         encoding = fString.toString();
433                         state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
434                         // TODO: check encoding name; set encoding on
435
// entity scanner
436
}
437                     else if (!scanningTextDecl && name == fStandaloneSymbol) {
438                         if (!sawSpace) {
439                             reportFatalError("SpaceRequiredBeforeStandalone",
440                                              null);
441                         }
442                         standalone = fString.toString();
443                         state = STATE_DONE;
444                         if (!standalone.equals("yes") && !standalone.equals("no")) {
445                             reportFatalError("SDDeclInvalid", new Object JavaDoc[] {standalone});
446                         }
447                     }
448                     else {
449                         reportFatalError("EncodingDeclRequired", null);
450                     }
451                     break;
452                 }
453                 case STATE_STANDALONE: {
454                     if (name == fStandaloneSymbol) {
455                         if (!sawSpace) {
456                             reportFatalError("SpaceRequiredBeforeStandalone",
457                                              null);
458                         }
459                         standalone = fString.toString();
460                         state = STATE_DONE;
461                         if (!standalone.equals("yes") && !standalone.equals("no")) {
462                             reportFatalError("SDDeclInvalid", new Object JavaDoc[] {standalone});
463                         }
464                     }
465                     else {
466                         reportFatalError("EncodingDeclRequired", null);
467                     }
468                     break;
469                 }
470                 default: {
471                     reportFatalError("NoMorePseudoAttributes", null);
472                 }
473             }
474             sawSpace = fEntityScanner.skipDeclSpaces();
475         }
476         // restore original literal value
477
if(currLiteral)
478             currEnt.literal = true;
479         // REVISIT: should we remove this error reporting?
480
if (scanningTextDecl && state != STATE_DONE) {
481             reportFatalError("MorePseudoAttributes", null);
482         }
483         
484         // If there is no data in the xml or text decl then we fail to report error
485
// for version or encoding info above.
486
if (scanningTextDecl) {
487             if (!dataFoundForTarget && encoding == null) {
488                 reportFatalError("EncodingDeclRequired", null);
489             }
490         }
491         else {
492             if (!dataFoundForTarget && version == null) {
493                 reportFatalError("VersionInfoRequired", null);
494             }
495         }
496
497         // end
498
if (!fEntityScanner.skipChar('?')) {
499             reportFatalError("XMLDeclUnterminated", null);
500         }
501         if (!fEntityScanner.skipChar('>')) {
502             reportFatalError("XMLDeclUnterminated", null);
503
504         }
505         
506         // fill in return array
507
pseudoAttributeValues[0] = version;
508         pseudoAttributeValues[1] = encoding;
509         pseudoAttributeValues[2] = standalone;
510
511     } // scanXMLDeclOrTextDecl(boolean)
512

513     /**
514      * Scans a pseudo attribute.
515      *
516      * @param scanningTextDecl True if scanning this pseudo-attribute for a
517      * TextDecl; false if scanning XMLDecl. This
518      * flag is needed to report the correct type of
519      * error.
520      * @param value The string to fill in with the attribute
521      * value.
522      *
523      * @return The name of the attribute
524      *
525      * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
526      * at the time of calling is lost.
527      */

528     public String JavaDoc scanPseudoAttribute(boolean scanningTextDecl,
529                                       XMLString value)
530         throws IOException JavaDoc, XNIException {
531
532         // REVISIT: This method is used for generic scanning of
533
// pseudo attributes, but since there are only three such
534
// attributes: version, encoding, and standalone there are
535
// for performant ways of scanning them. Every decl must
536
// have a version, and in TextDecls this version must
537
// be followed by an encoding declaration. Also the
538
// methods we invoke on the scanners allow non-ASCII
539
// characters to be parsed in the decls, but since
540
// we don't even know what the actual encoding of the
541
// document is until we scan the encoding declaration
542
// you cannot reliably read any characters outside
543
// of the ASCII range here. -- mrglavas
544
String JavaDoc name = fEntityScanner.scanName();
545         XMLEntityManager.print(fEntityManager.getCurrentEntity());
546         if (name == null) {
547             reportFatalError("PseudoAttrNameExpected", null);
548         }
549         fEntityScanner.skipDeclSpaces();
550         if (!fEntityScanner.skipChar('=')) {
551             reportFatalError(scanningTextDecl ? "EqRequiredInTextDecl"
552                              : "EqRequiredInXMLDecl", new Object JavaDoc[]{name});
553         }
554         fEntityScanner.skipDeclSpaces();
555         int quote = fEntityScanner.peekChar();
556         if (quote != '\'' && quote != '"') {
557             reportFatalError(scanningTextDecl ? "QuoteRequiredInTextDecl"
558                              : "QuoteRequiredInXMLDecl" , new Object JavaDoc[]{name});
559         }
560         fEntityScanner.scanChar();
561         int c = fEntityScanner.scanLiteral(quote, value);
562         if (c != quote) {
563             fStringBuffer2.clear();
564             do {
565                 fStringBuffer2.append(value);
566                 if (c != -1) {
567                     if (c == '&' || c == '%' || c == '<' || c == ']') {
568                         fStringBuffer2.append((char)fEntityScanner.scanChar());
569                     }
570                     // REVISIT: Even if you could reliably read non-ASCII chars
571
// why bother scanning for surrogates here? Only ASCII chars
572
// match the productions in XMLDecls and TextDecls. -- mrglavas
573
else if (XMLChar.isHighSurrogate(c)) {
574                         scanSurrogates(fStringBuffer2);
575                     }
576                     else if (isInvalidLiteral(c)) {
577                         String JavaDoc key = scanningTextDecl
578                             ? "InvalidCharInTextDecl" : "InvalidCharInXMLDecl";
579                         reportFatalError(key,
580                                        new Object JavaDoc[] {Integer.toString(c, 16)});
581                         fEntityScanner.scanChar();
582                     }
583                 }
584                 c = fEntityScanner.scanLiteral(quote, value);
585             } while (c != quote);
586             fStringBuffer2.append(value);
587             value.setValues(fStringBuffer2);
588         }
589         if (!fEntityScanner.skipChar(quote)) {
590             reportFatalError(scanningTextDecl ? "CloseQuoteMissingInTextDecl"
591                              : "CloseQuoteMissingInXMLDecl",
592                              new Object JavaDoc[]{name});
593         }
594
595         // return
596
return name;
597
598     } // scanPseudoAttribute(XMLString):String
599

600     /**
601      * Scans a processing instruction.
602      * <p>
603      * <pre>
604      * [16] PI ::= '&lt;?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
605      * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
606      * </pre>
607      * <strong>Note:</strong> This method uses fString, anything in it
608      * at the time of calling is lost.
609      */

610     protected void scanPI() throws IOException JavaDoc, XNIException {
611
612         // target
613
fReportEntity = false;
614         String JavaDoc target = null;
615         if(fNamespaces) {
616             target = fEntityScanner.scanNCName();
617         } else {
618             target = fEntityScanner.scanName();
619         }
620         if (target == null) {
621             reportFatalError("PITargetRequired", null);
622         }
623
624         // scan data
625
scanPIData(target, fString);
626         fReportEntity = true;
627
628     } // scanPI()
629

630     /**
631      * Scans a processing data. This is needed to handle the situation
632      * where a document starts with a processing instruction whose
633      * target name <em>starts with</em> "xml". (e.g. xmlfoo)
634      *
635      * <strong>Note:</strong> This method uses fStringBuffer, anything in it
636      * at the time of calling is lost.
637      *
638      * @param target The PI target
639      * @param data The string to fill in with the data
640      */

641     protected void scanPIData(String JavaDoc target, XMLString data)
642         throws IOException JavaDoc, XNIException {
643
644         // check target
645
if (target.length() == 3) {
646             char c0 = Character.toLowerCase(target.charAt(0));
647             char c1 = Character.toLowerCase(target.charAt(1));
648             char c2 = Character.toLowerCase(target.charAt(2));
649             if (c0 == 'x' && c1 == 'm' && c2 == 'l') {
650                 reportFatalError("ReservedPITarget", null);
651             }
652         }
653
654         // spaces
655
if (!fEntityScanner.skipSpaces()) {
656             if (fEntityScanner.skipString("?>")) {
657                 // we found the end, there is no data
658
data.clear();
659                 return;
660             }
661             else {
662                 if(fNamespaces && fEntityScanner.peekChar() == ':') {
663                     fEntityScanner.scanChar();
664                     XMLStringBuffer colonName = new XMLStringBuffer(target);
665                     colonName.append(":");
666                     String JavaDoc str = fEntityScanner.scanName();
667                     if (str != null)
668                         colonName.append(str);
669                     reportFatalError("ColonNotLegalWithNS", new Object JavaDoc[] {colonName.toString()});
670                     fEntityScanner.skipSpaces();
671                 } else {
672                     // if there is data there should be some space
673
reportFatalError("SpaceRequiredInPI", null);
674                 }
675             }
676         }
677
678         fStringBuffer.clear();
679         // data
680
if (fEntityScanner.scanData("?>", fStringBuffer)) {
681             do {
682                 int c = fEntityScanner.peekChar();
683                 if (c != -1) {
684                     if (XMLChar.isHighSurrogate(c)) {
685                         scanSurrogates(fStringBuffer);
686                     }
687                     else if (isInvalidLiteral(c)) {
688                         reportFatalError("InvalidCharInPI",
689                                          new Object JavaDoc[]{Integer.toHexString(c)});
690                         fEntityScanner.scanChar();
691                     }
692                 }
693             } while (fEntityScanner.scanData("?>", fStringBuffer));
694         }
695         data.setValues(fStringBuffer);
696
697     } // scanPIData(String,XMLString)
698

699     /**
700      * Scans a comment.
701      * <p>
702      * <pre>
703      * [15] Comment ::= '&lt!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
704      * </pre>
705      * <p>
706      * <strong>Note:</strong> Called after scanning past '&lt;!--'
707      * <strong>Note:</strong> This method uses fString, anything in it
708      * at the time of calling is lost.
709      *
710      * @param text The buffer to fill in with the text.
711      */

712     protected void scanComment(XMLStringBuffer text)
713         throws IOException JavaDoc, XNIException {
714
715         // text
716
// REVISIT: handle invalid character, eof
717
text.clear();
718         while (fEntityScanner.scanData("--", text)) {
719             int c = fEntityScanner.peekChar();
720             if (c != -1) {
721                 if (XMLChar.isHighSurrogate(c)) {
722                     scanSurrogates(text);
723                 }
724                 else if (isInvalidLiteral(c)) {
725                     reportFatalError("InvalidCharInComment",
726                                      new Object JavaDoc[] { Integer.toHexString(c) });
727                     fEntityScanner.scanChar();
728                 }
729             }
730         }
731         if (!fEntityScanner.skipChar('>')) {
732             reportFatalError("DashDashInComment", null);
733         }
734
735     } // scanComment()
736

737     /**
738      * Scans an attribute value and normalizes whitespace converting all
739      * whitespace characters to space characters.
740      *
741      * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
742      *
743      * @param value The XMLString to fill in with the value.
744      * @param nonNormalizedValue The XMLString to fill in with the
745      * non-normalized value.
746      * @param atName The name of the attribute being parsed (for error msgs).
747      * @param checkEntities true if undeclared entities should be reported as VC violation,
748      * false if undeclared entities should be reported as WFC violation.
749      * @param eleName The name of element to which this attribute belongs.
750      *
751      * @return true if the non-normalized and normalized value are the same
752      *
753      * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
754      * at the time of calling is lost.
755      **/

756     protected boolean scanAttributeValue(XMLString value,
757                                       XMLString nonNormalizedValue,
758                                       String JavaDoc atName,
759                                       boolean checkEntities,String JavaDoc eleName)
760         throws IOException JavaDoc, XNIException
761     {
762         // quote
763
int quote = fEntityScanner.peekChar();
764         if (quote != '\'' && quote != '"') {
765             reportFatalError("OpenQuoteExpected", new Object JavaDoc[]{eleName,atName});
766         }
767
768         fEntityScanner.scanChar();
769         int entityDepth = fEntityDepth;
770
771         int c = fEntityScanner.scanLiteral(quote, value);
772         if (DEBUG_ATTR_NORMALIZATION) {
773             System.out.println("** scanLiteral -> \""
774                                + value.toString() + "\"");
775         }
776         
777         int fromIndex = 0;
778         if (c == quote && (fromIndex = isUnchangedByNormalization(value)) == -1) {
779             /** Both the non-normalized and normalized attribute values are equal. **/
780             nonNormalizedValue.setValues(value);
781             int cquote = fEntityScanner.scanChar();
782             if (cquote != quote) {
783                 reportFatalError("CloseQuoteExpected", new Object JavaDoc[]{eleName,atName});
784             }
785             return true;
786         }
787         fStringBuffer2.clear();
788         fStringBuffer2.append(value);
789         normalizeWhitespace(value, fromIndex);
790         if (DEBUG_ATTR_NORMALIZATION) {
791             System.out.println("** normalizeWhitespace -> \""
792                                + value.toString() + "\"");
793         }
794         if (c != quote) {
795             fScanningAttribute = true;
796             fStringBuffer.clear();
797             do {
798                 fStringBuffer.append(value);
799                 if (DEBUG_ATTR_NORMALIZATION) {
800                     System.out.println("** value2: \""
801                                        + fStringBuffer.toString() + "\"");
802                 }
803                 if (c == '&') {
804                     fEntityScanner.skipChar('&');
805                     if (entityDepth == fEntityDepth) {
806                         fStringBuffer2.append('&');
807                     }
808                     if (fEntityScanner.skipChar('#')) {
809                         if (entityDepth == fEntityDepth) {
810                             fStringBuffer2.append('#');
811                         }
812                         int ch = scanCharReferenceValue(fStringBuffer, fStringBuffer2);
813                         if (ch != -1) {
814                             if (DEBUG_ATTR_NORMALIZATION) {
815                                 System.out.println("** value3: \""
816                                                    + fStringBuffer.toString()
817                                                    + "\"");
818                             }
819                         }
820                     }
821                     else {
822                         String JavaDoc entityName = fEntityScanner.scanName();
823                         if (entityName == null) {
824                             reportFatalError("NameRequiredInReference", null);
825                         }
826                         else if (entityDepth == fEntityDepth) {
827                             fStringBuffer2.append(entityName);
828                         }
829                         if (!fEntityScanner.skipChar(';')) {
830                             reportFatalError("SemicolonRequiredInReference",
831                                              new Object JavaDoc []{entityName});
832                         }
833                         else if (entityDepth == fEntityDepth) {
834                             fStringBuffer2.append(';');
835                         }
836                         if (entityName == fAmpSymbol) {
837                             fStringBuffer.append('&');
838                             if (DEBUG_ATTR_NORMALIZATION) {
839                                 System.out.println("** value5: \""
840                                                    + fStringBuffer.toString()
841                                                    + "\"");
842                             }
843                         }
844                         else if (entityName == fAposSymbol) {
845                             fStringBuffer.append('\'');
846                             if (DEBUG_ATTR_NORMALIZATION) {
847                                 System.out.println("** value7: \""
848                                                    + fStringBuffer.toString()
849                                                    + "\"");
850                             }
851                         }
852                         else if (entityName == fLtSymbol) {
853                             fStringBuffer.append('<');
854                             if (DEBUG_ATTR_NORMALIZATION) {
855                                 System.out.println("** value9: \""
856                                                    + fStringBuffer.toString()
857                                                    + "\"");
858                             }
859                         }
860                         else if (entityName == fGtSymbol) {
861                             fStringBuffer.append('>');
862                             if (DEBUG_ATTR_NORMALIZATION) {
863                                 System.out.println("** valueB: \""
864                                                    + fStringBuffer.toString()
865                                                    + "\"");
866                             }
867                         }
868                         else if (entityName == fQuotSymbol) {
869                             fStringBuffer.append('"');
870                             if (DEBUG_ATTR_NORMALIZATION) {
871                                 System.out.println("** valueD: \""
872                                                    + fStringBuffer.toString()
873                                                    + "\"");
874                             }
875                         }
876                         else {
877                             if (fEntityManager.isExternalEntity(entityName)) {
878                                 reportFatalError("ReferenceToExternalEntity",
879                                                  new Object JavaDoc[] { entityName });
880                             }
881                             else {
882                                 if (!fEntityManager.isDeclaredEntity(entityName)) {
883                                     //WFC & VC: Entity Declared
884
if (checkEntities) {
885                                         if (fValidation) {
886                                             fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
887                                                                        "EntityNotDeclared",
888                                                                        new Object JavaDoc[]{entityName},
889                                                                        XMLErrorReporter.SEVERITY_ERROR);
890                                         }
891                                     }
892                                     else {
893                                         reportFatalError("EntityNotDeclared",
894                                                          new Object JavaDoc[]{entityName});
895                                     }
896                                 }
897                                 fEntityManager.startEntity(entityName, true);
898                             }
899                         }
900                     }
901                 }
902                 else if (c == '<') {
903                     reportFatalError("LessthanInAttValue",
904                                      new Object JavaDoc[] { eleName, atName });
905                     fEntityScanner.scanChar();
906                     if (entityDepth == fEntityDepth) {
907                         fStringBuffer2.append((char)c);
908                     }
909                 }
910                 else if (c == '%' || c == ']') {
911                     fEntityScanner.scanChar();
912                     fStringBuffer.append((char)c);
913                     if (entityDepth == fEntityDepth) {
914                         fStringBuffer2.append((char)c);
915                     }
916                     if (DEBUG_ATTR_NORMALIZATION) {
917                         System.out.println("** valueF: \""
918                                            + fStringBuffer.toString() + "\"");
919                     }
920                 }
921                 else if (c == '\n' || c == '\r') {
922                     fEntityScanner.scanChar();
923                     fStringBuffer.append(' ');
924                     if (entityDepth == fEntityDepth) {
925                         fStringBuffer2.append('\n');
926                     }
927                 }
928                 else if (c != -1 && XMLChar.isHighSurrogate(c)) {
929                     fStringBuffer3.clear();
930                     if (scanSurrogates(fStringBuffer3)) {
931                         fStringBuffer.append(fStringBuffer3);
932                         if (entityDepth == fEntityDepth) {
933                             fStringBuffer2.append(fStringBuffer3);
934                         }
935                         if (DEBUG_ATTR_NORMALIZATION) {
936                             System.out.println("** valueI: \""
937                                                + fStringBuffer.toString()
938                                                + "\"");
939                         }
940                     }
941                 }
942                 else if (c != -1 && isInvalidLiteral(c)) {
943                     reportFatalError("InvalidCharInAttValue",
944                     new Object JavaDoc[] {eleName, atName, Integer.toString(c, 16)});
945                     fEntityScanner.scanChar();
946                     if (entityDepth == fEntityDepth) {
947                         fStringBuffer2.append((char)c);
948                     }
949                 }
950                 c = fEntityScanner.scanLiteral(quote, value);
951                 if (entityDepth == fEntityDepth) {
952                     fStringBuffer2.append(value);
953                 }
954                 normalizeWhitespace(value);
955             } while (c != quote || entityDepth != fEntityDepth);
956             fStringBuffer.append(value);
957             if (DEBUG_ATTR_NORMALIZATION) {
958                 System.out.println("** valueN: \""
959                                    + fStringBuffer.toString() + "\"");
960             }
961             value.setValues(fStringBuffer);
962             fScanningAttribute = false;
963         }
964         nonNormalizedValue.setValues(fStringBuffer2);
965
966         // quote
967
int cquote = fEntityScanner.scanChar();
968         if (cquote != quote) {
969             reportFatalError("CloseQuoteExpected", new Object JavaDoc[]{eleName,atName});
970         }
971         return nonNormalizedValue.equals(value.ch, value.offset, value.length);
972         
973     } // scanAttributeValue()
974

975
976     /**
977      * Scans External ID and return the public and system IDs.
978      *
979      * @param identifiers An array of size 2 to return the system id,
980      * and public id (in that order).
981      * @param optionalSystemId Specifies whether the system id is optional.
982      *
983      * <strong>Note:</strong> This method uses fString and fStringBuffer,
984      * anything in them at the time of calling is lost.
985      */

986     protected void scanExternalID(String JavaDoc[] identifiers,
987                                   boolean optionalSystemId)
988         throws IOException JavaDoc, XNIException {
989
990         String JavaDoc systemId = null;
991         String JavaDoc publicId = null;
992         if (fEntityScanner.skipString("PUBLIC")) {
993             if (!fEntityScanner.skipSpaces()) {
994                 reportFatalError("SpaceRequiredAfterPUBLIC", null);
995             }
996             scanPubidLiteral(fString);
997             publicId = fString.toString();
998
999             if (!fEntityScanner.skipSpaces() && !optionalSystemId) {
1000                reportFatalError("SpaceRequiredBetweenPublicAndSystem", null);
1001            }
1002        }
1003
1004        if (publicId != null || fEntityScanner.skipString("SYSTEM")) {
1005            if (publicId == null && !fEntityScanner.skipSpaces()) {
1006                reportFatalError("SpaceRequiredAfterSYSTEM", null);
1007            }
1008            int quote = fEntityScanner.peekChar();
1009            if (quote != '\'' && quote != '"') {
1010                if (publicId != null && optionalSystemId) {
1011                    // looks like we don't have any system id
1012
// simply return the public id
1013
identifiers[0] = null;
1014                    identifiers[1] = publicId;
1015                    return;
1016                }
1017                reportFatalError("QuoteRequiredInSystemID", null);
1018            }
1019            fEntityScanner.scanChar();
1020            XMLString ident = fString;
1021            if (fEntityScanner.scanLiteral(quote, ident) != quote) {
1022                fStringBuffer.clear();
1023                do {
1024                    fStringBuffer.append(ident);
1025                    int c = fEntityScanner.peekChar();
1026                    if (XMLChar.isMarkup(c) || c == ']') {
1027                        fStringBuffer.append((char)fEntityScanner.scanChar());
1028                    }
1029                } while (fEntityScanner.scanLiteral(quote, ident) != quote);
1030                fStringBuffer.append(ident);
1031                ident = fStringBuffer;
1032            }
1033            systemId = ident.toString();
1034            if (!fEntityScanner.skipChar(quote)) {
1035                reportFatalError("SystemIDUnterminated", null);
1036            }
1037        }
1038
1039        // store result in array
1040
identifiers[0] = systemId;
1041        identifiers[1] = publicId;
1042    }
1043
1044
1045    /**
1046     * Scans public ID literal.
1047     *
1048     * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1049     * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
1050     *
1051     * The returned string is normalized according to the following rule,
1052     * from http://www.w3.org/TR/REC-xml#dt-pubid:
1053     *
1054     * Before a match is attempted, all strings of white space in the public
1055     * identifier must be normalized to single space characters (#x20), and
1056     * leading and trailing white space must be removed.
1057     *
1058     * @param literal The string to fill in with the public ID literal.
1059     * @return True on success.
1060     *
1061     * <strong>Note:</strong> This method uses fStringBuffer, anything in it at
1062     * the time of calling is lost.
1063     */

1064    protected boolean scanPubidLiteral(XMLString literal)
1065        throws IOException JavaDoc, XNIException
1066    {
1067        int quote = fEntityScanner.scanChar();
1068        if (quote != '\'' && quote != '"') {
1069            reportFatalError("QuoteRequiredInPublicID", null);
1070            return false;
1071        }
1072
1073        fStringBuffer.clear();
1074        // skip leading whitespace
1075
boolean skipSpace = true;
1076        boolean dataok = true;
1077        while (true) {
1078            int c = fEntityScanner.scanChar();
1079            if (c == ' ' || c == '\n' || c == '\r') {
1080                if (!skipSpace) {
1081                    // take the first whitespace as a space and skip the others
1082
fStringBuffer.append(' ');
1083                    skipSpace = true;
1084                }
1085            }
1086            else if (c == quote) {
1087                if (skipSpace) {
1088                    // if we finished on a space let's trim it
1089
fStringBuffer.length--;
1090                }
1091                literal.setValues(fStringBuffer);
1092                break;
1093            }
1094            else if (XMLChar.isPubid(c)) {
1095                fStringBuffer.append((char)c);
1096                skipSpace = false;
1097            }
1098            else if (c == -1) {
1099                reportFatalError("PublicIDUnterminated", null);
1100                return false;
1101            }
1102            else {
1103                dataok = false;
1104                reportFatalError("InvalidCharInPublicID",
1105                                 new Object JavaDoc[]{Integer.toHexString(c)});
1106            }
1107        }
1108        return dataok;
1109   }
1110
1111
1112    /**
1113     * Normalize whitespace in an XMLString converting all whitespace
1114     * characters to space characters.
1115     */

1116    protected void normalizeWhitespace(XMLString value) {
1117        int end = value.offset + value.length;
1118        for (int i = value.offset; i < end; ++i) {
1119            int c = value.ch[i];
1120            // Performance: For XML 1.0 documents take advantage of
1121
// the fact that the only legal characters below 0x20
1122
// are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've
1123
// already determined the well-formedness of these
1124
// characters it is sufficient (and safe) to check
1125
// against 0x20. -- mrglavas
1126
if (c < 0x20) {
1127                value.ch[i] = ' ';
1128            }
1129        }
1130    }
1131    
1132    /**
1133     * Normalize whitespace in an XMLString converting all whitespace
1134     * characters to space characters.
1135     */

1136    protected void normalizeWhitespace(XMLString value, int fromIndex) {
1137        int end = value.offset + value.length;
1138        for (int i = value.offset + fromIndex; i < end; ++i) {
1139            int c = value.ch[i];
1140            // Performance: For XML 1.0 documents take advantage of
1141
// the fact that the only legal characters below 0x20
1142
// are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've
1143
// already determined the well-formedness of these
1144
// characters it is sufficient (and safe) to check
1145
// against 0x20. -- mrglavas
1146
if (c < 0x20) {
1147                value.ch[i] = ' ';
1148            }
1149        }
1150    }
1151    
1152    /**
1153     * Checks whether this string would be unchanged by normalization.
1154     *
1155     * @return -1 if the value would be unchanged by normalization,
1156     * otherwise the index of the first whitespace character which
1157     * would be transformed.
1158     */

1159    protected int isUnchangedByNormalization(XMLString value) {
1160        int end = value.offset + value.length;
1161        for (int i = value.offset; i < end; ++i) {
1162            int c = value.ch[i];
1163            // Performance: For XML 1.0 documents take advantage of
1164
// the fact that the only legal characters below 0x20
1165
// are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've
1166
// already determined the well-formedness of these
1167
// characters it is sufficient (and safe) to check
1168
// against 0x20. -- mrglavas
1169
if (c < 0x20) {
1170                return i - value.offset;
1171            }
1172        }
1173        return -1;
1174    }
1175
    //
    // XMLEntityHandler methods
    //
1179

1180    /**
1181     * This method notifies of the start of an entity. The document entity
1182     * has the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]"
1183     * parameter entity names start with '%'; and general entities are just
1184     * specified by their name.
1185     *
1186     * @param name The name of the entity.
1187     * @param identifier The resource identifier.
1188     * @param encoding The auto-detected IANA encoding name of the entity
1189     * stream. This value will be null in those situations
1190     * where the entity encoding is not auto-detected (e.g.
1191     * internal entities or a document entity that is
1192     * parsed from a java.io.Reader).
1193     * @param augs Additional information that may include infoset augmentations
1194     *
1195     * @throws XNIException Thrown by handler to signal an error.
1196     */

1197    public void startEntity(String JavaDoc name,
1198                            XMLResourceIdentifier identifier,
1199                            String JavaDoc encoding, Augmentations augs) throws XNIException {
1200
1201        // keep track of the entity depth
1202
fEntityDepth++;
1203        // must reset entity scanner
1204
fEntityScanner = fEntityManager.getEntityScanner();
1205
1206    } // startEntity(String,XMLResourceIdentifier,String)
1207

1208    /**
1209     * This method notifies the end of an entity. The document entity has
1210     * the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]"
1211     * parameter entity names start with '%'; and general entities are just
1212     * specified by their name.
1213     *
1214     * @param name The name of the entity.
1215     * @param augs Additional information that may include infoset augmentations
1216     *
1217     * @throws XNIException Thrown by handler to signal an error.
1218     */

1219    public void endEntity(String JavaDoc name, Augmentations augs) throws XNIException {
1220
1221        // keep track of the entity depth
1222
fEntityDepth--;
1223
1224    } // endEntity(String)
1225

1226    /**
1227     * Scans a character reference and append the corresponding chars to the
1228     * specified buffer.
1229     *
1230     * <p>
1231     * <pre>
1232     * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1233     * </pre>
1234     *
1235     * <strong>Note:</strong> This method uses fStringBuffer, anything in it
1236     * at the time of calling is lost.
1237     *
1238     * @param buf the character buffer to append chars to
1239     * @param buf2 the character buffer to append non-normalized chars to
1240     *
1241     * @return the character value or (-1) on conversion failure
1242     */

1243    protected int scanCharReferenceValue(XMLStringBuffer buf, XMLStringBuffer buf2)
1244        throws IOException JavaDoc, XNIException {
1245
1246        // scan hexadecimal value
1247
boolean hex = false;
1248        if (fEntityScanner.skipChar('x')) {
1249            if (buf2 != null) { buf2.append('x'); }
1250            hex = true;
1251            fStringBuffer3.clear();
1252            boolean digit = true;
1253            
1254            int c = fEntityScanner.peekChar();
1255            digit = (c >= '0' && c <= '9') ||
1256                    (c >= 'a' && c <= 'f') ||
1257                    (c >= 'A' && c <= 'F');
1258            if (digit) {
1259                if (buf2 != null) { buf2.append((char)c); }
1260                fEntityScanner.scanChar();
1261                fStringBuffer3.append((char)c);
1262                
1263                do {
1264                    c = fEntityScanner.peekChar();
1265                    digit = (c >= '0' && c <= '9') ||
1266                            (c >= 'a' && c <= 'f') ||
1267                            (c >= 'A' && c <= 'F');
1268                    if (digit) {
1269                        if (buf2 != null) { buf2.append((char)c); }
1270                        fEntityScanner.scanChar();
1271                        fStringBuffer3.append((char)c);
1272                    }
1273                } while (digit);
1274            }
1275            else {
1276                reportFatalError("HexdigitRequiredInCharRef", null);
1277            }
1278        }
1279
1280        // scan decimal value
1281
else {
1282            fStringBuffer3.clear();
1283            boolean digit = true;
1284            
1285            int c = fEntityScanner.peekChar();
1286            digit = c >= '0' && c <= '9';
1287            if (digit) {
1288                if (buf2 != null) { buf2.append((char)c); }
1289                fEntityScanner.scanChar();
1290                fStringBuffer3.append((char)c);
1291                
1292                do {
1293                    c = fEntityScanner.peekChar();
1294                    digit = c >= '0' && c <= '9';
1295                    if (digit) {
1296                        if (buf2 != null) { buf2.append((char)c); }
1297                        fEntityScanner.scanChar();
1298                        fStringBuffer3.append((char)c);
1299                    }
1300                } while (digit);
1301            }
1302            else {
1303                reportFatalError("DigitRequiredInCharRef", null);
1304            }
1305        }
1306
1307        // end
1308
if (!fEntityScanner.skipChar(';')) {
1309            reportFatalError("SemicolonRequiredInCharRef", null);
1310        }
1311        if (buf2 != null) { buf2.append(';'); }
1312        
1313        // convert string to number
1314
int value = -1;
1315        try {
1316            value = Integer.parseInt(fStringBuffer3.toString(),
1317                                     hex ? 16 : 10);
1318            
1319            // character reference must be a valid XML character
1320
if (isInvalid(value)) {
1321                StringBuffer JavaDoc errorBuf = new StringBuffer JavaDoc(fStringBuffer3.length + 1);
1322                if (hex) errorBuf.append('x');
1323                errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length);
1324                reportFatalError("InvalidCharRef",
1325                                 new Object JavaDoc[]{errorBuf.toString()});
1326            }
1327        }
1328        catch (NumberFormatException JavaDoc e) {
1329            // Conversion failed, let -1 value drop through.
1330
// If we end up here, the character reference was invalid.
1331
StringBuffer JavaDoc errorBuf = new StringBuffer JavaDoc(fStringBuffer3.length + 1);
1332            if (hex) errorBuf.append('x');
1333            errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length);
1334            reportFatalError("InvalidCharRef",
1335                             new Object JavaDoc[]{errorBuf.toString()});
1336        }
1337
1338        // append corresponding chars to the given buffer
1339
if (!XMLChar.isSupplemental(value)) {
1340            buf.append((char) value);
1341        }
1342        else {
1343            // character is supplemental, split it into surrogate chars
1344
buf.append(XMLChar.highSurrogate(value));
1345            buf.append(XMLChar.lowSurrogate(value));
1346        }
1347
1348        // char refs notification code
1349
if (fNotifyCharRefs && value != -1) {
1350            String JavaDoc literal = "#" + (hex ? "x" : "") + fStringBuffer3.toString();
1351            if (!fScanningAttribute) {
1352                fCharRefLiteral = literal;
1353            }
1354        }
1355                
1356        return value;
1357    }
1358
1359    // returns true if the given character is not
1360
// valid with respect to the version of
1361
// XML understood by this scanner.
1362
protected boolean isInvalid(int value) {
1363        return (XMLChar.isInvalid(value));
1364    } // isInvalid(int): boolean
1365

1366    // returns true if the given character is not
1367
// valid or may not be used outside a character reference
1368
// with respect to the version of XML understood by this scanner.
1369
protected boolean isInvalidLiteral(int value) {
1370        return (XMLChar.isInvalid(value));
1371    } // isInvalidLiteral(int): boolean
1372

1373    // returns true if the given character is
1374
// a valid nameChar with respect to the version of
1375
// XML understood by this scanner.
1376
protected boolean isValidNameChar(int value) {
1377        return (XMLChar.isName(value));
1378    } // isValidNameChar(int): boolean
1379

1380    // returns true if the given character is
1381
// a valid nameStartChar with respect to the version of
1382
// XML understood by this scanner.
1383
protected boolean isValidNameStartChar(int value) {
1384        return (XMLChar.isNameStart(value));
1385    } // isValidNameStartChar(int): boolean
1386

1387    // returns true if the given character is
1388
// a valid NCName character with respect to the version of
1389
// XML understood by this scanner.
1390
protected boolean isValidNCName(int value) {
1391        return (XMLChar.isNCName(value));
1392    } // isValidNCName(int): boolean
1393

1394    // returns true if the given character is
1395
// a valid high surrogate for a nameStartChar
1396
// with respect to the version of XML understood
1397
// by this scanner.
1398
protected boolean isValidNameStartHighSurrogate(int value) {
1399        return false;
1400    } // isValidNameStartHighSurrogate(int): boolean
1401

1402    protected boolean versionSupported(String JavaDoc version ) {
1403        return version.equals("1.0");
1404    } // version Supported
1405

1406    // returns the error message key for unsupported
1407
// versions of XML with respect to the version of
1408
// XML understood by this scanner.
1409
protected String JavaDoc getVersionNotSupportedKey () {
1410        return "VersionNotSupported";
1411    } // getVersionNotSupportedKey: String
1412

1413    /**
1414     * Scans surrogates and append them to the specified buffer.
1415     * <p>
1416     * <strong>Note:</strong> This assumes the current char has already been
1417     * identified as a high surrogate.
1418     *
1419     * @param buf The StringBuffer to append the read surrogates to.
1420     * @return True if it succeeded.
1421     */

1422    protected boolean scanSurrogates(XMLStringBuffer buf)
1423        throws IOException JavaDoc, XNIException {
1424
1425        int high = fEntityScanner.scanChar();
1426        int low = fEntityScanner.peekChar();
1427        if (!XMLChar.isLowSurrogate(low)) {
1428            reportFatalError("InvalidCharInContent",
1429                             new Object JavaDoc[] {Integer.toString(high, 16)});
1430            return false;
1431        }
1432        fEntityScanner.scanChar();
1433
1434        // convert surrogates to supplemental character
1435
int c = XMLChar.supplemental((char)high, (char)low);
1436
1437        // supplemental character must be a valid XML character
1438
if (isInvalid(c)) {
1439            reportFatalError("InvalidCharInContent",
1440                             new Object JavaDoc[]{Integer.toString(c, 16)});
1441            return false;
1442        }
1443
1444        // fill in the buffer
1445
buf.append((char)high);
1446        buf.append((char)low);
1447
1448        return true;
1449
1450    } // scanSurrogates():boolean
1451

1452
1453    /**
1454     * Convenience function used in all XML scanners.
1455     */

1456    protected void reportFatalError(String JavaDoc msgId, Object JavaDoc[] args)
1457        throws XNIException {
1458        fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1459                                   msgId, args,
1460                                   XMLErrorReporter.SEVERITY_FATAL_ERROR);
1461    }
1462
1463    // private methods
1464
private void init() {
1465        fEntityScanner = null;
1466        // initialize vars
1467
fEntityDepth = 0;
1468        fReportEntity = true;
1469        fResourceIdentifier.clear();
1470    }
1471
1472} // class XMLScanner
1473
Popular Tags