// KickJava: Java API By Example, From Geeks To Geeks.
//
// Java > Open Source Codes > org > apache > xerces > impl > XML11DocumentScannerImpl


/*
 * Copyright 1999-2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package org.apache.xerces.impl;

import java.io.IOException;

import org.apache.xerces.impl.msg.XMLMessageFormatter;
import org.apache.xerces.util.XML11Char;
import org.apache.xerces.util.XMLChar;
import org.apache.xerces.util.XMLStringBuffer;
import org.apache.xerces.xni.XMLString;
import org.apache.xerces.xni.XNIException;

28 /**
29  * This class is responsible for scanning XML document structure
30  * and content. The scanner acts as the source for the document
31  * information which is communicated to the document handler.
32  * <p>
33  * This component requires the following features and properties from the
34  * component manager that uses it:
35  * <ul>
36  * <li>http://xml.org/sax/features/namespaces</li>
37  * <li>http://xml.org/sax/features/validation</li>
38  * <li>http://apache.org/xml/features/nonvalidating/load-external-dtd</li>
39  * <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
40  * <li>http://apache.org/xml/features/scanner/notify-builtin-refs</li>
41  * <li>http://apache.org/xml/properties/internal/symbol-table</li>
42  * <li>http://apache.org/xml/properties/internal/error-reporter</li>
43  * <li>http://apache.org/xml/properties/internal/entity-manager</li>
44  * <li>http://apache.org/xml/properties/internal/dtd-scanner</li>
45  * </ul>
46  *
47  * @xerces.internal
48  *
49  * @author Glenn Marcy, IBM
50  * @author Andy Clark, IBM
51  * @author Arnaud Le Hors, IBM
52  * @author Eric Ye, IBM
53  *
54  * @version $Id: XML11DocumentScannerImpl.java,v 1.22 2004/10/04 21:45:49 mrglavas Exp $
55  */

56 public class XML11DocumentScannerImpl
57     extends XMLDocumentScannerImpl {
58
59
60     /** Array of 3 strings. */
61     private String JavaDoc[] fStrings = new String JavaDoc[3];
62
63     /** String. */
64     private XMLString fString = new XMLString();
65
66     /** String buffer. */
67     private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
68     private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
69     private XMLStringBuffer fStringBuffer3 = new XMLStringBuffer();
70
71     //
72
// Constructors
73
//
74

75     /** Default constructor. */
76     public XML11DocumentScannerImpl() {super();} // <init>()
77

78     //
79
// overridden methods
80
//
81

82     // XMLDocumentFragmentImpl methods
83

84     /**
85      * Scans element content.
86      *
87      * @return Returns the next character on the stream.
88      */

89     protected int scanContent() throws IOException JavaDoc, XNIException {
90
91         XMLString content = fString;
92         int c = fEntityScanner.scanContent(content);
93         if (c == '\r' || c == 0x85 || c == 0x2028) {
94             // happens when there is the character reference &#13;
95
// but scanContent doesn't do entity expansions...
96
// is this *really* necessary??? - NG
97
fEntityScanner.scanChar();
98             fStringBuffer.clear();
99             fStringBuffer.append(fString);
100             fStringBuffer.append((char)c);
101             content = fStringBuffer;
102             c = -1;
103         }
104         if (fDocumentHandler != null && content.length > 0) {
105             fDocumentHandler.characters(content, null);
106         }
107
108         if (c == ']' && fString.length == 0) {
109             fStringBuffer.clear();
110             fStringBuffer.append((char)fEntityScanner.scanChar());
111             // remember where we are in case we get an endEntity before we
112
// could flush the buffer out - this happens when we're parsing an
113
// entity which ends with a ]
114
fInScanContent = true;
115             //
116
// We work on a single character basis to handle cases such as:
117
// ']]]>' which we might otherwise miss.
118
//
119
if (fEntityScanner.skipChar(']')) {
120                 fStringBuffer.append(']');
121                 while (fEntityScanner.skipChar(']')) {
122                     fStringBuffer.append(']');
123                 }
124                 if (fEntityScanner.skipChar('>')) {
125                     reportFatalError("CDEndInContent", null);
126                 }
127             }
128             if (fDocumentHandler != null && fStringBuffer.length != 0) {
129                 fDocumentHandler.characters(fStringBuffer, null);
130             }
131             fInScanContent = false;
132             c = -1;
133         }
134         return c;
135
136     } // scanContent():int
137

138     /**
139      * Scans an attribute value and normalizes whitespace converting all
140      * whitespace characters to space characters.
141      *
142      * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
143      *
144      * @param value The XMLString to fill in with the value.
145      * @param nonNormalizedValue The XMLString to fill in with the
146      * non-normalized value.
147      * @param atName The name of the attribute being parsed (for error msgs).
148      * @param checkEntities true if undeclared entities should be reported as VC violation,
149      * false if undeclared entities should be reported as WFC violation.
150      * @param eleName The name of element to which this attribute belongs.
151      *
152      * @return true if the non-normalized and normalized value are the same
153      *
154      * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
155      * at the time of calling is lost.
156      **/

157     protected boolean scanAttributeValue(XMLString value,
158                                       XMLString nonNormalizedValue,
159                                       String JavaDoc atName,
160                                       boolean checkEntities,String JavaDoc eleName)
161         throws IOException JavaDoc, XNIException
162     {
163         // quote
164
int quote = fEntityScanner.peekChar();
165         if (quote != '\'' && quote != '"') {
166             reportFatalError("OpenQuoteExpected", new Object JavaDoc[]{eleName,atName});
167         }
168
169         fEntityScanner.scanChar();
170         int entityDepth = fEntityDepth;
171
172         int c = fEntityScanner.scanLiteral(quote, value);
173         if (DEBUG_ATTR_NORMALIZATION) {
174             System.out.println("** scanLiteral -> \""
175                                + value.toString() + "\"");
176         }
177         
178         int fromIndex = 0;
179         if (c == quote && (fromIndex = isUnchangedByNormalization(value)) == -1) {
180             /** Both the non-normalized and normalized attribute values are equal. **/
181             nonNormalizedValue.setValues(value);
182             int cquote = fEntityScanner.scanChar();
183             if (cquote != quote) {
184                 reportFatalError("CloseQuoteExpected", new Object JavaDoc[]{eleName,atName});
185             }
186             return true;
187         }
188         fStringBuffer2.clear();
189         fStringBuffer2.append(value);
190         normalizeWhitespace(value, fromIndex);
191         if (DEBUG_ATTR_NORMALIZATION) {
192             System.out.println("** normalizeWhitespace -> \""
193                                + value.toString() + "\"");
194         }
195         if (c != quote) {
196             fScanningAttribute = true;
197             fStringBuffer.clear();
198             do {
199                 fStringBuffer.append(value);
200                 if (DEBUG_ATTR_NORMALIZATION) {
201                     System.out.println("** value2: \""
202                                        + fStringBuffer.toString() + "\"");
203                 }
204                 if (c == '&') {
205                     fEntityScanner.skipChar('&');
206                     if (entityDepth == fEntityDepth) {
207                         fStringBuffer2.append('&');
208                     }
209                     if (fEntityScanner.skipChar('#')) {
210                         if (entityDepth == fEntityDepth) {
211                             fStringBuffer2.append('#');
212                         }
213                         int ch = scanCharReferenceValue(fStringBuffer, fStringBuffer2);
214                         if (ch != -1) {
215                             if (DEBUG_ATTR_NORMALIZATION) {
216                                 System.out.println("** value3: \""
217                                                    + fStringBuffer.toString()
218                                                    + "\"");
219                             }
220                         }
221                     }
222                     else {
223                         String JavaDoc entityName = fEntityScanner.scanName();
224                         if (entityName == null) {
225                             reportFatalError("NameRequiredInReference", null);
226                         }
227                         else if (entityDepth == fEntityDepth) {
228                             fStringBuffer2.append(entityName);
229                         }
230                         if (!fEntityScanner.skipChar(';')) {
231                             reportFatalError("SemicolonRequiredInReference",
232                                              new Object JavaDoc []{entityName});
233                         }
234                         else if (entityDepth == fEntityDepth) {
235                             fStringBuffer2.append(';');
236                         }
237                         if (entityName == fAmpSymbol) {
238                             fStringBuffer.append('&');
239                             if (DEBUG_ATTR_NORMALIZATION) {
240                                 System.out.println("** value5: \""
241                                                    + fStringBuffer.toString()
242                                                    + "\"");
243                             }
244                         }
245                         else if (entityName == fAposSymbol) {
246                             fStringBuffer.append('\'');
247                             if (DEBUG_ATTR_NORMALIZATION) {
248                                 System.out.println("** value7: \""
249                                                    + fStringBuffer.toString()
250                                                    + "\"");
251                             }
252                         }
253                         else if (entityName == fLtSymbol) {
254                             fStringBuffer.append('<');
255                             if (DEBUG_ATTR_NORMALIZATION) {
256                                 System.out.println("** value9: \""
257                                                    + fStringBuffer.toString()
258                                                    + "\"");
259                             }
260                         }
261                         else if (entityName == fGtSymbol) {
262                             fStringBuffer.append('>');
263                             if (DEBUG_ATTR_NORMALIZATION) {
264                                 System.out.println("** valueB: \""
265                                                    + fStringBuffer.toString()
266                                                    + "\"");
267                             }
268                         }
269                         else if (entityName == fQuotSymbol) {
270                             fStringBuffer.append('"');
271                             if (DEBUG_ATTR_NORMALIZATION) {
272                                 System.out.println("** valueD: \""
273                                                    + fStringBuffer.toString()
274                                                    + "\"");
275                             }
276                         }
277                         else {
278                             if (fEntityManager.isExternalEntity(entityName)) {
279                                 reportFatalError("ReferenceToExternalEntity",
280                                                  new Object JavaDoc[] { entityName });
281                             }
282                             else {
283                                 if (!fEntityManager.isDeclaredEntity(entityName)) {
284                                     //WFC & VC: Entity Declared
285
if (checkEntities) {
286                                         if (fValidation) {
287                                             fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
288                                                                        "EntityNotDeclared",
289                                                                        new Object JavaDoc[]{entityName},
290                                                                        XMLErrorReporter.SEVERITY_ERROR);
291                                         }
292                                     }
293                                     else {
294                                         reportFatalError("EntityNotDeclared",
295                                                          new Object JavaDoc[]{entityName});
296                                     }
297                                 }
298                                 fEntityManager.startEntity(entityName, true);
299                             }
300                         }
301                     }
302                 }
303                 else if (c == '<') {
304                     reportFatalError("LessthanInAttValue",
305                                      new Object JavaDoc[] { eleName, atName });
306                     fEntityScanner.scanChar();
307                     if (entityDepth == fEntityDepth) {
308                         fStringBuffer2.append((char)c);
309                     }
310                 }
311                 else if (c == '%' || c == ']') {
312                     fEntityScanner.scanChar();
313                     fStringBuffer.append((char)c);
314                     if (entityDepth == fEntityDepth) {
315                         fStringBuffer2.append((char)c);
316                     }
317                     if (DEBUG_ATTR_NORMALIZATION) {
318                         System.out.println("** valueF: \""
319                                            + fStringBuffer.toString() + "\"");
320                     }
321                 }
322                 // note that none of these characters should ever get through
323
// XML11EntityScanner. Not sure why
324
// this check was originally necessary. - NG
325
else if (c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) {
326                     fEntityScanner.scanChar();
327                     fStringBuffer.append(' ');
328                     if (entityDepth == fEntityDepth) {
329                         fStringBuffer2.append('\n');
330                     }
331                 }
332                 else if (c != -1 && XMLChar.isHighSurrogate(c)) {
333                     fStringBuffer3.clear();
334                     if (scanSurrogates(fStringBuffer3)) {
335                         fStringBuffer.append(fStringBuffer3);
336                         if (entityDepth == fEntityDepth) {
337                             fStringBuffer2.append(fStringBuffer3);
338                         }
339                         if (DEBUG_ATTR_NORMALIZATION) {
340                             System.out.println("** valueI: \""
341                                                + fStringBuffer.toString()
342                                                + "\"");
343                         }
344                     }
345                 }
346                 else if (c != -1 && XML11Char.isXML11Invalid(c)) {
347                     reportFatalError("InvalidCharInAttValue",
348                                      new Object JavaDoc[] {eleName, atName, Integer.toString(c, 16)});
349                     fEntityScanner.scanChar();
350                     if (entityDepth == fEntityDepth) {
351                         fStringBuffer2.append((char)c);
352                     }
353                 }
354                 c = fEntityScanner.scanLiteral(quote, value);
355                 if (entityDepth == fEntityDepth) {
356                     fStringBuffer2.append(value);
357                 }
358                 normalizeWhitespace(value);
359             } while (c != quote || entityDepth != fEntityDepth);
360             fStringBuffer.append(value);
361             if (DEBUG_ATTR_NORMALIZATION) {
362                 System.out.println("** valueN: \""
363                                    + fStringBuffer.toString() + "\"");
364             }
365             value.setValues(fStringBuffer);
366             fScanningAttribute = false;
367         }
368         nonNormalizedValue.setValues(fStringBuffer2);
369
370         // quote
371
int cquote = fEntityScanner.scanChar();
372         if (cquote != quote) {
373             reportFatalError("CloseQuoteExpected", new Object JavaDoc[]{eleName,atName});
374         }
375         return nonNormalizedValue.equals(value.ch, value.offset, value.length);
376     } // scanAttributeValue()
377

378     //
379
// XMLScanner methods
380
//
381
// NOTE: this is a carbon copy of the code in XML11DTDScannerImpl;
382
// we need to override these methods in both places.
383
// this needs to be refactored!!! - NG
384
/**
385      * Scans public ID literal.
386      *
387      * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
388      * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
389      *
390      * The returned string is normalized according to the following rule,
391      * from http://www.w3.org/TR/REC-xml#dt-pubid:
392      *
393      * Before a match is attempted, all strings of white space in the public
394      * identifier must be normalized to single space characters (#x20), and
395      * leading and trailing white space must be removed.
396      *
397      * @param literal The string to fill in with the public ID literal.
398      * @return True on success.
399      *
400      * <strong>Note:</strong> This method uses fStringBuffer, anything in it at
401      * the time of calling is lost.
402      */

403     protected boolean scanPubidLiteral(XMLString literal)
404         throws IOException JavaDoc, XNIException
405     {
406         int quote = fEntityScanner.scanChar();
407         if (quote != '\'' && quote != '"') {
408             reportFatalError("QuoteRequiredInPublicID", null);
409             return false;
410         }
411
412         fStringBuffer.clear();
413         // skip leading whitespace
414
boolean skipSpace = true;
415         boolean dataok = true;
416         while (true) {
417             int c = fEntityScanner.scanChar();
418             // REVISIT: none of these except \n and 0x20 should make it past the entity scanner
419
if (c == ' ' || c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) {
420                 if (!skipSpace) {
421                     // take the first whitespace as a space and skip the others
422
fStringBuffer.append(' ');
423                     skipSpace = true;
424                 }
425             }
426             else if (c == quote) {
427                 if (skipSpace) {
428                     // if we finished on a space let's trim it
429
fStringBuffer.length--;
430                 }
431                 literal.setValues(fStringBuffer);
432                 break;
433             }
434             else if (XMLChar.isPubid(c)) {
435                 fStringBuffer.append((char)c);
436                 skipSpace = false;
437             }
438             else if (c == -1) {
439                 reportFatalError("PublicIDUnterminated", null);
440                 return false;
441             }
442             else {
443                 dataok = false;
444                 reportFatalError("InvalidCharInPublicID",
445                                  new Object JavaDoc[]{Integer.toHexString(c)});
446             }
447         }
448         return dataok;
449    }
450    
451     /**
452      * Normalize whitespace in an XMLString converting all whitespace
453      * characters to space characters.
454      */

455     protected void normalizeWhitespace(XMLString value) {
456         int end = value.offset + value.length;
457         for (int i = value.offset; i < end; ++i) {
458            int c = value.ch[i];
459            if (XMLChar.isSpace(c)) {
460                value.ch[i] = ' ';
461            }
462        }
463     }
464     
465     /**
466      * Normalize whitespace in an XMLString converting all whitespace
467      * characters to space characters.
468      */

469     protected void normalizeWhitespace(XMLString value, int fromIndex) {
470         int end = value.offset + value.length;
471         for (int i = value.offset + fromIndex; i < end; ++i) {
472             int c = value.ch[i];
473             if (XMLChar.isSpace(c)) {
474                 value.ch[i] = ' ';
475             }
476         }
477     }
478     
479     /**
480      * Checks whether this string would be unchanged by normalization.
481      *
482      * @return -1 if the value would be unchanged by normalization,
483      * otherwise the index of the first whitespace character which
484      * would be transformed.
485      */

486     protected int isUnchangedByNormalization(XMLString value) {
487         int end = value.offset + value.length;
488         for (int i = value.offset; i < end; ++i) {
489             int c = value.ch[i];
490             if (XMLChar.isSpace(c)) {
491                 return i - value.offset;
492             }
493         }
494         return -1;
495     }
496
497     // returns true if the given character is not
498
// valid with respect to the version of
499
// XML understood by this scanner.
500
protected boolean isInvalid(int value) {
501         return (XML11Char.isXML11Invalid(value));
502     } // isInvalid(int): boolean
503

504     // returns true if the given character is not
505
// valid or may not be used outside a character reference
506
// with respect to the version of XML understood by this scanner.
507
protected boolean isInvalidLiteral(int value) {
508         return (!XML11Char.isXML11ValidLiteral(value));
509     } // isInvalidLiteral(int): boolean
510

511     // returns true if the given character is
512
// a valid nameChar with respect to the version of
513
// XML understood by this scanner.
514
protected boolean isValidNameChar(int value) {
515         return (XML11Char.isXML11Name(value));
516     } // isValidNameChar(int): boolean
517

518     // returns true if the given character is
519
// a valid nameStartChar with respect to the version of
520
// XML understood by this scanner.
521
protected boolean isValidNameStartChar(int value) {
522         return (XML11Char.isXML11NameStart(value));
523     } // isValidNameStartChar(int): boolean
524

525     // returns true if the given character is
526
// a valid NCName character with respect to the version of
527
// XML understood by this scanner.
528
protected boolean isValidNCName(int value) {
529         return (XML11Char.isXML11NCName(value));
530     } // isValidNCName(int): boolean
531

532     // returns true if the given character is
533
// a valid high surrogate for a nameStartChar
534
// with respect to the version of XML understood
535
// by this scanner.
536
protected boolean isValidNameStartHighSurrogate(int value) {
537         return XML11Char.isXML11NameHighSurrogate(value);
538     } // isValidNameStartHighSurrogate(int): boolean
539

540     protected boolean versionSupported(String JavaDoc version) {
541         return (version.equals("1.1") || version.equals("1.0"));
542     } // versionSupported(String): boolean
543

544     // returns the error message key for unsupported
545
// versions of XML with respect to the version of
546
// XML understood by this scanner.
547
protected String JavaDoc getVersionNotSupportedKey () {
548         return "VersionNotSupported11";
549     } // getVersionNotSupportedKey: String
550

551 } // class XML11DocumentScannerImpl
// Popular Tags