KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sun > org > apache > xerces > internal > impl > XML11DocumentScannerImpl


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 1999-2004 The Apache Software Foundation.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

57
58 package com.sun.org.apache.xerces.internal.impl;
59
60 import java.io.IOException JavaDoc;
61
62 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
63 import com.sun.org.apache.xerces.internal.util.XML11Char;
64 import com.sun.org.apache.xerces.internal.util.XMLChar;
65 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
66 import com.sun.org.apache.xerces.internal.xni.XMLString;
67 import com.sun.org.apache.xerces.internal.xni.XNIException;
68
69 /**
70  * This class is responsible for scanning XML document structure
71  * and content. The scanner acts as the source for the document
72  * information which is communicated to the document handler.
73  * <p>
74  * This component requires the following features and properties from the
75  * component manager that uses it:
76  * <ul>
77  * <li>http://xml.org/sax/features/namespaces</li>
78  * <li>http://xml.org/sax/features/validation</li>
79  * <li>http://apache.org/xml/features/nonvalidating/load-external-dtd</li>
80  * <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
81  * <li>http://apache.org/xml/features/scanner/notify-builtin-refs</li>
82  * <li>http://apache.org/xml/properties/internal/symbol-table</li>
83  * <li>http://apache.org/xml/properties/internal/error-reporter</li>
84  * <li>http://apache.org/xml/properties/internal/entity-manager</li>
85  * <li>http://apache.org/xml/properties/internal/dtd-scanner</li>
86  * </ul>
87  *
88  * @author Glenn Marcy, IBM
89  * @author Andy Clark, IBM
90  * @author Arnaud Le Hors, IBM
91  * @author Eric Ye, IBM
92  *
93  * @version $Id: XML11DocumentScannerImpl.java,v 1.19 2004/04/25 05:05:50 mrglavas Exp $
94  */

95 public class XML11DocumentScannerImpl
96     extends XMLDocumentScannerImpl {
97
98
99     /** Array of 3 strings. */
100     private String JavaDoc[] fStrings = new String JavaDoc[3];
101
102     /** String. */
103     private XMLString fString = new XMLString();
104
105     /** String buffer. */
106     private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
107     private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
108     private XMLStringBuffer fStringBuffer3 = new XMLStringBuffer();
109
110     //
111
// Constructors
112
//
113

114     /** Default constructor. */
115     public XML11DocumentScannerImpl() {super();} // <init>()
116

117     //
118
// overridden methods
119
//
120

121     // XMLDocumentFragmentImpl methods
122

123     /**
124      * Scans element content.
125      *
126      * @return Returns the next character on the stream.
127      */

128     protected int scanContent() throws IOException JavaDoc, XNIException {
129
130         XMLString content = fString;
131         int c = fEntityScanner.scanContent(content);
132         if (c == '\r' || c == 0x85 || c == 0x2028) {
133             // happens when there is the character reference &#13;
134
// but scanContent doesn't do entity expansions...
135
// is this *really* necessary??? - NG
136
fEntityScanner.scanChar();
137             fStringBuffer.clear();
138             fStringBuffer.append(fString);
139             fStringBuffer.append((char)c);
140             content = fStringBuffer;
141             c = -1;
142         }
143         if (fDocumentHandler != null && content.length > 0) {
144             fDocumentHandler.characters(content, null);
145         }
146
147         if (c == ']' && fString.length == 0) {
148             fStringBuffer.clear();
149             fStringBuffer.append((char)fEntityScanner.scanChar());
150             // remember where we are in case we get an endEntity before we
151
// could flush the buffer out - this happens when we're parsing an
152
// entity which ends with a ]
153
fInScanContent = true;
154             //
155
// We work on a single character basis to handle cases such as:
156
// ']]]>' which we might otherwise miss.
157
//
158
if (fEntityScanner.skipChar(']')) {
159                 fStringBuffer.append(']');
160                 while (fEntityScanner.skipChar(']')) {
161                     fStringBuffer.append(']');
162                 }
163                 if (fEntityScanner.skipChar('>')) {
164                     reportFatalError("CDEndInContent", null);
165                 }
166             }
167             if (fDocumentHandler != null && fStringBuffer.length != 0) {
168                 fDocumentHandler.characters(fStringBuffer, null);
169             }
170             fInScanContent = false;
171             c = -1;
172         }
173         return c;
174
175     } // scanContent():int
176

177     /**
178      * Scans an attribute value and normalizes whitespace converting all
179      * whitespace characters to space characters.
180      *
181      * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
182      *
183      * @param value The XMLString to fill in with the value.
184      * @param nonNormalizedValue The XMLString to fill in with the
185      * non-normalized value.
186      * @param atName The name of the attribute being parsed (for error msgs).
187      * @param checkEntities true if undeclared entities should be reported as VC violation,
188      * false if undeclared entities should be reported as WFC violation.
189      * @param eleName The name of element to which this attribute belongs.
190      *
191      * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
192      * at the time of calling is lost.
193      **/

194     protected void scanAttributeValue(XMLString value,
195                                       XMLString nonNormalizedValue,
196                                       String JavaDoc atName,
197                                       boolean checkEntities,String JavaDoc eleName)
198         throws IOException JavaDoc, XNIException
199     {
200         // quote
201
int quote = fEntityScanner.peekChar();
202         if (quote != '\'' && quote != '"') {
203             reportFatalError("OpenQuoteExpected", new Object JavaDoc[]{eleName,atName});
204         }
205
206         fEntityScanner.scanChar();
207         int entityDepth = fEntityDepth;
208
209         int c = fEntityScanner.scanLiteral(quote, value);
210         if (DEBUG_ATTR_NORMALIZATION) {
211             System.out.println("** scanLiteral -> \""
212                                + value.toString() + "\"");
213         }
214         fStringBuffer2.clear();
215         fStringBuffer2.append(value);
216         normalizeWhitespace(value);
217         if (DEBUG_ATTR_NORMALIZATION) {
218             System.out.println("** normalizeWhitespace -> \""
219                                + value.toString() + "\"");
220         }
221         if (c != quote) {
222             fScanningAttribute = true;
223             fStringBuffer.clear();
224             do {
225                 fStringBuffer.append(value);
226                 if (DEBUG_ATTR_NORMALIZATION) {
227                     System.out.println("** value2: \""
228                                        + fStringBuffer.toString() + "\"");
229                 }
230                 if (c == '&') {
231                     fEntityScanner.skipChar('&');
232                     if (entityDepth == fEntityDepth) {
233                         fStringBuffer2.append('&');
234                     }
235                     if (fEntityScanner.skipChar('#')) {
236                         if (entityDepth == fEntityDepth) {
237                             fStringBuffer2.append('#');
238                         }
239                         int ch = scanCharReferenceValue(fStringBuffer, fStringBuffer2);
240                         if (ch != -1) {
241                             if (DEBUG_ATTR_NORMALIZATION) {
242                                 System.out.println("** value3: \""
243                                                    + fStringBuffer.toString()
244                                                    + "\"");
245                             }
246                         }
247                     }
248                     else {
249                         String JavaDoc entityName = fEntityScanner.scanName();
250                         if (entityName == null) {
251                             reportFatalError("NameRequiredInReference", null);
252                         }
253                         else if (entityDepth == fEntityDepth) {
254                             fStringBuffer2.append(entityName);
255                         }
256                         if (!fEntityScanner.skipChar(';')) {
257                             reportFatalError("SemicolonRequiredInReference",
258                                              new Object JavaDoc []{entityName});
259                         }
260                         else if (entityDepth == fEntityDepth) {
261                             fStringBuffer2.append(';');
262                         }
263                         if (entityName == fAmpSymbol) {
264                             fStringBuffer.append('&');
265                             if (DEBUG_ATTR_NORMALIZATION) {
266                                 System.out.println("** value5: \""
267                                                    + fStringBuffer.toString()
268                                                    + "\"");
269                             }
270                         }
271                         else if (entityName == fAposSymbol) {
272                             fStringBuffer.append('\'');
273                             if (DEBUG_ATTR_NORMALIZATION) {
274                                 System.out.println("** value7: \""
275                                                    + fStringBuffer.toString()
276                                                    + "\"");
277                             }
278                         }
279                         else if (entityName == fLtSymbol) {
280                             fStringBuffer.append('<');
281                             if (DEBUG_ATTR_NORMALIZATION) {
282                                 System.out.println("** value9: \""
283                                                    + fStringBuffer.toString()
284                                                    + "\"");
285                             }
286                         }
287                         else if (entityName == fGtSymbol) {
288                             fStringBuffer.append('>');
289                             if (DEBUG_ATTR_NORMALIZATION) {
290                                 System.out.println("** valueB: \""
291                                                    + fStringBuffer.toString()
292                                                    + "\"");
293                             }
294                         }
295                         else if (entityName == fQuotSymbol) {
296                             fStringBuffer.append('"');
297                             if (DEBUG_ATTR_NORMALIZATION) {
298                                 System.out.println("** valueD: \""
299                                                    + fStringBuffer.toString()
300                                                    + "\"");
301                             }
302                         }
303                         else {
304                             if (fEntityManager.isExternalEntity(entityName)) {
305                                 reportFatalError("ReferenceToExternalEntity",
306                                                  new Object JavaDoc[] { entityName });
307                             }
308                             else {
309                                 if (!fEntityManager.isDeclaredEntity(entityName)) {
310                                     //WFC & VC: Entity Declared
311
if (checkEntities) {
312                                         if (fValidation) {
313                                             fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
314                                                                        "EntityNotDeclared",
315                                                                        new Object JavaDoc[]{entityName},
316                                                                        XMLErrorReporter.SEVERITY_ERROR);
317                                         }
318                                     }
319                                     else {
320                                         reportFatalError("EntityNotDeclared",
321                                                          new Object JavaDoc[]{entityName});
322                                     }
323                                 }
324                                 fEntityManager.startEntity(entityName, true);
325                             }
326                         }
327                     }
328                 }
329                 else if (c == '<') {
330                     reportFatalError("LessthanInAttValue",
331                                      new Object JavaDoc[] { eleName, atName });
332                     fEntityScanner.scanChar();
333                     if (entityDepth == fEntityDepth) {
334                         fStringBuffer2.append((char)c);
335                     }
336                 }
337                 else if (c == '%' || c == ']') {
338                     fEntityScanner.scanChar();
339                     fStringBuffer.append((char)c);
340                     if (entityDepth == fEntityDepth) {
341                         fStringBuffer2.append((char)c);
342                     }
343                     if (DEBUG_ATTR_NORMALIZATION) {
344                         System.out.println("** valueF: \""
345                                            + fStringBuffer.toString() + "\"");
346                     }
347                 }
348                 // note that none of these characters should ever get through
349
// XML11EntityScanner. Not sure why
350
// this check was originally necessary. - NG
351
else if (c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) {
352                     fEntityScanner.scanChar();
353                     fStringBuffer.append(' ');
354                     if (entityDepth == fEntityDepth) {
355                         fStringBuffer2.append('\n');
356                     }
357                 }
358                 else if (c != -1 && XMLChar.isHighSurrogate(c)) {
359                     fStringBuffer3.clear();
360                     if (scanSurrogates(fStringBuffer3)) {
361                         fStringBuffer.append(fStringBuffer3);
362                         if (entityDepth == fEntityDepth) {
363                             fStringBuffer2.append(fStringBuffer3);
364                         }
365                         if (DEBUG_ATTR_NORMALIZATION) {
366                             System.out.println("** valueI: \""
367                                                + fStringBuffer.toString()
368                                                + "\"");
369                         }
370                     }
371                 }
372                 else if (c != -1 && XML11Char.isXML11Invalid(c)) {
373                     reportFatalError("InvalidCharInAttValue",
374                                      new Object JavaDoc[] {eleName, atName, Integer.toString(c, 16)});
375                     fEntityScanner.scanChar();
376                     if (entityDepth == fEntityDepth) {
377                         fStringBuffer2.append((char)c);
378                     }
379                 }
380                 c = fEntityScanner.scanLiteral(quote, value);
381                 if (entityDepth == fEntityDepth) {
382                     fStringBuffer2.append(value);
383                 }
384                 normalizeWhitespace(value);
385             } while (c != quote || entityDepth != fEntityDepth);
386             fStringBuffer.append(value);
387             if (DEBUG_ATTR_NORMALIZATION) {
388                 System.out.println("** valueN: \""
389                                    + fStringBuffer.toString() + "\"");
390             }
391             value.setValues(fStringBuffer);
392             fScanningAttribute = false;
393         }
394         nonNormalizedValue.setValues(fStringBuffer2);
395
396         // quote
397
int cquote = fEntityScanner.scanChar();
398         if (cquote != quote) {
399             reportFatalError("CloseQuoteExpected", new Object JavaDoc[]{eleName,atName});
400         }
401     } // scanAttributeValue()
402

403     //
404
// XMLScanner methods
405
//
406
// NOTE: this is a carbon copy of the code in XML11DTDScannerImpl;
407
// we need to override these methods in both places.
408
// this needs to be refactored!!! - NG
409
/**
410      * Scans public ID literal.
411      *
412      * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
413      * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
414      *
415      * The returned string is normalized according to the following rule,
416      * from http://www.w3.org/TR/REC-xml#dt-pubid:
417      *
418      * Before a match is attempted, all strings of white space in the public
419      * identifier must be normalized to single space characters (#x20), and
420      * leading and trailing white space must be removed.
421      *
422      * @param literal The string to fill in with the public ID literal.
423      * @return True on success.
424      *
425      * <strong>Note:</strong> This method uses fStringBuffer, anything in it at
426      * the time of calling is lost.
427      */

428     protected boolean scanPubidLiteral(XMLString literal)
429         throws IOException JavaDoc, XNIException
430     {
431         int quote = fEntityScanner.scanChar();
432         if (quote != '\'' && quote != '"') {
433             reportFatalError("QuoteRequiredInPublicID", null);
434             return false;
435         }
436
437         fStringBuffer.clear();
438         // skip leading whitespace
439
boolean skipSpace = true;
440         boolean dataok = true;
441         while (true) {
442             int c = fEntityScanner.scanChar();
443             // REVISIT: none of these except \n and 0x20 should make it past the entity scanner
444
if (c == ' ' || c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) {
445                 if (!skipSpace) {
446                     // take the first whitespace as a space and skip the others
447
fStringBuffer.append(' ');
448                     skipSpace = true;
449                 }
450             }
451             else if (c == quote) {
452                 if (skipSpace) {
453                     // if we finished on a space let's trim it
454
fStringBuffer.length--;
455                 }
456                 literal.setValues(fStringBuffer);
457                 break;
458             }
459             else if (XMLChar.isPubid(c)) {
460                 fStringBuffer.append((char)c);
461                 skipSpace = false;
462             }
463             else if (c == -1) {
464                 reportFatalError("PublicIDUnterminated", null);
465                 return false;
466             }
467             else {
468                 dataok = false;
469                 reportFatalError("InvalidCharInPublicID",
470                                  new Object JavaDoc[]{Integer.toHexString(c)});
471             }
472         }
473         return dataok;
474    }
475    
476     /**
477      * Normalize whitespace in an XMLString converting all whitespace
478      * characters to space characters.
479      */

480     protected void normalizeWhitespace(XMLString value) {
481         int end = value.offset + value.length;
482         for (int i = value.offset; i < end; i++) {
483            int c = value.ch[i];
484            if (XMLChar.isSpace(c)) {
485                value.ch[i] = ' ';
486            }
487        }
488     }
489
490     // returns true if the given character is not
491
// valid with respect to the version of
492
// XML understood by this scanner.
493
protected boolean isInvalid(int value) {
494         return (XML11Char.isXML11Invalid(value));
495     } // isInvalid(int): boolean
496

497     // returns true if the given character is not
498
// valid or may not be used outside a character reference
499
// with respect to the version of XML understood by this scanner.
500
protected boolean isInvalidLiteral(int value) {
501         return (!XML11Char.isXML11ValidLiteral(value));
502     } // isInvalidLiteral(int): boolean
503

504     // returns true if the given character is
505
// a valid nameChar with respect to the version of
506
// XML understood by this scanner.
507
protected boolean isValidNameChar(int value) {
508         return (XML11Char.isXML11Name(value));
509     } // isValidNameChar(int): boolean
510

511     // returns true if the given character is
512
// a valid nameStartChar with respect to the version of
513
// XML understood by this scanner.
514
protected boolean isValidNameStartChar(int value) {
515         return (XML11Char.isXML11NameStart(value));
516     } // isValidNameStartChar(int): boolean
517

518     // returns true if the given character is
519
// a valid NCName character with respect to the version of
520
// XML understood by this scanner.
521
protected boolean isValidNCName(int value) {
522         return (XML11Char.isXML11NCName(value));
523     } // isValidNCName(int): boolean
524

525     // returns true if the given character is
526
// a valid high surrogate for a nameStartChar
527
// with respect to the version of XML understood
528
// by this scanner.
529
protected boolean isValidNameStartHighSurrogate(int value) {
530         return XML11Char.isXML11NameHighSurrogate(value);
531     } // isValidNameStartHighSurrogate(int): boolean
532

533     protected boolean versionSupported(String JavaDoc version) {
534         return (version.equals("1.1") || version.equals("1.0"));
535     } // versionSupported(String): boolean
536

537     // returns the error message key for unsupported
538
// versions of XML with respect to the version of
539
// XML understood by this scanner.
540
protected String JavaDoc getVersionNotSupportedKey () {
541         return "VersionNotSupported11";
542     } // getVersionNotSupportedKey: String
543

544 } // class XML11DocumentScannerImpl
545
Popular Tags