XMLEncodingDetector


1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   * ====================================================================
17   *
18   * This software consists of voluntary contributions made by many
19   * individuals on behalf of the Apache Software Foundation and was
20   * originally based on software copyright (c) 1999, International
21   * Business Machines, Inc., http://www.apache.org.  For more
22   * information on the Apache Software Foundation, please see
23   * <http://www.apache.org/>.
24   */
25  
26  package org.apache.jasper.xmlparser;
27  
28  import java.io.EOFException  ;
29  import java.io.InputStream  ;
30  import java.io.InputStreamReader  ;
31  import java.io.IOException  ;
32  import java.io.Reader  ;
33  import java.util.Locale  ;
34  import java.util.jar.JarFile  ;
35  
36  import org.apache.jasper.JasperException;
37  import org.apache.jasper.JspCompilationContext;
38  import org.apache.jasper.compiler.ErrorDispatcher;
39  import org.apache.jasper.compiler.JspUtil;
40  
41  public class XMLEncodingDetector {
42      
43      private InputStream   stream;
44      private String   encoding;
45      private boolean isEncodingSetInProlog;
46      private boolean isBomPresent;
47      private Boolean   isBigEndian;
48      private Reader   reader;
49      
50      // org.apache.xerces.impl.XMLEntityManager fields
51      public static final int DEFAULT_BUFFER_SIZE = 2048;
52      public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64;
53      private boolean fAllowJavaEncodings;
54      private SymbolTable fSymbolTable;
55      private XMLEncodingDetector fCurrentEntity;
56      private int fBufferSize = DEFAULT_BUFFER_SIZE;
57      
58      // org.apache.xerces.impl.XMLEntityManager.ScannedEntity fields
59      private int lineNumber = 1;
60      private int columnNumber = 1;
61      private boolean literal;
62      private char[] ch = new char[DEFAULT_BUFFER_SIZE];
63      private int position;
64      private int count;
65      private boolean mayReadChunks = false;
66      
67      // org.apache.xerces.impl.XMLScanner fields
68      private XMLString fString = new XMLString();    
69      private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
70      private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
71      private final static String   fVersionSymbol = "version";
72      private final static String   fEncodingSymbol = "encoding";
73      private final static String   fStandaloneSymbol = "standalone";
74      
75      // org.apache.xerces.impl.XMLDocumentFragmentScannerImpl fields
76      private int fMarkupDepth = 0;
77      private String  [] fStrings = new String  [3];
78  
79      private ErrorDispatcher err;
80  
/**
 * Creates a detector that scans itself as the current entity and owns a
 * fresh symbol table.
 */
public XMLEncodingDetector() {
    this.fCurrentEntity = this;
    this.fSymbolTable = new SymbolTable();
}
88  
89      /**
90       * Autodetects the encoding of the XML document supplied by the given
91       * input stream.
92       *
93       * Encoding autodetection is done according to the XML 1.0 specification,
94       * Appendix F.1: Detection Without External Encoding Information.
95       *
96       * @return Two-element array, where the first element (of type
97       * java.lang.String) contains the name of the (auto)detected encoding, and
98       * the second element (of type java.lang.Boolean) specifies whether the 
99       * encoding was specified using the 'encoding' attribute of an XML prolog
100      * (TRUE) or autodetected (FALSE).
101      */
102     public static Object  [] getEncoding(String   fname, JarFile   jarFile,
103                                        JspCompilationContext ctxt,
104                                        ErrorDispatcher err)
105         throws IOException  , JasperException
106     {
107         InputStream   inStream = JspUtil.getInputStream(fname, jarFile, ctxt,
108                                                       err);
109         XMLEncodingDetector detector = new XMLEncodingDetector();
110         Object  [] ret = detector.getEncoding(inStream, err);
111         inStream.close();
112 
113         return ret;
114     }
115 
116     private Object  [] getEncoding(InputStream   in, ErrorDispatcher err)
117         throws IOException  , JasperException
118     {
119         this.stream = in;
120         this.err=err;
121         createInitialReader();
122         scanXMLDecl();
123     
124         return new Object  [] { this.encoding,
125                               new Boolean  (this.isEncodingSetInProlog),
126                               new Boolean  (this.isBomPresent) };
127     }
128     
129     // stub method
130     void endEntity() {
131     }
132     
133     // Adapted from:
134     // org.apache.xerces.impl.XMLEntityManager.startEntity()
135     private void createInitialReader() throws IOException  , JasperException {
136 
137     // wrap this stream in RewindableInputStream
138     stream = new RewindableInputStream(stream);
139 
140     // perform auto-detect of encoding if necessary
141     if (encoding == null) {
142         // read first four bytes and determine encoding
143         final byte[] b4 = new byte[4];
144         int count = 0;
145         for (; count<4; count++ ) {
146         b4[count] = (byte)stream.read();
147         }
148         if (count == 4) {
149         Object   [] encodingDesc = getEncodingName(b4, count);
150         encoding = (String  )(encodingDesc[0]);
151         isBigEndian = (Boolean  )(encodingDesc[1]);
152         if (encodingDesc.length > 2) {
153             isBomPresent = (Boolean  )(encodingDesc[2]);
154         } else {
155             isBomPresent = true;
156         }
157 
158         stream.reset();
159         // Special case UTF-8 files with BOM created by Microsoft
160         // tools. It's more efficient to consume the BOM than make
161         // the reader perform extra checks. -Ac
162         if (count > 2 && encoding.equals("UTF-8")) {
163             int b0 = b4[0] & 0xFF;
164             int b1 = b4[1] & 0xFF;
165             int b2 = b4[2] & 0xFF;
166             if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
167             // ignore first three bytes...
168             stream.skip(3);
169             }
170         }
171         reader = createReader(stream, encoding, isBigEndian);
172         } else {
173         reader = createReader(stream, encoding, isBigEndian);
174         }
175     }
176     }
177 
178     // Adapted from:
179     // org.apache.xerces.impl.XMLEntityManager.createReader
180     /**
181      * Creates a reader capable of reading the given input stream in
182      * the specified encoding.
183      *
184      * @param inputStream  The input stream.
185      * @param encoding     The encoding name that the input stream is
186      *                     encoded using. If the user has specified that
187      *                     Java encoding names are allowed, then the
188      *                     encoding name may be a Java encoding name;
189      *                     otherwise, it is an ianaEncoding name.
190      * @param isBigEndian   For encodings (like uCS-4), whose names cannot
191      *                      specify a byte order, this tells whether the order
192      *                      is bigEndian. null means unknown or not relevant.
193      *
194      * @return Returns a reader.
195      */
196     private Reader   createReader(InputStream   inputStream, String   encoding,
197                 Boolean   isBigEndian)
198                 throws IOException  , JasperException {
199 
200         // normalize encoding name
201         if (encoding == null) {
202             encoding = "UTF-8";
203         }
204 
205         // try to use an optimized reader
206         String   ENCODING = encoding.toUpperCase(Locale.ENGLISH);
207         if (ENCODING.equals("UTF-8")) {
208             return new UTF8Reader(inputStream, fBufferSize);
209         }
210         if (ENCODING.equals("US-ASCII")) {
211             return new ASCIIReader(inputStream, fBufferSize);
212         }
213         if (ENCODING.equals("ISO-10646-UCS-4")) {
214             if (isBigEndian != null) {
215                 boolean isBE = isBigEndian.booleanValue();
216                 if (isBE) {
217                     return new UCSReader(inputStream, UCSReader.UCS4BE);
218                 } else {
219                     return new UCSReader(inputStream, UCSReader.UCS4LE);
220                 }
221             } else {
222                 err.jspError("jsp.error.xml.encodingByteOrderUnsupported",
223                  encoding);
224             }
225         }
226         if (ENCODING.equals("ISO-10646-UCS-2")) {
227             if (isBigEndian != null) { // sould never happen with this encoding...
228                 boolean isBE = isBigEndian.booleanValue();
229                 if (isBE) {
230                     return new UCSReader(inputStream, UCSReader.UCS2BE);
231                 } else {
232                     return new UCSReader(inputStream, UCSReader.UCS2LE);
233                 }
234             } else {
235                 err.jspError("jsp.error.xml.encodingByteOrderUnsupported",
236                  encoding);
237             }
238         }
239 
240         // check for valid name
241         boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
242         boolean validJava = XMLChar.isValidJavaEncoding(encoding);
243         if (!validIANA || (fAllowJavaEncodings && !validJava)) {
244             err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
245             // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
246             //       because every byte is a valid ISO Latin 1 character.
247             //       It may not translate correctly but if we failed on
248             //       the encoding anyway, then we're expecting the content
249             //       of the document to be bad. This will just prevent an
250             //       invalid UTF-8 sequence to be detected. This is only
251             //       important when continue-after-fatal-error is turned
252             //       on. -Ac
253             encoding = "ISO-8859-1";
254         }
255 
256         // try to use a Java reader
257         String   javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
258         if (javaEncoding == null) {
259             if (fAllowJavaEncodings) {
260         javaEncoding = encoding;
261             } else {
262                 err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
263                 // see comment above.
264                 javaEncoding = "ISO8859_1";
265             }
266         }
267         return new InputStreamReader  (inputStream, javaEncoding);
268 
269     } // createReader(InputStream,String, Boolean): Reader
270 
271     // Adapted from:
272     // org.apache.xerces.impl.XMLEntityManager.getEncodingName
273     /**
274      * Returns the IANA encoding name that is auto-detected from
275      * the bytes specified, with the endian-ness of that encoding where
276      * appropriate.
277      *
278      * @param b4    The first four bytes of the input.
279      * @param count The number of bytes actually read.
280      * @return a 2-element array:  the first element, an IANA-encoding string,
281      *  the second element a Boolean which is true iff the document is big
282      *  endian, false if it's little-endian, and null if the distinction isn't
283      *  relevant.
284      */
285     private Object  [] getEncodingName(byte[] b4, int count) {
286 
287         if (count < 2) {
288             return new Object  []{"UTF-8", null, Boolean.FALSE};
289         }
290 
291         // UTF-16, with BOM
292         int b0 = b4[0] & 0xFF;
293         int b1 = b4[1] & 0xFF;
294         if (b0 == 0xFE && b1 == 0xFF) {
295             // UTF-16, big-endian
296             return new Object   [] {"UTF-16BE", Boolean.TRUE};
297         }
298         if (b0 == 0xFF && b1 == 0xFE) {
299             // UTF-16, little-endian
300             return new Object   [] {"UTF-16LE", Boolean.FALSE};
301         }
302 
303         // default to UTF-8 if we don't have enough bytes to make a
304         // good determination of the encoding
305         if (count < 3) {
306             return new Object   [] {"UTF-8", null, Boolean.FALSE};
307         }
308 
309         // UTF-8 with a BOM
310         int b2 = b4[2] & 0xFF;
311         if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
312             return new Object   [] {"UTF-8", null};
313         }
314 
315         // default to UTF-8 if we don't have enough bytes to make a
316         // good determination of the encoding
317         if (count < 4) {
318             return new Object   [] {"UTF-8", null};
319         }
320 
321         // other encodings
322         int b3 = b4[3] & 0xFF;
323         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
324             // UCS-4, big endian (1234)
325             return new Object   [] {"ISO-10646-UCS-4", new Boolean  (true)};
326         }
327         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
328             // UCS-4, little endian (4321)
329             return new Object   [] {"ISO-10646-UCS-4", new Boolean  (false)};
330         }
331         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
332             // UCS-4, unusual octet order (2143)
333             // REVISIT: What should this be?
334             return new Object   [] {"ISO-10646-UCS-4", null};
335         }
336         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
337             // UCS-4, unusual octect order (3412)
338             // REVISIT: What should this be?
339             return new Object   [] {"ISO-10646-UCS-4", null};
340         }
341         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
342             // UTF-16, big-endian, no BOM
343             // (or could turn out to be UCS-2...
344             // REVISIT: What should this be?
345             return new Object   [] {"UTF-16BE", new Boolean  (true)};
346         }
347         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
348             // UTF-16, little-endian, no BOM
349             // (or could turn out to be UCS-2...
350             return new Object   [] {"UTF-16LE", new Boolean  (false)};
351         }
352         if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
353             // EBCDIC
354             // a la xerces1, return CP037 instead of EBCDIC here
355             return new Object   [] {"CP037", null};
356         }
357 
358         // default encoding
359         return new Object   [] {"UTF-8", null, Boolean.FALSE};
360 
361     }
362 
363     // Adapted from:
364     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.isExternal
365     /** Returns true if the current entity being scanned is external. */
366     public boolean isExternal() {
367     return true;
368     }
369 
370     // Adapted from:
371     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.peekChar
372     /**
373      * Returns the next character on the input.
374      * <p>
375      * <strong>Note:</strong> The character is <em>not</em> consumed.
376      *
377      * @throws IOException  Thrown if i/o error occurs.
378      * @throws EOFException Thrown on end of file.
379      */
380     public int peekChar() throws IOException   {
381     
382     // load more characters, if needed
383     if (fCurrentEntity.position == fCurrentEntity.count) {
384         load(0, true);
385     }
386     
387     // peek at character
388     int c = fCurrentEntity.ch[fCurrentEntity.position];
389 
390     // return peeked character
391     if (fCurrentEntity.isExternal()) {
392         return c != '\r' ? c : '\n';
393     }
394     else {
395         return c;
396     }
397     
398     } // peekChar():int
399     
400     // Adapted from:
401     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanChar
402     /**
403      * Returns the next character on the input.
404      * <p>
405      * <strong>Note:</strong> The character is consumed.
406      *
407      * @throws IOException  Thrown if i/o error occurs.
408      * @throws EOFException Thrown on end of file.
409      */
410     public int scanChar() throws IOException   {
411 
412     // load more characters, if needed
413     if (fCurrentEntity.position == fCurrentEntity.count) {
414         load(0, true);
415     }
416 
417     // scan character
418     int c = fCurrentEntity.ch[fCurrentEntity.position++];
419     boolean external = false;
420     if (c == '\n' ||
421         (c == '\r' && (external = fCurrentEntity.isExternal()))) {
422         fCurrentEntity.lineNumber++;
423         fCurrentEntity.columnNumber = 1;
424         if (fCurrentEntity.position == fCurrentEntity.count) {
425         fCurrentEntity.ch[0] = (char)c;
426         load(1, false);
427         }
428         if (c == '\r' && external) {
429         if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
430             fCurrentEntity.position--;
431         }
432         c = '\n';
433         }
434     }
435 
436     // return character that was scanned
437     fCurrentEntity.columnNumber++;
438     return c;
439     
440     }
441 
442     // Adapted from:
443     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanName
444     /**
445      * Returns a string matching the Name production appearing immediately
446      * on the input as a symbol, or null if no Name string is present.
447      * <p>
448      * <strong>Note:</strong> The Name characters are consumed.
449      * <p>
450      * <strong>Note:</strong> The string returned must be a symbol. The
451      * SymbolTable can be used for this purpose.
452      *
453      * @throws IOException  Thrown if i/o error occurs.
454      * @throws EOFException Thrown on end of file.
455      *
456      * @see SymbolTable
457      * @see XMLChar#isName
458      * @see XMLChar#isNameStart
459      */
460     public String   scanName() throws IOException   {
461     
462     // load more characters, if needed
463     if (fCurrentEntity.position == fCurrentEntity.count) {
464         load(0, true);
465     }
466     
467     // scan name
468     int offset = fCurrentEntity.position;
469     if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
470         if (++fCurrentEntity.position == fCurrentEntity.count) {
471         fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
472         offset = 0;
473         if (load(1, false)) {
474             fCurrentEntity.columnNumber++;
475             String   symbol = fSymbolTable.addSymbol(fCurrentEntity.ch,
476                                0, 1);
477             return symbol;
478         }
479         }
480         while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
481         if (++fCurrentEntity.position == fCurrentEntity.count) {
482             int length = fCurrentEntity.position - offset;
483             if (length == fBufferSize) {
484             // bad luck we have to resize our buffer
485             char[] tmp = new char[fBufferSize * 2];
486             System.arraycopy(fCurrentEntity.ch, offset,
487                      tmp, 0, length);
488             fCurrentEntity.ch = tmp;
489             fBufferSize *= 2;
490             } else {
491             System.arraycopy(fCurrentEntity.ch, offset,
492                      fCurrentEntity.ch, 0, length);
493             }
494             offset = 0;
495             if (load(length, false)) {
496             break;
497             }
498         }
499         }
500     }
501     int length = fCurrentEntity.position - offset;
502     fCurrentEntity.columnNumber += length;
503 
504     // return name
505     String   symbol = null;
506     if (length > 0) {
507         symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
508     }
509     return symbol;
510     
511     }
512 
513     // Adapted from:
514     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanLiteral
515     /**
516      * Scans a range of attribute value data, setting the fields of the
517      * XMLString structure, appropriately.
518      * <p>
519      * <strong>Note:</strong> The characters are consumed.
520      * <p>
521      * <strong>Note:</strong> This method does not guarantee to return
522      * the longest run of attribute value data. This method may return
523      * before the quote character due to reaching the end of the input
524      * buffer or any other reason.
525      * <p>
526      * <strong>Note:</strong> The fields contained in the XMLString
527      * structure are not guaranteed to remain valid upon subsequent calls
528      * to the entity scanner. Therefore, the caller is responsible for
529      * immediately using the returned character data or making a copy of
530      * the character data.
531      *
532      * @param quote   The quote character that signifies the end of the
533      *                attribute value data.
534      * @param content The content structure to fill.
535      *
536      * @return Returns the next character on the input, if known. This
537      *         value may be -1 but this does <em>note</em> designate
538      *         end of file.
539      *
540      * @throws IOException  Thrown if i/o error occurs.
541      * @throws EOFException Thrown on end of file.
542      */
543     public int scanLiteral(int quote, XMLString content)
544     throws IOException   {
545 
546     // load more characters, if needed
547     if (fCurrentEntity.position == fCurrentEntity.count) {
548         load(0, true);
549     } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
550         fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
551         load(1, false);
552         fCurrentEntity.position = 0;
553     }
554 
555     // normalize newlines
556     int offset = fCurrentEntity.position;
557     int c = fCurrentEntity.ch[offset];
558     int newlines = 0;
559     boolean external = fCurrentEntity.isExternal();
560     if (c == '\n' || (c == '\r' && external)) {
561         do {
562         c = fCurrentEntity.ch[fCurrentEntity.position++];
563         if (c == '\r' && external) {
564             newlines++;
565             fCurrentEntity.lineNumber++;
566             fCurrentEntity.columnNumber = 1;
567             if (fCurrentEntity.position == fCurrentEntity.count) {
568             offset = 0;
569             fCurrentEntity.position = newlines;
570             if (load(newlines, false)) {
571                 break;
572             }
573             }
574             if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
575             fCurrentEntity.position++;
576             offset++;
577             }
578             /*** NEWLINE NORMALIZATION ***/
579             else {
580             newlines++;
581             }
582             /***/
583         }
584         else if (c == '\n') {
585             newlines++;
586             fCurrentEntity.lineNumber++;
587             fCurrentEntity.columnNumber = 1;
588             if (fCurrentEntity.position == fCurrentEntity.count) {
589             offset = 0;
590             fCurrentEntity.position = newlines;
591             if (load(newlines, false)) {
592                 break;
593             }
594             }
595             /*** NEWLINE NORMALIZATION ***
596              if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
597              && external) {
598              fCurrentEntity.position++;
599              offset++;
600              }
601              /***/
602         }
603         else {
604             fCurrentEntity.position--;
605             break;
606         }
607         } while (fCurrentEntity.position < fCurrentEntity.count - 1);
608         for (int i = offset; i < fCurrentEntity.position; i++) {
609         fCurrentEntity.ch[i] = '\n';
610         }
611         int length = fCurrentEntity.position - offset;
612         if (fCurrentEntity.position == fCurrentEntity.count - 1) {
613         content.setValues(fCurrentEntity.ch, offset, length);
614         return -1;
615         }
616     }
617 
618     // scan literal value
619     while (fCurrentEntity.position < fCurrentEntity.count) {
620         c = fCurrentEntity.ch[fCurrentEntity.position++];
621         if ((c == quote &&
622          (!fCurrentEntity.literal || external))
623         || c == '%' || !XMLChar.isContent(c)) {
624         fCurrentEntity.position--;
625         break;
626         }
627     }
628     int length = fCurrentEntity.position - offset;
629     fCurrentEntity.columnNumber += length - newlines;
630     content.setValues(fCurrentEntity.ch, offset, length);
631 
632     // return next character
633     if (fCurrentEntity.position != fCurrentEntity.count) {
634         c = fCurrentEntity.ch[fCurrentEntity.position];
635         // NOTE: We don't want to accidentally signal the
636         //       end of the literal if we're expanding an
637         //       entity appearing in the literal. -Ac
638         if (c == quote && fCurrentEntity.literal) {
639         c = -1;
640         }
641     }
642     else {
643         c = -1;
644     }
645     return c;
646 
647     }
648 
649     /**
650      * Scans a range of character data up to the specified delimiter,
651      * setting the fields of the XMLString structure, appropriately.
652      * <p>
653      * <strong>Note:</strong> The characters are consumed.
654      * <p>
655      * <strong>Note:</strong> This assumes that the internal buffer is
656      * at least the same size, or bigger, than the length of the delimiter
657      * and that the delimiter contains at least one character.
658      * <p>
659      * <strong>Note:</strong> This method does not guarantee to return
660      * the longest run of character data. This method may return before
661      * the delimiter due to reaching the end of the input buffer or any
662      * other reason.
663      * <p>
664      * <strong>Note:</strong> The fields contained in the XMLString
665      * structure are not guaranteed to remain valid upon subsequent calls
666      * to the entity scanner. Therefore, the caller is responsible for
667      * immediately using the returned character data or making a copy of
668      * the character data.
669      *
670      * @param delimiter The string that signifies the end of the character
671      *                  data to be scanned.
672      * @param buffer    The data structure to fill.
673      *
674      * @return Returns true if there is more data to scan, false otherwise.
675      *
676      * @throws IOException  Thrown if i/o error occurs.
677      * @throws EOFException Thrown on end of file.
678      */
679     public boolean scanData(String   delimiter, XMLStringBuffer buffer)
680     throws IOException   {
681 
682     boolean done = false;
683     int delimLen = delimiter.length();
684     char charAt0 = delimiter.charAt(0);
685     boolean external = fCurrentEntity.isExternal();
686     do {
687     
688         // load more characters, if needed
689     
690         if (fCurrentEntity.position == fCurrentEntity.count) {
691         load(0, true);
692         }
693         else if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
694         System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position,
695                  fCurrentEntity.ch, 0, fCurrentEntity.count - fCurrentEntity.position);
696         load(fCurrentEntity.count - fCurrentEntity.position, false);
697         fCurrentEntity.position = 0;
698         } 
699         if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
700         // something must be wrong with the input: e.g., file ends an
701         // unterminated comment
702         int length = fCurrentEntity.count - fCurrentEntity.position;
703         buffer.append (fCurrentEntity.ch, fCurrentEntity.position,
704                    length); 
705         fCurrentEntity.columnNumber += fCurrentEntity.count;
706         fCurrentEntity.position = fCurrentEntity.count;
707         load(0,true);
708         return false;
709         }
710     
711         // normalize newlines
712         int offset = fCurrentEntity.position;
713         int c = fCurrentEntity.ch[offset];
714         int newlines = 0;
715         if (c == '\n' || (c == '\r' && external)) {
716         do {
717             c = fCurrentEntity.ch[fCurrentEntity.position++];
718             if (c == '\r' && external) {
719             newlines++;
720             fCurrentEntity.lineNumber++;
721             fCurrentEntity.columnNumber = 1;
722             if (fCurrentEntity.position == fCurrentEntity.count) {
723                 offset = 0;
724                 fCurrentEntity.position = newlines;
725                 if (load(newlines, false)) {
726                 break;
727                 }
728             }
729             if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
730                 fCurrentEntity.position++;
731                 offset++;
732             }
733             /*** NEWLINE NORMALIZATION ***/
734             else {
735                 newlines++;
736             }
737             }
738             else if (c == '\n') {
739             newlines++;
740             fCurrentEntity.lineNumber++;
741             fCurrentEntity.columnNumber = 1;
742             if (fCurrentEntity.position == fCurrentEntity.count) {
743                 offset = 0;
744                 fCurrentEntity.position = newlines;
745                 fCurrentEntity.count = newlines;
746                 if (load(newlines, false)) {
747                 break;
748                 }
749             }
750             }
751             else {
752             fCurrentEntity.position--;
753             break;
754             }
755         } while (fCurrentEntity.position < fCurrentEntity.count - 1);
756         for (int i = offset; i < fCurrentEntity.position; i++) {
757             fCurrentEntity.ch[i] = '\n';
758         }
759         int length = fCurrentEntity.position - offset;
760         if (fCurrentEntity.position == fCurrentEntity.count - 1) {
761             buffer.append(fCurrentEntity.ch, offset, length);
762             return true;
763         }
764         }
765     
766         // iterate over buffer looking for delimiter
767     OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
768         c = fCurrentEntity.ch[fCurrentEntity.position++];
769         if (c == charAt0) {
770         // looks like we just hit the delimiter
771         int delimOffset = fCurrentEntity.position - 1;
772         for (int i = 1; i < delimLen; i++) {
773             if (fCurrentEntity.position == fCurrentEntity.count) {
774             fCurrentEntity.position -= i;
775             break OUTER;
776             }
777             c = fCurrentEntity.ch[fCurrentEntity.position++];
778             if (delimiter.charAt(i) != c) {
779             fCurrentEntity.position--;
780             break;
781             }
782         }
783         if (fCurrentEntity.position == delimOffset + delimLen) {
784             done = true;
785             break;
786         }
787         }
788         else if (c == '\n' || (external && c == '\r')) {
789         fCurrentEntity.position--;
790         break;
791         }
792         else if (XMLChar.isInvalid(c)) {
793         fCurrentEntity.position--;
794         int length = fCurrentEntity.position - offset;
795         fCurrentEntity.columnNumber += length - newlines;
796         buffer.append(fCurrentEntity.ch, offset, length); 
797         return true;
798         }
799     }
800         int length = fCurrentEntity.position - offset;
801         fCurrentEntity.columnNumber += length - newlines;
802         if (done) {
803         length -= delimLen;
804         }
805         buffer.append (fCurrentEntity.ch, offset, length);
806     
807         // return true if string was skipped
808     } while (!done);
809     return !done;
810 
811     }
812 
813     // Adapted from:
814     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipChar
815     /**
816      * Skips a character appearing immediately on the input.
817      * <p>
818      * <strong>Note:</strong> The character is consumed only if it matches
819      * the specified character.
820      *
821      * @param c The character to skip.
822      *
823      * @return Returns true if the character was skipped.
824      *
825      * @throws IOException  Thrown if i/o error occurs.
826      * @throws EOFException Thrown on end of file.
827      */
828     public boolean skipChar(int c) throws IOException   {
829 
830     // load more characters, if needed
831     if (fCurrentEntity.position == fCurrentEntity.count) {
832         load(0, true);
833     }
834 
835     // skip character
836     int cc = fCurrentEntity.ch[fCurrentEntity.position];
837     if (cc == c) {
838         fCurrentEntity.position++;
839         if (c == '\n') {
840         fCurrentEntity.lineNumber++;
841         fCurrentEntity.columnNumber = 1;
842         }
843         else {
844         fCurrentEntity.columnNumber++;
845         }
846         return true;
847     } else if (c == '\n' && cc == '\r' && fCurrentEntity.isExternal()) {
848         // handle newlines
849         if (fCurrentEntity.position == fCurrentEntity.count) {
850         fCurrentEntity.ch[0] = (char)cc;
851         load(1, false);
852         }
853         fCurrentEntity.position++;
854         if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
855         fCurrentEntity.position++;
856         }
857         fCurrentEntity.lineNumber++;
858         fCurrentEntity.columnNumber = 1;
859         return true;
860     }
861 
862     // character was not skipped
863     return false;
864 
865     }
866 
867     // Adapted from:
868     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipSpaces
869     /**
870      * Skips space characters appearing immediately on the input.
871      * <p>
872      * <strong>Note:</strong> The characters are consumed only if they are
873      * space characters.
874      *
875      * @return Returns true if at least one space character was skipped.
876      *
877      * @throws IOException  Thrown if i/o error occurs.
878      * @throws EOFException Thrown on end of file.
879      *
880      * @see XMLChar#isSpace
881      */
882     public boolean skipSpaces() throws IOException   {
883 
884     // load more characters, if needed
885     if (fCurrentEntity.position == fCurrentEntity.count) {
886         load(0, true);
887     }
888 
889     // skip spaces
890     int c = fCurrentEntity.ch[fCurrentEntity.position];
891     if (XMLChar.isSpace(c)) {
892         boolean external = fCurrentEntity.isExternal();
893         do {
894         boolean entityChanged = false;
895         // handle newlines
896         if (c == '\n' || (external && c == '\r')) {
897             fCurrentEntity.lineNumber++;
898             fCurrentEntity.columnNumber = 1;
899             if (fCurrentEntity.position == fCurrentEntity.count - 1) {
900             fCurrentEntity.ch[0] = (char)c;
901             entityChanged = load(1, true);
902             if (!entityChanged)
903                                 // the load change the position to be 1,
904                                 // need to restore it when entity not changed
905                 fCurrentEntity.position = 0;
906             }
907             if (c == '\r' && external) {
908             // REVISIT: Does this need to be updated to fix the
909             //          #x0D ^#x0A newline normalization problem? -Ac
910             if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
911                 fCurrentEntity.position--;
912             }
913             }
914             /*** NEWLINE NORMALIZATION ***
915              else {
916              if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
917              && external) {
918              fCurrentEntity.position++;
919              }
920              }
921              /***/
922         }
923         else {
924             fCurrentEntity.columnNumber++;
925         }
926         // load more characters, if needed
927         if (!entityChanged)
928             fCurrentEntity.position++;
929         if (fCurrentEntity.position == fCurrentEntity.count) {
930             load(0, true);
931         }
932         } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
933         return true;
934     }
935 
936     // no spaces were found
937     return false;
938 
939     }
940 
941     /**
942      * Skips the specified string appearing immediately on the input.
943      * <p>
944      * <strong>Note:</strong> The characters are consumed only if they are
945      * space characters.
946      *
947      * @param s The string to skip.
948      *
949      * @return Returns true if the string was skipped.
950      *
951      * @throws IOException  Thrown if i/o error occurs.
952      * @throws EOFException Thrown on end of file.
953      */
954     public boolean skipString(String   s) throws IOException   {
955 
956     // load more characters, if needed
957     if (fCurrentEntity.position == fCurrentEntity.count) {
958         load(0, true);
959     }
960 
961     // skip string
962     final int length = s.length();
963     for (int i = 0; i < length; i++) {
964         char c = fCurrentEntity.ch[fCurrentEntity.position++];
965         if (c != s.charAt(i)) {
966         fCurrentEntity.position -= i + 1;
967         return false;
968         }
969         if (i < length - 1 && fCurrentEntity.position == fCurrentEntity.count) {
970         System.arraycopy(fCurrentEntity.ch, fCurrentEntity.count - i - 1, fCurrentEntity.ch, 0, i + 1);
971         // REVISIT: Can a string to be skipped cross an
972         //          entity boundary? -Ac
973         if (load(i + 1, false)) {
974             fCurrentEntity.position -= i + 1;
975             return false;
976         }
977         }
978     }
979     fCurrentEntity.columnNumber += length;
980     return true;
981 
982     }
983 
984     // Adapted from:
985     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.load
986     /**
987      * Loads a chunk of text.
988      *
989      * @param offset       The offset into the character buffer to
990      *                     read the next batch of characters.
991      * @param changeEntity True if the load should change entities
992      *                     at the end of the entity, otherwise leave
993      *                     the current entity in place and the entity
994      *                     boundary will be signaled by the return
995      *                     value.
996      *
997      * @returns Returns true if the entity changed as a result of this
998      *          load operation.
999      */
1000    final boolean load(int offset, boolean changeEntity)
1001    throws IOException   {
1002
1003    // read characters
1004    int length = fCurrentEntity.mayReadChunks?
1005        (fCurrentEntity.ch.length - offset):
1006        (DEFAULT_XMLDECL_BUFFER_SIZE);
1007    int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset,
1008                           length);
1009
1010    // reset count and position
1011    boolean entityChanged = false;
1012    if (count != -1) {
1013        if (count != 0) {
1014        fCurrentEntity.count = count + offset;
1015        fCurrentEntity.position = offset;
1016        }
1017    }
1018
1019    // end of this entity
1020    else {
1021        fCurrentEntity.count = offset;
1022        fCurrentEntity.position = offset;
1023        entityChanged = true;
1024        if (changeEntity) {
1025        endEntity();
1026        if (fCurrentEntity == null) {
1027            throw new EOFException  ();
1028        }
1029        // handle the trailing edges
1030        if (fCurrentEntity.position == fCurrentEntity.count) {
1031            load(0, false);
1032        }
1033        }
1034    }
1035
1036    return entityChanged;
1037
1038    }
1039
1040    // Adapted from:
1041    // org.apache.xerces.impl.XMLEntityManager.RewindableInputStream
1042    /**
1043     * This class wraps the byte inputstreams we're presented with.
1044     * We need it because java.io.InputStreams don't provide
1045     * functionality to reread processed bytes, and they have a habit
1046     * of reading more than one character when you call their read()
1047     * methods.  This means that, once we discover the true (declared)
1048     * encoding of a document, we can neither backtrack to read the
1049     * whole doc again nor start reading where we are with a new
1050     * reader.
1051     *
1052     * This class allows rewinding an inputStream by allowing a mark
1053     * to be set, and the stream reset to that position.  <strong>The
1054     * class assumes that it needs to read one character per
1055     * invocation when it's read() method is inovked, but uses the
1056     * underlying InputStream's read(char[], offset length) method--it
1057     * won't buffer data read this way!</strong>
1058     *
1059     * @author Neil Graham, IBM
1060     * @author Glenn Marcy, IBM
1061     */
1062    private final class RewindableInputStream extends InputStream   {
1063
1064        private InputStream   fInputStream;
1065        private byte[] fData;
1066        private int fStartOffset;
1067        private int fEndOffset;
1068        private int fOffset;
1069        private int fLength;
1070        private int fMark;
1071
1072        public RewindableInputStream(InputStream   is) {
1073            fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE];
1074            fInputStream = is;
1075            fStartOffset = 0;
1076            fEndOffset = -1;
1077            fOffset = 0;
1078            fLength = 0;
1079            fMark = 0;
1080        }
1081
1082        public void setStartOffset(int offset) {
1083            fStartOffset = offset;
1084        }
1085
1086        public void rewind() {
1087            fOffset = fStartOffset;
1088        }
1089
1090        public int read() throws IOException   {
1091            int b = 0;
1092            if (fOffset < fLength) {
1093                return fData[fOffset++] & 0xff;
1094            }
1095            if (fOffset == fEndOffset) {
1096                return -1;
1097            }
1098            if (fOffset == fData.length) {
1099                byte[] newData = new byte[fOffset << 1];
1100                System.arraycopy(fData, 0, newData, 0, fOffset);
1101                fData = newData;
1102            }
1103            b = fInputStream.read();
1104            if (b == -1) {
1105                fEndOffset = fOffset;
1106                return -1;
1107            }
1108            fData[fLength++] = (byte)b;
1109            fOffset++;
1110            return b & 0xff;
1111        }
1112
1113        public int read(byte[] b, int off, int len) throws IOException   {
1114            int bytesLeft = fLength - fOffset;
1115            if (bytesLeft == 0) {
1116                if (fOffset == fEndOffset) {
1117                    return -1;
1118                }
1119                // better get some more for the voracious reader...
1120                if (fCurrentEntity.mayReadChunks) {
1121                    return fInputStream.read(b, off, len);
1122                }
1123                int returnedVal = read();
1124                if (returnedVal == -1) {
1125                    fEndOffset = fOffset;
1126                    return -1;
1127                }
1128                b[off] = (byte)returnedVal;
1129                return 1;
1130            }
1131            if (len < bytesLeft) {
1132                if (len <= 0) {
1133                    return 0;
1134                }
1135            }
1136            else {
1137                len = bytesLeft;
1138            }
1139            if (b != null) {
1140                System.arraycopy(fData, fOffset, b, off, len);
1141            }
1142            fOffset += len;
1143            return len;
1144        }
1145
1146        public long skip(long n)
1147            throws IOException  
1148        {
1149            int bytesLeft;
1150            if (n <= 0) {
1151                return 0;
1152            }
1153            bytesLeft = fLength - fOffset;
1154            if (bytesLeft == 0) {
1155                if (fOffset == fEndOffset) {
1156                    return 0;
1157                }
1158                return fInputStream.skip(n);
1159            }
1160            if (n <= bytesLeft) {
1161                fOffset += n;
1162                return n;
1163            }
1164            fOffset += bytesLeft;
1165            if (fOffset == fEndOffset) {
1166                return bytesLeft;
1167            }
1168            n -= bytesLeft;
1169        /*
1170         * In a manner of speaking, when this class isn't permitting more
1171         * than one byte at a time to be read, it is "blocking".  The
1172         * available() method should indicate how much can be read without
1173         * blocking, so while we're in this mode, it should only indicate
1174         * that bytes in its buffer are available; otherwise, the result of
1175         * available() on the underlying InputStream is appropriate.
1176         */
1177            return fInputStream.skip(n) + bytesLeft;
1178        }
1179
1180        public int available() throws IOException   {
1181            int bytesLeft = fLength - fOffset;
1182            if (bytesLeft == 0) {
1183                if (fOffset == fEndOffset) {
1184                    return -1;
1185                }
1186                return fCurrentEntity.mayReadChunks ? fInputStream.available()
1187            : 0;
1188            }
1189            return bytesLeft;
1190        }
1191
1192        public void mark(int howMuch) {
1193            fMark = fOffset;
1194        }
1195
1196        public void reset() {
1197            fOffset = fMark;
1198        }
1199
1200        public boolean markSupported() {
1201            return true;
1202        }
1203
1204        public void close() throws IOException   {
1205            if (fInputStream != null) {
1206                fInputStream.close();
1207                fInputStream = null;
1208            }
1209        }
1210    } // end of RewindableInputStream class
1211
1212    // Adapted from:
1213    // org.apache.xerces.impl.XMLDocumentScannerImpl.dispatch
1214    private void scanXMLDecl() throws IOException  , JasperException {
1215
1216    if (skipString("<?xml")) {
1217        fMarkupDepth++;
1218        // NOTE: special case where document starts with a PI
1219        //       whose name starts with "xml" (e.g. "xmlfoo")
1220        if (XMLChar.isName(peekChar())) {
1221        fStringBuffer.clear();
1222        fStringBuffer.append("xml");
1223        while (XMLChar.isName(peekChar())) {
1224            fStringBuffer.append((char)scanChar());
1225        }
1226        String   target = fSymbolTable.addSymbol(fStringBuffer.ch,
1227                               fStringBuffer.offset,
1228                               fStringBuffer.length);
1229        scanPIData(target, fString);
1230        }
1231
1232        // standard XML declaration
1233        else {
1234        scanXMLDeclOrTextDecl(false);
1235        }
1236    }
1237    }
1238    
1239    // Adapted from:
1240    // org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanXMLDeclOrTextDecl
1241    /**
1242     * Scans an XML or text declaration.
1243     * <p>
1244     * <pre>
1245     * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1246     * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
1247     * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
1248     * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1249     * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
1250     *                 | ('"' ('yes' | 'no') '"'))
1251     *
1252     * [77] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?>'
1253     * </pre>
1254     *
1255     * @param scanningTextDecl True if a text declaration is to
1256     *                         be scanned instead of an XML
1257     *                         declaration.
1258     */
1259    private void scanXMLDeclOrTextDecl(boolean scanningTextDecl) 
1260        throws IOException  , JasperException {
1261
1262        // scan decl
1263        scanXMLDeclOrTextDecl(scanningTextDecl, fStrings);
1264        fMarkupDepth--;
1265
1266        // pseudo-attribute values
1267        String   encodingPseudoAttr = fStrings[1];
1268
1269        // set encoding on reader
1270        if (encodingPseudoAttr != null) {
1271            isEncodingSetInProlog = true;
1272        encoding = encodingPseudoAttr;
1273        }
1274    }
1275
1276    // Adapted from:
1277    // org.apache.xerces.impl.XMLScanner.scanXMLDeclOrTextDecl
1278    /**
1279     * Scans an XML or text declaration.
1280     * <p>
1281     * <pre>
1282     * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1283     * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
1284     * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
1285     * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1286     * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
1287     *                 | ('"' ('yes' | 'no') '"'))
1288     *
1289     * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1290     * </pre>
1291     *
1292     * @param scanningTextDecl True if a text declaration is to
1293     *                         be scanned instead of an XML
1294     *                         declaration.
1295     * @param pseudoAttributeValues An array of size 3 to return the version,
1296     *                         encoding and standalone pseudo attribute values
1297     *                         (in that order).
1298     *
1299     * <strong>Note:</strong> This method uses fString, anything in it
1300     * at the time of calling is lost.
1301     */
1302    private void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
1303                       String  [] pseudoAttributeValues) 
1304                throws IOException  , JasperException {
1305
1306        // pseudo-attribute values
1307        String   version = null;
1308        String   encoding = null;
1309        String   standalone = null;
1310
1311        // scan pseudo-attributes
1312        final int STATE_VERSION = 0;
1313        final int STATE_ENCODING = 1;
1314        final int STATE_STANDALONE = 2;
1315        final int STATE_DONE = 3;
1316        int state = STATE_VERSION;
1317
1318        boolean dataFoundForTarget = false;
1319        boolean sawSpace = skipSpaces();
1320        while (peekChar() != '?') {
1321            dataFoundForTarget = true;
1322            String   name = scanPseudoAttribute(scanningTextDecl, fString);
1323            switch (state) {
1324                case STATE_VERSION: {
1325                    if (name == fVersionSymbol) {
1326                        if (!sawSpace) {
1327                            reportFatalError(scanningTextDecl
1328                                       ? "jsp.error.xml.spaceRequiredBeforeVersionInTextDecl"
1329                                       : "jsp.error.xml.spaceRequiredBeforeVersionInXMLDecl",
1330                                             null);
1331                        }
1332                        version = fString.toString();
1333                        state = STATE_ENCODING;
1334                        if (!version.equals("1.0")) {
1335                            // REVISIT: XML REC says we should throw an error
1336                // in such cases.
1337                            // some may object the throwing of fatalError.
1338                            err.jspError("jsp.error.xml.versionNotSupported",
1339                     version);
1340                        }
1341                    } else if (name == fEncodingSymbol) {
1342                        if (!scanningTextDecl) {
1343                            err.jspError("jsp.error.xml.versionInfoRequired");
1344                        }
1345                        if (!sawSpace) {
1346                            reportFatalError(scanningTextDecl
1347                                      ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
1348                                      : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
1349                                             null);
1350                        }
1351                        encoding = fString.toString();
1352                        state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
1353                    } else {
1354                        if (scanningTextDecl) {
1355                            err.jspError("jsp.error.xml.encodingDeclRequired");
1356                        }
1357                        else {
1358                            err.jspError("jsp.error.xml.versionInfoRequired");
1359                        }
1360                    }
1361                    break;
1362                }
1363                case STATE_ENCODING: {
1364                    if (name == fEncodingSymbol) {
1365                        if (!sawSpace) {
1366                            reportFatalError(scanningTextDecl
1367                                      ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
1368                                      : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
1369                                             null);
1370                        }
1371                        encoding = fString.toString();
1372                        state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
1373                        // TODO: check encoding name; set encoding on
1374                        //       entity scanner
1375                    } else if (!scanningTextDecl && name == fStandaloneSymbol) {
1376                        if (!sawSpace) {
1377                            err.jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
1378                        }
1379                        standalone = fString.toString();
1380                        state = STATE_DONE;
1381                        if (!standalone.equals("yes") && !standalone.equals("no")) {
1382                            err.jspError("jsp.error.xml.sdDeclInvalid");
1383                        }
1384                    } else {
1385                        err.jspError("jsp.error.xml.encodingDeclRequired");
1386                    }
1387                    break;
1388                }
1389                case STATE_STANDALONE: {
1390                    if (name == fStandaloneSymbol) {
1391                        if (!sawSpace) {
1392                            err.jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
1393                        }
1394                        standalone = fString.toString();
1395                        state = STATE_DONE;
1396                        if (!standalone.equals("yes") && !standalone.equals("no")) {
1397                            err.jspError("jsp.error.xml.sdDeclInvalid");
1398                        }
1399                    } else {
1400            err.jspError("jsp.error.xml.encodingDeclRequired");
1401                    }
1402                    break;
1403                }
1404                default: {
1405                    err.jspError("jsp.error.xml.noMorePseudoAttributes");
1406                }
1407            }
1408            sawSpace = skipSpaces();
1409        }
1410        // REVISIT: should we remove this error reporting?
1411        if (scanningTextDecl && state != STATE_DONE) {
1412            err.jspError("jsp.error.xml.morePseudoAttributes");
1413        }
1414        
1415        // If there is no data in the xml or text decl then we fail to report
1416    // error for version or encoding info above.
1417        if (scanningTextDecl) {
1418            if (!dataFoundForTarget && encoding == null) {
1419                err.jspError("jsp.error.xml.encodingDeclRequired");
1420            }
1421        } else {
1422            if (!dataFoundForTarget && version == null) {
1423                err.jspError("jsp.error.xml.versionInfoRequired");
1424            }
1425        }
1426
1427        // end
1428        if (!skipChar('?')) {
1429            err.jspError("jsp.error.xml.xmlDeclUnterminated");
1430        }
1431        if (!skipChar('>')) {
1432            err.jspError("jsp.error.xml.xmlDeclUnterminated");
1433
1434        }
1435        
1436        // fill in return array
1437        pseudoAttributeValues[0] = version;
1438        pseudoAttributeValues[1] = encoding;
1439        pseudoAttributeValues[2] = standalone;
1440    }
1441
1442    // Adapted from:
1443    // org.apache.xerces.impl.XMLScanner.scanPseudoAttribute
1444    /**
1445     * Scans a pseudo attribute.
1446     *
1447     * @param scanningTextDecl True if scanning this pseudo-attribute for a
1448     *                         TextDecl; false if scanning XMLDecl. This 
1449     *                         flag is needed to report the correct type of
1450     *                         error.
1451     * @param value            The string to fill in with the attribute 
1452     *                         value.
1453     *
1454     * @return The name of the attribute
1455     *
1456     * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
1457     * at the time of calling is lost.
1458     */
1459    public String   scanPseudoAttribute(boolean scanningTextDecl, 
1460                                      XMLString value) 
1461                throws IOException  , JasperException {
1462
1463        String   name = scanName();
1464        if (name == null) {
1465            err.jspError("jsp.error.xml.pseudoAttrNameExpected");
1466        }
1467        skipSpaces();
1468        if (!skipChar('=')) {
1469            reportFatalError(scanningTextDecl ?
1470                 "jsp.error.xml.eqRequiredInTextDecl"
1471                             : "jsp.error.xml.eqRequiredInXMLDecl",
1472                 name);
1473        }
1474        skipSpaces();
1475        int quote = peekChar();
1476        if (quote != '\'' && quote != '"') {
1477            reportFatalError(scanningTextDecl ?
1478                 "jsp.error.xml.quoteRequiredInTextDecl"
1479                             : "jsp.error.xml.quoteRequiredInXMLDecl" ,
1480                 name);
1481        }
1482        scanChar();
1483        int c = scanLiteral(quote, value);
1484        if (c != quote) {
1485            fStringBuffer2.clear();
1486            do {
1487                fStringBuffer2.append(value);
1488                if (c != -1) {
1489                    if (c == '&' || c == '%' || c == '<' || c == ']') {
1490                        fStringBuffer2.append((char)scanChar());
1491                    }
1492                    else if (XMLChar.isHighSurrogate(c)) {
1493                        scanSurrogates(fStringBuffer2);
1494                    }
1495                    else if (XMLChar.isInvalid(c)) {
1496                        String   key = scanningTextDecl
1497                            ? "jsp.error.xml.invalidCharInTextDecl"
1498                : "jsp.error.xml.invalidCharInXMLDecl";
1499                        reportFatalError(key, Integer.toString(c, 16));
1500                        scanChar();
1501                    }
1502                }
1503                c = scanLiteral(quote, value);
1504            } while (c != quote);
1505            fStringBuffer2.append(value);
1506            value.setValues(fStringBuffer2);
1507        }
1508        if (!skipChar(quote)) {
1509            reportFatalError(scanningTextDecl ?
1510                 "jsp.error.xml.closeQuoteMissingInTextDecl"
1511                             : "jsp.error.xml.closeQuoteMissingInXMLDecl",
1512                 name);
1513        }
1514
1515        // return
1516        return name;
1517
1518    }
1519    
1520    // Adapted from:
1521    // org.apache.xerces.impl.XMLScanner.scanPIData
1522    /**
1523     * Scans a processing data. This is needed to handle the situation
1524     * where a document starts with a processing instruction whose 
1525     * target name <em>starts with</em> "xml". (e.g. xmlfoo)
1526     *
1527     * <strong>Note:</strong> This method uses fStringBuffer, anything in it
1528     * at the time of calling is lost.
1529     *
1530     * @param target The PI target
1531     * @param data The string to fill in with the data
1532     */
1533    private void scanPIData(String   target, XMLString data) 
1534        throws IOException  , JasperException {
1535
1536        // check target
1537        if (target.length() == 3) {
1538            char c0 = Character.toLowerCase(target.charAt(0));
1539            char c1 = Character.toLowerCase(target.charAt(1));
1540            char c2 = Character.toLowerCase(target.charAt(2));
1541            if (c0 == 'x' && c1 == 'm' && c2 == 'l') {
1542                err.jspError("jsp.error.xml.reservedPITarget");
1543            }
1544        }
1545
1546        // spaces
1547        if (!skipSpaces()) {
1548            if (skipString("?>")) {
1549                // we found the end, there is no data
1550                data.clear();
1551                return;
1552            }
1553            else {
1554                // if there is data there should be some space
1555                err.jspError("jsp.error.xml.spaceRequiredInPI");
1556            }
1557        }
1558
1559        fStringBuffer.clear();
1560        // data
1561        if (scanData("?>", fStringBuffer)) {
1562            do {
1563                int c = peekChar();
1564                if (c != -1) {
1565                    if (XMLChar.isHighSurrogate(c)) {
1566                        scanSurrogates(fStringBuffer);
1567                    } else if (XMLChar.isInvalid(c)) {
1568                        err.jspError("jsp.error.xml.invalidCharInPI",
1569                     Integer.toHexString(c));
1570                        scanChar();
1571                    }
1572                }
1573            } while (scanData("?>", fStringBuffer));
1574        }
1575        data.setValues(fStringBuffer);
1576
1577    }
1578
1579    // Adapted from:
1580    // org.apache.xerces.impl.XMLScanner.scanSurrogates
1581    /**
1582     * Scans surrogates and append them to the specified buffer.
1583     * <p>
1584     * <strong>Note:</strong> This assumes the current char has already been
1585     * identified as a high surrogate.
1586     *
1587     * @param buf The StringBuffer to append the read surrogates to.
1588     * @returns True if it succeeded.
1589     */
1590    private boolean scanSurrogates(XMLStringBuffer buf)
1591        throws IOException  , JasperException {
1592
1593        int high = scanChar();
1594        int low = peekChar();
1595        if (!XMLChar.isLowSurrogate(low)) {
1596            err.jspError("jsp.error.xml.invalidCharInContent",
1597             Integer.toString(high, 16));
1598            return false;
1599        }
1600        scanChar();
1601
1602        // convert surrogates to supplemental character
1603        int c = XMLChar.supplemental((char)high, (char)low);
1604
1605        // supplemental character must be a valid XML character
1606        if (!XMLChar.isValid(c)) {
1607            err.jspError("jsp.error.xml.invalidCharInContent",
1608             Integer.toString(c, 16)); 
1609            return false;
1610        }
1611
1612        // fill in the buffer
1613        buf.append((char)high);
1614        buf.append((char)low);
1615
1616        return true;
1617
1618    }
1619
1620    // Adapted from:
1621    // org.apache.xerces.impl.XMLScanner.reportFatalError
1622    /**
1623     * Convenience function used in all XML scanners.
1624     */
1625    private void reportFatalError(String   msgId, String   arg)
1626                throws JasperException {
1627        err.jspError(msgId, arg);
1628    }
1629
1630}
1631
1632
1633
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags