KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > jasper > xmlparser > XMLEncodingDetector


1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  * ====================================================================
17  *
18  * This software consists of voluntary contributions made by many
19  * individuals on behalf of the Apache Software Foundation and was
20  * originally based on software copyright (c) 1999, International
21  * Business Machines, Inc., http://www.apache.org. For more
22  * information on the Apache Software Foundation, please see
23  * <http://www.apache.org/>.
24  */

25
26 package org.apache.jasper.xmlparser;
27
28 import java.io.EOFException JavaDoc;
29 import java.io.InputStream JavaDoc;
30 import java.io.InputStreamReader JavaDoc;
31 import java.io.IOException JavaDoc;
32 import java.io.Reader JavaDoc;
33 import java.util.Locale JavaDoc;
34 import java.util.jar.JarFile JavaDoc;
35
36 import org.apache.jasper.JasperException;
37 import org.apache.jasper.JspCompilationContext;
38 import org.apache.jasper.compiler.ErrorDispatcher;
39 import org.apache.jasper.compiler.JspUtil;
40
41 public class XMLEncodingDetector {
42     
43     private InputStream JavaDoc stream; // input under examination; assigned by getEncoding(InputStream, ...) and wrapped in a RewindableInputStream by createInitialReader()
44     private String JavaDoc encoding; // encoding name; filled in by createInitialReader() from getEncodingName() when still null, returned as result element 0
45     private boolean isEncodingSetInProlog; // returned as result element 1; not assigned anywhere in the visible part of this file
46     private boolean isBomPresent; // set by createInitialReader(); returned as result element 2
47     private Boolean JavaDoc isBigEndian; // byte-order flag taken from getEncodingName(); may be null; handed to createReader()
48     private Reader JavaDoc reader; // reader produced by createReader() in createInitialReader()
49     
50     // org.apache.xerces.impl.XMLEntityManager fields
51
public static final int DEFAULT_BUFFER_SIZE = 2048;
52     public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64;
53     private boolean fAllowJavaEncodings; // consulted by createReader() when validating/mapping encoding names
54     private SymbolTable fSymbolTable; // created in the constructor; scanName() adds scanned names to it
55     private XMLEncodingDetector fCurrentEntity; // the constructor points this at the detector itself
56     private int fBufferSize = DEFAULT_BUFFER_SIZE; // size passed to UTF8Reader/ASCIIReader; doubled by scanName() when a name fills the buffer
57     
58     // org.apache.xerces.impl.XMLEntityManager.ScannedEntity fields
59
private int lineNumber = 1; // incremented by the scan methods when they consume a line break
60     private int columnNumber = 1; // reset to 1 on a line break, otherwise advanced by the scan methods
61     private boolean literal; // read by scanLiteral() when deciding whether a quote ends the value
62     private char[] ch = new char[DEFAULT_BUFFER_SIZE]; // character buffer the scan methods read from
63     private int position; // index in ch of the next character to scan
64     private int count; // scan methods call load() when position reaches this value (presumably the end of valid data in ch -- confirm against load())
65     private boolean mayReadChunks = false;
66     
67     // org.apache.xerces.impl.XMLScanner fields
68
private XMLString fString = new XMLString();
69     private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
70     private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
71     private final static String JavaDoc fVersionSymbol = "version";
72     private final static String JavaDoc fEncodingSymbol = "encoding";
73     private final static String JavaDoc fStandaloneSymbol = "standalone";
74     
75     // org.apache.xerces.impl.XMLDocumentFragmentScannerImpl fields
76
private int fMarkupDepth = 0;
77     private String JavaDoc[] fStrings = new String JavaDoc[3];
78
79     private ErrorDispatcher err; // error sink; assigned by getEncoding(InputStream, ...) and used by createReader() to report bad encodings
80
/**
 * Creates a detector that owns a fresh symbol table and uses itself as
 * the "current entity" that the scanning methods operate on.
 */
public XMLEncodingDetector() {
    this.fCurrentEntity = this;
    this.fSymbolTable = new SymbolTable();
}
88
89     /**
90      * Autodetects the encoding of the XML document supplied by the given
91      * input stream.
92      *
93      * Encoding autodetection is done according to the XML 1.0 specification,
94      * Appendix F.1: Detection Without External Encoding Information.
95      *
96      * @return Two-element array, where the first element (of type
97      * java.lang.String) contains the name of the (auto)detected encoding, and
98      * the second element (of type java.lang.Boolean) specifies whether the
99      * encoding was specified using the 'encoding' attribute of an XML prolog
100      * (TRUE) or autodetected (FALSE).
101      */

102     public static Object JavaDoc[] getEncoding(String JavaDoc fname, JarFile JavaDoc jarFile,
103                                        JspCompilationContext ctxt,
104                                        ErrorDispatcher err)
105         throws IOException JavaDoc, JasperException
106     {
107         InputStream JavaDoc inStream = JspUtil.getInputStream(fname, jarFile, ctxt,
108                                                       err);
109         XMLEncodingDetector detector = new XMLEncodingDetector();
110         Object JavaDoc[] ret = detector.getEncoding(inStream, err);
111         inStream.close();
112
113         return ret;
114     }
115
116     private Object JavaDoc[] getEncoding(InputStream JavaDoc in, ErrorDispatcher err)
117         throws IOException JavaDoc, JasperException
118     {
119         this.stream = in;
120         this.err=err;
121         createInitialReader();
122         scanXMLDecl();
123     
124         return new Object JavaDoc[] { this.encoding,
125                               new Boolean JavaDoc(this.isEncodingSetInProlog),
126                               new Boolean JavaDoc(this.isBomPresent) };
127     }
128     
129     // stub method
130
void endEntity() {
131     }
132     
133     // Adapted from:
134
// org.apache.xerces.impl.XMLEntityManager.startEntity()
135
private void createInitialReader() throws IOException JavaDoc, JasperException {
136
137     // wrap this stream in RewindableInputStream
138
stream = new RewindableInputStream(stream);
139
140     // perform auto-detect of encoding if necessary
141
if (encoding == null) {
142         // read first four bytes and determine encoding
143
final byte[] b4 = new byte[4];
144         int count = 0;
145         for (; count<4; count++ ) {
146         b4[count] = (byte)stream.read();
147         }
148         if (count == 4) {
149         Object JavaDoc [] encodingDesc = getEncodingName(b4, count);
150         encoding = (String JavaDoc)(encodingDesc[0]);
151         isBigEndian = (Boolean JavaDoc)(encodingDesc[1]);
152         if (encodingDesc.length > 2) {
153             isBomPresent = (Boolean JavaDoc)(encodingDesc[2]);
154         } else {
155             isBomPresent = true;
156         }
157
158         stream.reset();
159         // Special case UTF-8 files with BOM created by Microsoft
160
// tools. It's more efficient to consume the BOM than make
161
// the reader perform extra checks. -Ac
162
if (count > 2 && encoding.equals("UTF-8")) {
163             int b0 = b4[0] & 0xFF;
164             int b1 = b4[1] & 0xFF;
165             int b2 = b4[2] & 0xFF;
166             if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
167             // ignore first three bytes...
168
stream.skip(3);
169             }
170         }
171         reader = createReader(stream, encoding, isBigEndian);
172         } else {
173         reader = createReader(stream, encoding, isBigEndian);
174         }
175     }
176     }
177
178     // Adapted from:
179
// org.apache.xerces.impl.XMLEntityManager.createReader
180
/**
181      * Creates a reader capable of reading the given input stream in
182      * the specified encoding.
183      *
184      * @param inputStream The input stream.
185      * @param encoding The encoding name that the input stream is
186      * encoded using. If the user has specified that
187      * Java encoding names are allowed, then the
188      * encoding name may be a Java encoding name;
189      * otherwise, it is an ianaEncoding name.
190      * @param isBigEndian For encodings (like uCS-4), whose names cannot
191      * specify a byte order, this tells whether the order
192      * is bigEndian. null means unknown or not relevant.
193      *
194      * @return Returns a reader.
195      */

196     private Reader JavaDoc createReader(InputStream JavaDoc inputStream, String JavaDoc encoding,
197                 Boolean JavaDoc isBigEndian)
198                 throws IOException JavaDoc, JasperException {
199
200         // normalize encoding name
201
if (encoding == null) {
202             encoding = "UTF-8";
203         }
204
205         // try to use an optimized reader
206
String JavaDoc ENCODING = encoding.toUpperCase(Locale.ENGLISH);
207         if (ENCODING.equals("UTF-8")) {
208             return new UTF8Reader(inputStream, fBufferSize);
209         }
210         if (ENCODING.equals("US-ASCII")) {
211             return new ASCIIReader(inputStream, fBufferSize);
212         }
213         if (ENCODING.equals("ISO-10646-UCS-4")) {
214             if (isBigEndian != null) {
215                 boolean isBE = isBigEndian.booleanValue();
216                 if (isBE) {
217                     return new UCSReader(inputStream, UCSReader.UCS4BE);
218                 } else {
219                     return new UCSReader(inputStream, UCSReader.UCS4LE);
220                 }
221             } else {
222                 err.jspError("jsp.error.xml.encodingByteOrderUnsupported",
223                  encoding);
224             }
225         }
226         if (ENCODING.equals("ISO-10646-UCS-2")) {
227             if (isBigEndian != null) { // sould never happen with this encoding...
228
boolean isBE = isBigEndian.booleanValue();
229                 if (isBE) {
230                     return new UCSReader(inputStream, UCSReader.UCS2BE);
231                 } else {
232                     return new UCSReader(inputStream, UCSReader.UCS2LE);
233                 }
234             } else {
235                 err.jspError("jsp.error.xml.encodingByteOrderUnsupported",
236                  encoding);
237             }
238         }
239
240         // check for valid name
241
boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
242         boolean validJava = XMLChar.isValidJavaEncoding(encoding);
243         if (!validIANA || (fAllowJavaEncodings && !validJava)) {
244             err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
245             // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
246
// because every byte is a valid ISO Latin 1 character.
247
// It may not translate correctly but if we failed on
248
// the encoding anyway, then we're expecting the content
249
// of the document to be bad. This will just prevent an
250
// invalid UTF-8 sequence to be detected. This is only
251
// important when continue-after-fatal-error is turned
252
// on. -Ac
253
encoding = "ISO-8859-1";
254         }
255
256         // try to use a Java reader
257
String JavaDoc javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
258         if (javaEncoding == null) {
259             if (fAllowJavaEncodings) {
260         javaEncoding = encoding;
261             } else {
262                 err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
263                 // see comment above.
264
javaEncoding = "ISO8859_1";
265             }
266         }
267         return new InputStreamReader JavaDoc(inputStream, javaEncoding);
268
269     } // createReader(InputStream,String, Boolean): Reader
270

271     // Adapted from:
272
// org.apache.xerces.impl.XMLEntityManager.getEncodingName
273
/**
274      * Returns the IANA encoding name that is auto-detected from
275      * the bytes specified, with the endian-ness of that encoding where
276      * appropriate.
277      *
278      * @param b4 The first four bytes of the input.
279      * @param count The number of bytes actually read.
280      * @return a 2-element array: the first element, an IANA-encoding string,
281      * the second element a Boolean which is true iff the document is big
282      * endian, false if it's little-endian, and null if the distinction isn't
283      * relevant.
284      */

285     private Object JavaDoc[] getEncodingName(byte[] b4, int count) {
286
287         if (count < 2) {
288             return new Object JavaDoc[]{"UTF-8", null, Boolean.FALSE};
289         }
290
291         // UTF-16, with BOM
292
int b0 = b4[0] & 0xFF;
293         int b1 = b4[1] & 0xFF;
294         if (b0 == 0xFE && b1 == 0xFF) {
295             // UTF-16, big-endian
296
return new Object JavaDoc [] {"UTF-16BE", Boolean.TRUE};
297         }
298         if (b0 == 0xFF && b1 == 0xFE) {
299             // UTF-16, little-endian
300
return new Object JavaDoc [] {"UTF-16LE", Boolean.FALSE};
301         }
302
303         // default to UTF-8 if we don't have enough bytes to make a
304
// good determination of the encoding
305
if (count < 3) {
306             return new Object JavaDoc [] {"UTF-8", null, Boolean.FALSE};
307         }
308
309         // UTF-8 with a BOM
310
int b2 = b4[2] & 0xFF;
311         if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
312             return new Object JavaDoc [] {"UTF-8", null};
313         }
314
315         // default to UTF-8 if we don't have enough bytes to make a
316
// good determination of the encoding
317
if (count < 4) {
318             return new Object JavaDoc [] {"UTF-8", null};
319         }
320
321         // other encodings
322
int b3 = b4[3] & 0xFF;
323         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
324             // UCS-4, big endian (1234)
325
return new Object JavaDoc [] {"ISO-10646-UCS-4", new Boolean JavaDoc(true)};
326         }
327         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
328             // UCS-4, little endian (4321)
329
return new Object JavaDoc [] {"ISO-10646-UCS-4", new Boolean JavaDoc(false)};
330         }
331         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
332             // UCS-4, unusual octet order (2143)
333
// REVISIT: What should this be?
334
return new Object JavaDoc [] {"ISO-10646-UCS-4", null};
335         }
336         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
337             // UCS-4, unusual octect order (3412)
338
// REVISIT: What should this be?
339
return new Object JavaDoc [] {"ISO-10646-UCS-4", null};
340         }
341         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
342             // UTF-16, big-endian, no BOM
343
// (or could turn out to be UCS-2...
344
// REVISIT: What should this be?
345
return new Object JavaDoc [] {"UTF-16BE", new Boolean JavaDoc(true)};
346         }
347         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
348             // UTF-16, little-endian, no BOM
349
// (or could turn out to be UCS-2...
350
return new Object JavaDoc [] {"UTF-16LE", new Boolean JavaDoc(false)};
351         }
352         if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
353             // EBCDIC
354
// a la xerces1, return CP037 instead of EBCDIC here
355
return new Object JavaDoc [] {"CP037", null};
356         }
357
358         // default encoding
359
return new Object JavaDoc [] {"UTF-8", null, Boolean.FALSE};
360
361     }
362
363     // Adapted from:
364
// org.apache.xerces.impl.XMLEntityManager.EntityScanner.isExternal
365
/** Returns true if the current entity being scanned is external. */
366     public boolean isExternal() {
367     return true;
368     }
369
370     // Adapted from:
371
// org.apache.xerces.impl.XMLEntityManager.EntityScanner.peekChar
372
/**
373      * Returns the next character on the input.
374      * <p>
375      * <strong>Note:</strong> The character is <em>not</em> consumed.
376      *
377      * @throws IOException Thrown if i/o error occurs.
378      * @throws EOFException Thrown on end of file.
379      */

380     public int peekChar() throws IOException JavaDoc {
381     
382     // load more characters, if needed
383
if (fCurrentEntity.position == fCurrentEntity.count) {
384         load(0, true);
385     }
386     
387     // peek at character
388
int c = fCurrentEntity.ch[fCurrentEntity.position];
389
390     // return peeked character
391
if (fCurrentEntity.isExternal()) {
392         return c != '\r' ? c : '\n';
393     }
394     else {
395         return c;
396     }
397     
398     } // peekChar():int
399

400     // Adapted from:
401
// org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanChar
402
/**
403      * Returns the next character on the input.
404      * <p>
405      * <strong>Note:</strong> The character is consumed.
406      *
407      * @throws IOException Thrown if i/o error occurs.
408      * @throws EOFException Thrown on end of file.
409      */

410     public int scanChar() throws IOException JavaDoc {
411
412     // load more characters, if needed
413
if (fCurrentEntity.position == fCurrentEntity.count) {
414         load(0, true);
415     }
416
417     // scan character
418
int c = fCurrentEntity.ch[fCurrentEntity.position++];
419     boolean external = false;
420     if (c == '\n' ||
421         (c == '\r' && (external = fCurrentEntity.isExternal()))) {
422         fCurrentEntity.lineNumber++;
423         fCurrentEntity.columnNumber = 1;
424         if (fCurrentEntity.position == fCurrentEntity.count) {
425         fCurrentEntity.ch[0] = (char)c;
426         load(1, false);
427         }
428         if (c == '\r' && external) {
429         if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
430             fCurrentEntity.position--;
431         }
432         c = '\n';
433         }
434     }
435
436     // return character that was scanned
437
fCurrentEntity.columnNumber++;
438     return c;
439     
440     }
441
442     // Adapted from:
443
// org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanName
444
/**
445      * Returns a string matching the Name production appearing immediately
446      * on the input as a symbol, or null if no Name string is present.
447      * <p>
448      * <strong>Note:</strong> The Name characters are consumed.
449      * <p>
450      * <strong>Note:</strong> The string returned must be a symbol. The
451      * SymbolTable can be used for this purpose.
452      *
453      * @throws IOException Thrown if i/o error occurs.
454      * @throws EOFException Thrown on end of file.
455      *
456      * @see SymbolTable
457      * @see XMLChar#isName
458      * @see XMLChar#isNameStart
459      */

460     public String JavaDoc scanName() throws IOException JavaDoc {
    // Scans the characters accepted by XMLChar.isNameStart/isName at the
    // current position and returns them through the symbol table; returns
    // null when no such characters are found.
461     
462     // load more characters, if needed
463
if (fCurrentEntity.position == fCurrentEntity.count) {
464         load(0, true);
465     }
466     
467     // scan name
468
int offset = fCurrentEntity.position;
469     if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
470         if (++fCurrentEntity.position == fCurrentEntity.count) {
        // the first name char was the last buffered char: park it at
        // ch[0] and load behind it
471         fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
472         offset = 0;
        // a true result from load() ends the scan with the one-char name
        // (the exact meaning of the flag is defined by load(), which is
        // not shown here -- TODO confirm)
473         if (load(1, false)) {
474             fCurrentEntity.columnNumber++;
475             String JavaDoc symbol = fSymbolTable.addSymbol(fCurrentEntity.ch,
476                                0, 1);
477             return symbol;
478         }
479         }
480         while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
481         if (++fCurrentEntity.position == fCurrentEntity.count) {
            // buffer consumed in the middle of a name: move the partial
            // name to the front of the buffer before loading more
482             int length = fCurrentEntity.position - offset;
483             if (length == fBufferSize) {
484             // bad luck we have to resize our buffer
485
char[] tmp = new char[fBufferSize * 2];
486             System.arraycopy(fCurrentEntity.ch, offset,
487                      tmp, 0, length);
488             fCurrentEntity.ch = tmp;
489             fBufferSize *= 2;
490             } else {
491             System.arraycopy(fCurrentEntity.ch, offset,
492                      fCurrentEntity.ch, 0, length);
493             }
494             offset = 0;
            // load after the preserved partial name; stop on a true result
495             if (load(length, false)) {
496             break;
497             }
498         }
499         }
500     }
    // length is 0 when the first character was not a name-start character
501     int length = fCurrentEntity.position - offset;
502     fCurrentEntity.columnNumber += length;
503
504     // return name
505
String JavaDoc symbol = null;
506     if (length > 0) {
507         symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
508     }
509     return symbol;
510     
511     }
512
513     // Adapted from:
514
// org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanLiteral
515
/**
516      * Scans a range of attribute value data, setting the fields of the
517      * XMLString structure, appropriately.
518      * <p>
519      * <strong>Note:</strong> The characters are consumed.
520      * <p>
521      * <strong>Note:</strong> This method does not guarantee to return
522      * the longest run of attribute value data. This method may return
523      * before the quote character due to reaching the end of the input
524      * buffer or any other reason.
525      * <p>
526      * <strong>Note:</strong> The fields contained in the XMLString
527      * structure are not guaranteed to remain valid upon subsequent calls
528      * to the entity scanner. Therefore, the caller is responsible for
529      * immediately using the returned character data or making a copy of
530      * the character data.
531      *
532      * @param quote The quote character that signifies the end of the
533      * attribute value data.
534      * @param content The content structure to fill.
535      *
536      * @return Returns the next character on the input, if known. This
537      * value may be -1 but this does <em>not</em> designate
538      * end of file.
539      *
540      * @throws IOException Thrown if i/o error occurs.
541      * @throws EOFException Thrown on end of file.
542      */

543     public int scanLiteral(int quote, XMLString content)
544     throws IOException JavaDoc {
    // Scans a run of attribute-value characters from the buffer into
    // 'content' and returns the character that follows the run, or -1.
545
546     // load more characters, if needed
547
if (fCurrentEntity.position == fCurrentEntity.count) {
548         load(0, true);
549     } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
        // exactly one unread char left: move it to the front of the
        // buffer, load behind it and restart scanning from index 0
550         fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
551         load(1, false);
552         fCurrentEntity.position = 0;
553     }
554
555     // normalize newlines
556
int offset = fCurrentEntity.position;
557     int c = fCurrentEntity.ch[offset];
558     int newlines = 0;
559     boolean external = fCurrentEntity.isExternal();
560     if (c == '\n' || (c == '\r' && external)) {
        // consume the leading run of line breaks, updating the line and
        // column counters as each break is seen
561         do {
562         c = fCurrentEntity.ch[fCurrentEntity.position++];
563         if (c == '\r' && external) {
564             newlines++;
565             fCurrentEntity.lineNumber++;
566             fCurrentEntity.columnNumber = 1;
567             if (fCurrentEntity.position == fCurrentEntity.count) {
            // buffer exhausted inside the run: restart at the front,
            // leaving room for the breaks counted so far
568             offset = 0;
569             fCurrentEntity.position = newlines;
570             if (load(newlines, false)) {
571                 break;
572             }
573             }
            // a LF right after the CR belongs to the same break: skip it
            // and move the start of the run past it
574             if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
575             fCurrentEntity.position++;
576             offset++;
577             }
578             /*** NEWLINE NORMALIZATION ***/
579             else {
580             newlines++;
581             }
582             /***/
583         }
584         else if (c == '\n') {
585             newlines++;
586             fCurrentEntity.lineNumber++;
587             fCurrentEntity.columnNumber = 1;
588             if (fCurrentEntity.position == fCurrentEntity.count) {
589             offset = 0;
590             fCurrentEntity.position = newlines;
591             if (load(newlines, false)) {
592                 break;
593             }
594             }
595             /*** NEWLINE NORMALIZATION ***
596              if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
597              && external) {
598              fCurrentEntity.position++;
599              offset++;
600              }
601              /***/

602         }
603         else {
            // not a line break: un-read it and leave the run
604             fCurrentEntity.position--;
605             break;
606         }
607         } while (fCurrentEntity.position < fCurrentEntity.count - 1);
        // rewrite the consumed run so that it holds only '\n' characters
608         for (int i = offset; i < fCurrentEntity.position; i++) {
609         fCurrentEntity.ch[i] = '\n';
610         }
611         int length = fCurrentEntity.position - offset;
        // the run reached the last buffered char: hand back just the
        // newlines and report the next character as unknown (-1)
612         if (fCurrentEntity.position == fCurrentEntity.count - 1) {
613         content.setValues(fCurrentEntity.ch, offset, length);
614         return -1;
615         }
616     }
617
618     // scan literal value
619
    // stop, without consuming, at the quote (when the literal flag is
    // unset or the entity is external), at '%', or at a character that
    // XMLChar.isContent rejects
while (fCurrentEntity.position < fCurrentEntity.count) {
620         c = fCurrentEntity.ch[fCurrentEntity.position++];
621         if ((c == quote &&
622          (!fCurrentEntity.literal || external))
623         || c == '%' || !XMLChar.isContent(c)) {
624         fCurrentEntity.position--;
625         break;
626         }
627     }
628     int length = fCurrentEntity.position - offset;
629     fCurrentEntity.columnNumber += length - newlines;
630     content.setValues(fCurrentEntity.ch, offset, length);
631
632     // return next character
633
if (fCurrentEntity.position != fCurrentEntity.count) {
634         c = fCurrentEntity.ch[fCurrentEntity.position];
635         // NOTE: We don't want to accidentally signal the
636
// end of the literal if we're expanding an
637
// entity appearing in the literal. -Ac
638
if (c == quote && fCurrentEntity.literal) {
639         c = -1;
640         }
641     }
642     else {
        // buffer exhausted: the next character is not known yet
643         c = -1;
644     }
645     return c;
646
647     }
648
649     /**
650      * Scans a range of character data up to the specified delimiter,
651      * setting the fields of the XMLString structure, appropriately.
652      * <p>
653      * <strong>Note:</strong> The characters are consumed.
654      * <p>
655      * <strong>Note:</strong> This assumes that the internal buffer is
656      * at least the same size, or bigger, than the length of the delimiter
657      * and that the delimiter contains at least one character.
658      * <p>
659      * <strong>Note:</strong> This method does not guarantee to return
660      * the longest run of character data. This method may return before
661      * the delimiter due to reaching the end of the input buffer or any
662      * other reason.
663      * <p>
664      * <strong>Note:</strong> The fields contained in the XMLString
665      * structure are not guaranteed to remain valid upon subsequent calls
666      * to the entity scanner. Therefore, the caller is responsible for
667      * immediately using the returned character data or making a copy of
668      * the character data.
669      *
670      * @param delimiter The string that signifies the end of the character
671      * data to be scanned.
672      * @param buffer The data structure to fill.
673      *
674      * @return Returns true if there is more data to scan, false otherwise.
675      *
676      * @throws IOException Thrown if i/o error occurs.
677      * @throws EOFException Thrown on end of file.
678      */

679     public boolean scanData(String JavaDoc delimiter, XMLStringBuffer buffer)
680     throws IOException JavaDoc {
    // Appends character data to 'buffer' until 'delimiter' is matched; the
    // delimiter is consumed but not appended.
681
682     boolean done = false;
683     int delimLen = delimiter.length();
684     char charAt0 = delimiter.charAt(0);
685     boolean external = fCurrentEntity.isExternal();
686     do {
687     
688         // load more characters, if needed
689

690         if (fCurrentEntity.position == fCurrentEntity.count) {
691         load(0, true);
692         }
693         else if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
        // no more than delimLen unread chars remain: move them to the
        // front of the buffer and load behind them
694         System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position,
695                  fCurrentEntity.ch, 0, fCurrentEntity.count - fCurrentEntity.position);
696         load(fCurrentEntity.count - fCurrentEntity.position, false);
697         fCurrentEntity.position = 0;
698         }
699         if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
700         // something must be wrong with the input: e.g., file ends an
701
// unterminated comment
702
        // append whatever is left, mark the buffer as consumed and give up
int length = fCurrentEntity.count - fCurrentEntity.position;
703         buffer.append (fCurrentEntity.ch, fCurrentEntity.position,
704                    length);
705         fCurrentEntity.columnNumber += fCurrentEntity.count;
706         fCurrentEntity.position = fCurrentEntity.count;
707         load(0,true);
708         return false;
709         }
710     
711         // normalize newlines
712
int offset = fCurrentEntity.position;
713         int c = fCurrentEntity.ch[offset];
714         int newlines = 0;
715         if (c == '\n' || (c == '\r' && external)) {
        // consume the leading run of line breaks, updating the line and
        // column counters as each break is seen
716         do {
717             c = fCurrentEntity.ch[fCurrentEntity.position++];
718             if (c == '\r' && external) {
719             newlines++;
720             fCurrentEntity.lineNumber++;
721             fCurrentEntity.columnNumber = 1;
722             if (fCurrentEntity.position == fCurrentEntity.count) {
723                 offset = 0;
724                 fCurrentEntity.position = newlines;
725                 if (load(newlines, false)) {
726                 break;
727                 }
728             }
            // a LF right after the CR belongs to the same break
729             if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
730                 fCurrentEntity.position++;
731                 offset++;
732             }
733             /*** NEWLINE NORMALIZATION ***/
734             else {
735                 newlines++;
736             }
737             }
738             else if (c == '\n') {
739             newlines++;
740             fCurrentEntity.lineNumber++;
741             fCurrentEntity.columnNumber = 1;
742             if (fCurrentEntity.position == fCurrentEntity.count) {
743                 offset = 0;
744                 fCurrentEntity.position = newlines;
                // NOTE(review): count is reset only in this branch, not in
                // the '\r' branch above -- confirm this is intentional
745                 fCurrentEntity.count = newlines;
746                 if (load(newlines, false)) {
747                 break;
748                 }
749             }
750             }
751             else {
            // not a line break: un-read it and leave the run
752             fCurrentEntity.position--;
753             break;
754             }
755         } while (fCurrentEntity.position < fCurrentEntity.count - 1);
        // rewrite the consumed run so that it holds only '\n' characters
756         for (int i = offset; i < fCurrentEntity.position; i++) {
757             fCurrentEntity.ch[i] = '\n';
758         }
759         int length = fCurrentEntity.position - offset;
        // the run reached the last buffered char: append the newlines and
        // tell the caller there is more to scan
760         if (fCurrentEntity.position == fCurrentEntity.count - 1) {
761             buffer.append(fCurrentEntity.ch, offset, length);
762             return true;
763         }
764         }
765     
766         // iterate over buffer looking for delimiter
767
OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
768         c = fCurrentEntity.ch[fCurrentEntity.position++];
769         if (c == charAt0) {
770         // looks like we just hit the delimiter
771
int delimOffset = fCurrentEntity.position - 1;
772         for (int i = 1; i < delimLen; i++) {
773             if (fCurrentEntity.position == fCurrentEntity.count) {
            // buffer ended in the middle of a possible match: step back
            // over the i chars matched so far and stop scanning this pass
774             fCurrentEntity.position -= i;
775             break OUTER;
776             }
777             c = fCurrentEntity.ch[fCurrentEntity.position++];
778             if (delimiter.charAt(i) != c) {
            // mismatch: un-read the offending char and keep scanning
779             fCurrentEntity.position--;
780             break;
781             }
782         }
        // the position advanced by exactly delimLen only on a full match
783         if (fCurrentEntity.position == delimOffset + delimLen) {
784             done = true;
785             break;
786         }
787         }
788         else if (c == '\n' || (external && c == '\r')) {
        // leave the line break for the normalization step of the next pass
789         fCurrentEntity.position--;
790         break;
791         }
792         else if (XMLChar.isInvalid(c)) {
        // un-read the invalid char, append what was scanned before it and
        // return to the caller
793         fCurrentEntity.position--;
794         int length = fCurrentEntity.position - offset;
795         fCurrentEntity.columnNumber += length - newlines;
796         buffer.append(fCurrentEntity.ch, offset, length);
797         return true;
798         }
799     }
800         int length = fCurrentEntity.position - offset;
801         fCurrentEntity.columnNumber += length - newlines;
802         if (done) {
        // do not append the delimiter itself
803         length -= delimLen;
804         }
805         buffer.append (fCurrentEntity.ch, offset, length);
806     
807         // return true if string was skipped
808
} while (!done);
    // the loop only exits with done == true, so this returns false
809     return !done;
810
811     }
812
813     // Adapted from:
814
// org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipChar
815
/**
816      * Skips a character appearing immediately on the input.
817      * <p>
818      * <strong>Note:</strong> The character is consumed only if it matches
819      * the specified character.
820      *
821      * @param c The character to skip.
822      *
823      * @return Returns true if the character was skipped.
824      *
825      * @throws IOException Thrown if i/o error occurs.
826      * @throws EOFException Thrown on end of file.
827      */

828     public boolean skipChar(int c) throws IOException JavaDoc {
829
830     // load more characters, if needed
831
if (fCurrentEntity.position == fCurrentEntity.count) {
832         load(0, true);
833     }
834
835     // skip character
836
int cc = fCurrentEntity.ch[fCurrentEntity.position];
837     if (cc == c) {
838         fCurrentEntity.position++;
839         if (c == '\n') {
840         fCurrentEntity.lineNumber++;
841         fCurrentEntity.columnNumber = 1;
842         }
843         else {
844         fCurrentEntity.columnNumber++;
845         }
846         return true;
847     } else if (c == '\n' && cc == '\r' && fCurrentEntity.isExternal()) {
848         // handle newlines
849
if (fCurrentEntity.position == fCurrentEntity.count) {
850         fCurrentEntity.ch[0] = (char)cc;
851         load(1, false);
852         }
853         fCurrentEntity.position++;
854         if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
855         fCurrentEntity.position++;
856         }
857         fCurrentEntity.lineNumber++;
858         fCurrentEntity.columnNumber = 1;
859         return true;
860     }
861
862     // character was not skipped
863
return false;
864
865     }
866
867     // Adapted from:
868
// org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipSpaces
869
/**
870      * Skips space characters appearing immediately on the input.
871      * <p>
872      * <strong>Note:</strong> The characters are consumed only if they are
873      * space characters.
874      *
875      * @return Returns true if at least one space character was skipped.
876      *
877      * @throws IOException Thrown if i/o error occurs.
878      * @throws EOFException Thrown on end of file.
879      *
880      * @see XMLChar#isSpace
881      */

882     public boolean skipSpaces() throws IOException JavaDoc {
883
884     // load more characters, if needed
885
if (fCurrentEntity.position == fCurrentEntity.count) {
886         load(0, true);
887     }
888
889     // skip spaces
890
int c = fCurrentEntity.ch[fCurrentEntity.position];
891     if (XMLChar.isSpace(c)) {
892         boolean external = fCurrentEntity.isExternal();
893         do {
894         boolean entityChanged = false;
895         // handle newlines
896
if (c == '\n' || (external && c == '\r')) {
897             fCurrentEntity.lineNumber++;
898             fCurrentEntity.columnNumber = 1;
899             if (fCurrentEntity.position == fCurrentEntity.count - 1) {
900             fCurrentEntity.ch[0] = (char)c;
901             entityChanged = load(1, true);
902             if (!entityChanged)
903                                 // the load change the position to be 1,
904
// need to restore it when entity not changed
905
fCurrentEntity.position = 0;
906             }
907             if (c == '\r' && external) {
908             // REVISIT: Does this need to be updated to fix the
909
// #x0D ^#x0A newline normalization problem? -Ac
910
if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
911                 fCurrentEntity.position--;
912             }
913             }
914             /*** NEWLINE NORMALIZATION ***
915              else {
916              if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
917              && external) {
918              fCurrentEntity.position++;
919              }
920              }
921              /***/

922         }
923         else {
924             fCurrentEntity.columnNumber++;
925         }
926         // load more characters, if needed
927
if (!entityChanged)
928             fCurrentEntity.position++;
929         if (fCurrentEntity.position == fCurrentEntity.count) {
930             load(0, true);
931         }
932         } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
933         return true;
934     }
935
936     // no spaces were found
937
return false;
938
939     }
940
941     /**
942      * Skips the specified string appearing immediately on the input.
943      * <p>
944      * <strong>Note:</strong> The characters are consumed only if they are
945      * space characters.
946      *
947      * @param s The string to skip.
948      *
949      * @return Returns true if the string was skipped.
950      *
951      * @throws IOException Thrown if i/o error occurs.
952      * @throws EOFException Thrown on end of file.
953      */

954     public boolean skipString(String JavaDoc s) throws IOException JavaDoc {
955
956     // load more characters, if needed
957
if (fCurrentEntity.position == fCurrentEntity.count) {
958         load(0, true);
959     }
960
961     // skip string
962
final int length = s.length();
963     for (int i = 0; i < length; i++) {
964         char c = fCurrentEntity.ch[fCurrentEntity.position++];
965         if (c != s.charAt(i)) {
966         fCurrentEntity.position -= i + 1;
967         return false;
968         }
969         if (i < length - 1 && fCurrentEntity.position == fCurrentEntity.count) {
970         System.arraycopy(fCurrentEntity.ch, fCurrentEntity.count - i - 1, fCurrentEntity.ch, 0, i + 1);
971         // REVISIT: Can a string to be skipped cross an
972
// entity boundary? -Ac
973
if (load(i + 1, false)) {
974             fCurrentEntity.position -= i + 1;
975             return false;
976         }
977         }
978     }
979     fCurrentEntity.columnNumber += length;
980     return true;
981
982     }
983
984     // Adapted from:
985
// org.apache.xerces.impl.XMLEntityManager.EntityScanner.load
986
/**
987      * Loads a chunk of text.
988      *
989      * @param offset The offset into the character buffer to
990      * read the next batch of characters.
991      * @param changeEntity True if the load should change entities
992      * at the end of the entity, otherwise leave
993      * the current entity in place and the entity
994      * boundary will be signaled by the return
995      * value.
996      *
997      * @returns Returns true if the entity changed as a result of this
998      * load operation.
999      */

1000    final boolean load(int offset, boolean changeEntity)
1001    throws IOException JavaDoc {
1002
1003    // read characters
1004
int length = fCurrentEntity.mayReadChunks?
1005        (fCurrentEntity.ch.length - offset):
1006        (DEFAULT_XMLDECL_BUFFER_SIZE);
1007    int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset,
1008                           length);
1009
1010    // reset count and position
1011
boolean entityChanged = false;
1012    if (count != -1) {
1013        if (count != 0) {
1014        fCurrentEntity.count = count + offset;
1015        fCurrentEntity.position = offset;
1016        }
1017    }
1018
1019    // end of this entity
1020
else {
1021        fCurrentEntity.count = offset;
1022        fCurrentEntity.position = offset;
1023        entityChanged = true;
1024        if (changeEntity) {
1025        endEntity();
1026        if (fCurrentEntity == null) {
1027            throw new EOFException JavaDoc();
1028        }
1029        // handle the trailing edges
1030
if (fCurrentEntity.position == fCurrentEntity.count) {
1031            load(0, false);
1032        }
1033        }
1034    }
1035
1036    return entityChanged;
1037
1038    }
1039
1040    // Adapted from:
1041
// org.apache.xerces.impl.XMLEntityManager.RewindableInputStream
1042
/**
1043     * This class wraps the byte inputstreams we're presented with.
1044     * We need it because java.io.InputStreams don't provide
1045     * functionality to reread processed bytes, and they have a habit
1046     * of reading more than one character when you call their read()
1047     * methods. This means that, once we discover the true (declared)
1048     * encoding of a document, we can neither backtrack to read the
1049     * whole doc again nor start reading where we are with a new
1050     * reader.
1051     *
1052     * This class allows rewinding an inputStream by allowing a mark
1053     * to be set, and the stream reset to that position. <strong>The
1054     * class assumes that it needs to read one character per
1055     * invocation when it's read() method is inovked, but uses the
1056     * underlying InputStream's read(char[], offset length) method--it
1057     * won't buffer data read this way!</strong>
1058     *
1059     * @author Neil Graham, IBM
1060     * @author Glenn Marcy, IBM
1061     */

1062    private final class RewindableInputStream extends InputStream JavaDoc {
1063
1064        private InputStream JavaDoc fInputStream;
1065        private byte[] fData;
1066        private int fStartOffset;
1067        private int fEndOffset;
1068        private int fOffset;
1069        private int fLength;
1070        private int fMark;
1071
1072        public RewindableInputStream(InputStream JavaDoc is) {
1073            fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE];
1074            fInputStream = is;
1075            fStartOffset = 0;
1076            fEndOffset = -1;
1077            fOffset = 0;
1078            fLength = 0;
1079            fMark = 0;
1080        }
1081
1082        public void setStartOffset(int offset) {
1083            fStartOffset = offset;
1084        }
1085
1086        public void rewind() {
1087            fOffset = fStartOffset;
1088        }
1089
1090        public int read() throws IOException JavaDoc {
1091            int b = 0;
1092            if (fOffset < fLength) {
1093                return fData[fOffset++] & 0xff;
1094            }
1095            if (fOffset == fEndOffset) {
1096                return -1;
1097            }
1098            if (fOffset == fData.length) {
1099                byte[] newData = new byte[fOffset << 1];
1100                System.arraycopy(fData, 0, newData, 0, fOffset);
1101                fData = newData;
1102            }
1103            b = fInputStream.read();
1104            if (b == -1) {
1105                fEndOffset = fOffset;
1106                return -1;
1107            }
1108            fData[fLength++] = (byte)b;
1109            fOffset++;
1110            return b & 0xff;
1111        }
1112
1113        public int read(byte[] b, int off, int len) throws IOException JavaDoc {
1114            int bytesLeft = fLength - fOffset;
1115            if (bytesLeft == 0) {
1116                if (fOffset == fEndOffset) {
1117                    return -1;
1118                }
1119                // better get some more for the voracious reader...
1120
if (fCurrentEntity.mayReadChunks) {
1121                    return fInputStream.read(b, off, len);
1122                }
1123                int returnedVal = read();
1124                if (returnedVal == -1) {
1125                    fEndOffset = fOffset;
1126                    return -1;
1127                }
1128                b[off] = (byte)returnedVal;
1129                return 1;
1130            }
1131            if (len < bytesLeft) {
1132                if (len <= 0) {
1133                    return 0;
1134                }
1135            }
1136            else {
1137                len = bytesLeft;
1138            }
1139            if (b != null) {
1140                System.arraycopy(fData, fOffset, b, off, len);
1141            }
1142            fOffset += len;
1143            return len;
1144        }
1145
1146        public long skip(long n)
1147            throws IOException JavaDoc
1148        {
1149            int bytesLeft;
1150            if (n <= 0) {
1151                return 0;
1152            }
1153            bytesLeft = fLength - fOffset;
1154            if (bytesLeft == 0) {
1155                if (fOffset == fEndOffset) {
1156                    return 0;
1157                }
1158                return fInputStream.skip(n);
1159            }
1160            if (n <= bytesLeft) {
1161                fOffset += n;
1162                return n;
1163            }
1164            fOffset += bytesLeft;
1165            if (fOffset == fEndOffset) {
1166                return bytesLeft;
1167            }
1168            n -= bytesLeft;
1169        /*
1170         * In a manner of speaking, when this class isn't permitting more
1171         * than one byte at a time to be read, it is "blocking". The
1172         * available() method should indicate how much can be read without
1173         * blocking, so while we're in this mode, it should only indicate
1174         * that bytes in its buffer are available; otherwise, the result of
1175         * available() on the underlying InputStream is appropriate.
1176         */

1177            return fInputStream.skip(n) + bytesLeft;
1178        }
1179
1180        public int available() throws IOException JavaDoc {
1181            int bytesLeft = fLength - fOffset;
1182            if (bytesLeft == 0) {
1183                if (fOffset == fEndOffset) {
1184                    return -1;
1185                }
1186                return fCurrentEntity.mayReadChunks ? fInputStream.available()
1187            : 0;
1188            }
1189            return bytesLeft;
1190        }
1191
1192        public void mark(int howMuch) {
1193            fMark = fOffset;
1194        }
1195
1196        public void reset() {
1197            fOffset = fMark;
1198        }
1199
1200        public boolean markSupported() {
1201            return true;
1202        }
1203
1204        public void close() throws IOException JavaDoc {
1205            if (fInputStream != null) {
1206                fInputStream.close();
1207                fInputStream = null;
1208            }
1209        }
1210    } // end of RewindableInputStream class
1211

1212    // Adapted from:
1213
// org.apache.xerces.impl.XMLDocumentScannerImpl.dispatch
1214
private void scanXMLDecl() throws IOException JavaDoc, JasperException {
1215
1216    if (skipString("<?xml")) {
1217        fMarkupDepth++;
1218        // NOTE: special case where document starts with a PI
1219
// whose name starts with "xml" (e.g. "xmlfoo")
1220
if (XMLChar.isName(peekChar())) {
1221        fStringBuffer.clear();
1222        fStringBuffer.append("xml");
1223        while (XMLChar.isName(peekChar())) {
1224            fStringBuffer.append((char)scanChar());
1225        }
1226        String JavaDoc target = fSymbolTable.addSymbol(fStringBuffer.ch,
1227                               fStringBuffer.offset,
1228                               fStringBuffer.length);
1229        scanPIData(target, fString);
1230        }
1231
1232        // standard XML declaration
1233
else {
1234        scanXMLDeclOrTextDecl(false);
1235        }
1236    }
1237    }
1238    
1239    // Adapted from:
1240
// org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanXMLDeclOrTextDecl
1241
/**
1242     * Scans an XML or text declaration.
1243     * <p>
1244     * <pre>
1245     * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1246     * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
1247     * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
1248     * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1249     * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
1250     * | ('"' ('yes' | 'no') '"'))
1251     *
1252     * [77] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?>'
1253     * </pre>
1254     *
1255     * @param scanningTextDecl True if a text declaration is to
1256     * be scanned instead of an XML
1257     * declaration.
1258     */

1259    private void scanXMLDeclOrTextDecl(boolean scanningTextDecl)
1260        throws IOException JavaDoc, JasperException {
1261
1262        // scan decl
1263
scanXMLDeclOrTextDecl(scanningTextDecl, fStrings);
1264        fMarkupDepth--;
1265
1266        // pseudo-attribute values
1267
String JavaDoc encodingPseudoAttr = fStrings[1];
1268
1269        // set encoding on reader
1270
if (encodingPseudoAttr != null) {
1271            isEncodingSetInProlog = true;
1272        encoding = encodingPseudoAttr;
1273        }
1274    }
1275
1276    // Adapted from:
1277
// org.apache.xerces.impl.XMLScanner.scanXMLDeclOrTextDecl
1278
/**
1279     * Scans an XML or text declaration.
1280     * <p>
1281     * <pre>
1282     * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1283     * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
1284     * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
1285     * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1286     * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
1287     * | ('"' ('yes' | 'no') '"'))
1288     *
1289     * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1290     * </pre>
1291     *
1292     * @param scanningTextDecl True if a text declaration is to
1293     * be scanned instead of an XML
1294     * declaration.
1295     * @param pseudoAttributeValues An array of size 3 to return the version,
1296     * encoding and standalone pseudo attribute values
1297     * (in that order).
1298     *
1299     * <strong>Note:</strong> This method uses fString, anything in it
1300     * at the time of calling is lost.
1301     */

1302    private void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
1303                       String JavaDoc[] pseudoAttributeValues)
1304                throws IOException JavaDoc, JasperException {
1305
1306        // pseudo-attribute values
1307
String JavaDoc version = null;
1308        String JavaDoc encoding = null;
1309        String JavaDoc standalone = null;
1310
1311        // scan pseudo-attributes
1312
final int STATE_VERSION = 0;
1313        final int STATE_ENCODING = 1;
1314        final int STATE_STANDALONE = 2;
1315        final int STATE_DONE = 3;
1316        int state = STATE_VERSION;
1317
1318        boolean dataFoundForTarget = false;
1319        boolean sawSpace = skipSpaces();
1320        while (peekChar() != '?') {
1321            dataFoundForTarget = true;
1322            String JavaDoc name = scanPseudoAttribute(scanningTextDecl, fString);
1323            switch (state) {
1324                case STATE_VERSION: {
1325                    if (name == fVersionSymbol) {
1326                        if (!sawSpace) {
1327                            reportFatalError(scanningTextDecl
1328                                       ? "jsp.error.xml.spaceRequiredBeforeVersionInTextDecl"
1329                                       : "jsp.error.xml.spaceRequiredBeforeVersionInXMLDecl",
1330                                             null);
1331                        }
1332                        version = fString.toString();
1333                        state = STATE_ENCODING;
1334                        if (!version.equals("1.0")) {
1335                            // REVISIT: XML REC says we should throw an error
1336
// in such cases.
1337
// some may object the throwing of fatalError.
1338
err.jspError("jsp.error.xml.versionNotSupported",
1339                     version);
1340                        }
1341                    } else if (name == fEncodingSymbol) {
1342                        if (!scanningTextDecl) {
1343                            err.jspError("jsp.error.xml.versionInfoRequired");
1344                        }
1345                        if (!sawSpace) {
1346                            reportFatalError(scanningTextDecl
1347                                      ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
1348                                      : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
1349                                             null);
1350                        }
1351                        encoding = fString.toString();
1352                        state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
1353                    } else {
1354                        if (scanningTextDecl) {
1355                            err.jspError("jsp.error.xml.encodingDeclRequired");
1356                        }
1357                        else {
1358                            err.jspError("jsp.error.xml.versionInfoRequired");
1359                        }
1360                    }
1361                    break;
1362                }
1363                case STATE_ENCODING: {
1364                    if (name == fEncodingSymbol) {
1365                        if (!sawSpace) {
1366                            reportFatalError(scanningTextDecl
1367                                      ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
1368                                      : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
1369                                             null);
1370                        }
1371                        encoding = fString.toString();
1372                        state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
1373                        // TODO: check encoding name; set encoding on
1374
// entity scanner
1375
} else if (!scanningTextDecl && name == fStandaloneSymbol) {
1376                        if (!sawSpace) {
1377                            err.jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
1378                        }
1379                        standalone = fString.toString();
1380                        state = STATE_DONE;
1381                        if (!standalone.equals("yes") && !standalone.equals("no")) {
1382                            err.jspError("jsp.error.xml.sdDeclInvalid");
1383                        }
1384                    } else {
1385                        err.jspError("jsp.error.xml.encodingDeclRequired");
1386                    }
1387                    break;
1388                }
1389                case STATE_STANDALONE: {
1390                    if (name == fStandaloneSymbol) {
1391                        if (!sawSpace) {
1392                            err.jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
1393                        }
1394                        standalone = fString.toString();
1395                        state = STATE_DONE;
1396                        if (!standalone.equals("yes") && !standalone.equals("no")) {
1397                            err.jspError("jsp.error.xml.sdDeclInvalid");
1398                        }
1399                    } else {
1400            err.jspError("jsp.error.xml.encodingDeclRequired");
1401                    }
1402                    break;
1403                }
1404                default: {
1405                    err.jspError("jsp.error.xml.noMorePseudoAttributes");
1406                }
1407            }
1408            sawSpace = skipSpaces();
1409        }
1410        // REVISIT: should we remove this error reporting?
1411
if (scanningTextDecl && state != STATE_DONE) {
1412            err.jspError("jsp.error.xml.morePseudoAttributes");
1413        }
1414        
1415        // If there is no data in the xml or text decl then we fail to report
1416
// error for version or encoding info above.
1417
if (scanningTextDecl) {
1418            if (!dataFoundForTarget && encoding == null) {
1419                err.jspError("jsp.error.xml.encodingDeclRequired");
1420            }
1421        } else {
1422            if (!dataFoundForTarget && version == null) {
1423                err.jspError("jsp.error.xml.versionInfoRequired");
1424            }
1425        }
1426
1427        // end
1428
if (!skipChar('?')) {
1429            err.jspError("jsp.error.xml.xmlDeclUnterminated");
1430        }
1431        if (!skipChar('>')) {
1432            err.jspError("jsp.error.xml.xmlDeclUnterminated");
1433
1434        }
1435        
1436        // fill in return array
1437
pseudoAttributeValues[0] = version;
1438        pseudoAttributeValues[1] = encoding;
1439        pseudoAttributeValues[2] = standalone;
1440    }
1441
1442    // Adapted from:
1443
// org.apache.xerces.impl.XMLScanner.scanPseudoAttribute
1444
/**
1445     * Scans a pseudo attribute.
1446     *
1447     * @param scanningTextDecl True if scanning this pseudo-attribute for a
1448     * TextDecl; false if scanning XMLDecl. This
1449     * flag is needed to report the correct type of
1450     * error.
1451     * @param value The string to fill in with the attribute
1452     * value.
1453     *
1454     * @return The name of the attribute
1455     *
1456     * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
1457     * at the time of calling is lost.
1458     */

1459    public String JavaDoc scanPseudoAttribute(boolean scanningTextDecl,
1460                                      XMLString value)
1461                throws IOException JavaDoc, JasperException {
1462
1463        String JavaDoc name = scanName();
1464        if (name == null) {
1465            err.jspError("jsp.error.xml.pseudoAttrNameExpected");
1466        }
1467        skipSpaces();
1468        if (!skipChar('=')) {
1469            reportFatalError(scanningTextDecl ?
1470                 "jsp.error.xml.eqRequiredInTextDecl"
1471                             : "jsp.error.xml.eqRequiredInXMLDecl",
1472                 name);
1473        }
1474        skipSpaces();
1475        int quote = peekChar();
1476        if (quote != '\'' && quote != '"') {
1477            reportFatalError(scanningTextDecl ?
1478                 "jsp.error.xml.quoteRequiredInTextDecl"
1479                             : "jsp.error.xml.quoteRequiredInXMLDecl" ,
1480                 name);
1481        }
1482        scanChar();
1483        int c = scanLiteral(quote, value);
1484        if (c != quote) {
1485            fStringBuffer2.clear();
1486            do {
1487                fStringBuffer2.append(value);
1488                if (c != -1) {
1489                    if (c == '&' || c == '%' || c == '<' || c == ']') {
1490                        fStringBuffer2.append((char)scanChar());
1491                    }
1492                    else if (XMLChar.isHighSurrogate(c)) {
1493                        scanSurrogates(fStringBuffer2);
1494                    }
1495                    else if (XMLChar.isInvalid(c)) {
1496                        String JavaDoc key = scanningTextDecl
1497                            ? "jsp.error.xml.invalidCharInTextDecl"
1498                : "jsp.error.xml.invalidCharInXMLDecl";
1499                        reportFatalError(key, Integer.toString(c, 16));
1500                        scanChar();
1501                    }
1502                }
1503                c = scanLiteral(quote, value);
1504            } while (c != quote);
1505            fStringBuffer2.append(value);
1506            value.setValues(fStringBuffer2);
1507        }
1508        if (!skipChar(quote)) {
1509            reportFatalError(scanningTextDecl ?
1510                 "jsp.error.xml.closeQuoteMissingInTextDecl"
1511                             : "jsp.error.xml.closeQuoteMissingInXMLDecl",
1512                 name);
1513        }
1514
1515        // return
1516
return name;
1517
1518    }
1519    
1520    // Adapted from:
1521
// org.apache.xerces.impl.XMLScanner.scanPIData
1522
/**
1523     * Scans a processing data. This is needed to handle the situation
1524     * where a document starts with a processing instruction whose
1525     * target name <em>starts with</em> "xml". (e.g. xmlfoo)
1526     *
1527     * <strong>Note:</strong> This method uses fStringBuffer, anything in it
1528     * at the time of calling is lost.
1529     *
1530     * @param target The PI target
1531     * @param data The string to fill in with the data
1532     */

1533    private void scanPIData(String JavaDoc target, XMLString data)
1534        throws IOException JavaDoc, JasperException {
1535
1536        // check target
1537
if (target.length() == 3) {
1538            char c0 = Character.toLowerCase(target.charAt(0));
1539            char c1 = Character.toLowerCase(target.charAt(1));
1540            char c2 = Character.toLowerCase(target.charAt(2));
1541            if (c0 == 'x' && c1 == 'm' && c2 == 'l') {
1542                err.jspError("jsp.error.xml.reservedPITarget");
1543            }
1544        }
1545
1546        // spaces
1547
if (!skipSpaces()) {
1548            if (skipString("?>")) {
1549                // we found the end, there is no data
1550
data.clear();
1551                return;
1552            }
1553            else {
1554                // if there is data there should be some space
1555
err.jspError("jsp.error.xml.spaceRequiredInPI");
1556            }
1557        }
1558
1559        fStringBuffer.clear();
1560        // data
1561
if (scanData("?>", fStringBuffer)) {
1562            do {
1563                int c = peekChar();
1564                if (c != -1) {
1565                    if (XMLChar.isHighSurrogate(c)) {
1566                        scanSurrogates(fStringBuffer);
1567                    } else if (XMLChar.isInvalid(c)) {
1568                        err.jspError("jsp.error.xml.invalidCharInPI",
1569                     Integer.toHexString(c));
1570                        scanChar();
1571                    }
1572                }
1573            } while (scanData("?>", fStringBuffer));
1574        }
1575        data.setValues(fStringBuffer);
1576
1577    }
1578
1579    // Adapted from:
1580
// org.apache.xerces.impl.XMLScanner.scanSurrogates
1581
/**
1582     * Scans surrogates and append them to the specified buffer.
1583     * <p>
1584     * <strong>Note:</strong> This assumes the current char has already been
1585     * identified as a high surrogate.
1586     *
1587     * @param buf The StringBuffer to append the read surrogates to.
1588     * @returns True if it succeeded.
1589     */

1590    private boolean scanSurrogates(XMLStringBuffer buf)
1591        throws IOException JavaDoc, JasperException {
1592
1593        int high = scanChar();
1594        int low = peekChar();
1595        if (!XMLChar.isLowSurrogate(low)) {
1596            err.jspError("jsp.error.xml.invalidCharInContent",
1597             Integer.toString(high, 16));
1598            return false;
1599        }
1600        scanChar();
1601
1602        // convert surrogates to supplemental character
1603
int c = XMLChar.supplemental((char)high, (char)low);
1604
1605        // supplemental character must be a valid XML character
1606
if (!XMLChar.isValid(c)) {
1607            err.jspError("jsp.error.xml.invalidCharInContent",
1608             Integer.toString(c, 16));
1609            return false;
1610        }
1611
1612        // fill in the buffer
1613
buf.append((char)high);
1614        buf.append((char)low);
1615
1616        return true;
1617
1618    }
1619
1620    // Adapted from:
1621
// org.apache.xerces.impl.XMLScanner.reportFatalError
1622
/**
1623     * Convenience function used in all XML scanners.
1624     */

1625    private void reportFatalError(String JavaDoc msgId, String JavaDoc arg)
1626                throws JasperException {
1627        err.jspError(msgId, arg);
1628    }
1629
1630}
1631
1632
1633
Popular Tags