KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > apache > xerces > readers > UTF8Reader


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 1999,2000 The Apache Software Foundation. All rights
6  * reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

57
58 package org.enhydra.apache.xerces.readers;
59
60 import java.io.InputStream JavaDoc;
61
62 import org.enhydra.apache.xerces.framework.XMLErrorReporter;
63 import org.enhydra.apache.xerces.utils.QName;
64 import org.enhydra.apache.xerces.utils.StringPool;
65 import org.enhydra.apache.xerces.utils.SymbolCache;
66 import org.enhydra.apache.xerces.utils.UTF8DataChunk;
67 import org.enhydra.apache.xerces.utils.XMLCharacterProperties;
68
69 /**
70  * This is the primary reader used for UTF-8 encoded byte streams.
71  * <p>
72  * This reader processes requests from the scanners against the
73  * underlying UTF-8 byte stream, avoiding when possible any up-front
74  * transcoding. When the StringPool handle interfaces are used,
75  * the information in the data stream will be added to the string
76  * pool and lazy-evaluated until asked for.
77  * <p>
78  * We use the SymbolCache to match expected names (element types in
79  * end tags) and walk the data structures of that class directly.
80  * <p>
81  * There is a significant amount of hand-inlining and some blatant
82  * violation of good object-oriented programming rules, ignoring
83  * boundaries of modularity, etc., in the name of good performance.
84  * <p>
85  * There are also some places where the code here frequently crashes
86  * the SUN java runtime compiler (JIT) and the code here has been
87  * carefully "crafted" to avoid those problems.
88  *
89  * @version $Id: UTF8Reader.java,v 1.2 2005/01/26 08:28:44 jkjome Exp $
90  */

91 final class UTF8Reader extends XMLEntityReader {
92     //
93
//
94
//
95
// When true, the scanning methods that test this flag call loadNextByte();
// when false they run an inlined copy of the same byte-advance code.
private final static boolean USE_OUT_OF_LINE_LOAD_NEXT_BYTE = false;
// When true, the byte-advance code detects that the index has run off the
// current data array by catching ArrayIndexOutOfBoundsException; when false
// it compares the index against UTF8DataChunk.CHUNK_SIZE instead.
96     private final static boolean USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE = true;
97     //
98
//
99
//
100
/**
 * Creates a reader over the given byte stream.
 * <p>
 * The stream and string pool are remembered, a char array range is
 * obtained from the pool, the first data chunk is created (with no
 * predecessor) and immediately filled through fillCurrentChunk().
 *
 * @param entityHandler           passed through to the superclass
 * @param errorReporter           passed through to the superclass
 * @param sendCharDataAsCharArray passed through to the superclass
 * @param dataStream              the byte stream this reader consumes
 * @param stringPool              the pool used for chunks and char ranges
 * @throws Exception if filling the first chunk fails
 */
public UTF8Reader(XMLEntityHandler entityHandler,
                  XMLErrorReporter errorReporter,
                  boolean sendCharDataAsCharArray,
                  InputStream dataStream,
                  StringPool stringPool) throws Exception {
    super(entityHandler, errorReporter, sendCharDataAsCharArray);
    this.fInputStream = dataStream;
    this.fStringPool = stringPool;
    this.fCharArrayRange = this.fStringPool.createCharArrayRange();
    this.fCurrentChunk = UTF8DataChunk.createChunk(this.fStringPool, null);
    // Prime the reader so the first byte is available to the scanners.
    fillCurrentChunk();
}
108     /**
109      *
110      */

111     public int addString(int offset, int length) {
112         if (length == 0)
113             return 0;
114         return fCurrentChunk.addString(offset, length);
115     }
116     /**
117      *
118      */

119     public int addSymbol(int offset, int length) {
120         if (length == 0)
121             return 0;
122         return fCurrentChunk.addSymbol(offset, length, 0);
123     }
124     /**
125      *
126      */

127     private int addSymbol(int offset, int length, int hashcode) {
128         if (length == 0)
129             return 0;
130         return fCurrentChunk.addSymbol(offset, length, hashcode);
131     }
132     /**
133      *
134      */

135     public void append(XMLEntityHandler.CharBuffer charBuffer, int offset, int length) {
136         fCurrentChunk.append(charBuffer, offset, length);
137     }
138     //
139
//
140
//
141
private int slowLoadNextByte() throws Exception JavaDoc {
142         fCallClearPreviousChunk = true;
143         if (fCurrentChunk.nextChunk() != null) {
144             fCurrentChunk = fCurrentChunk.nextChunk();
145             fCurrentIndex = 0;
146             fMostRecentData = fCurrentChunk.toByteArray();
147             return(fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
148         } else {
149             fCurrentChunk = UTF8DataChunk.createChunk(fStringPool, fCurrentChunk);
150             return fillCurrentChunk();
151         }
152     }
153     private int loadNextByte() throws Exception JavaDoc {
154         fCurrentOffset++;
155         if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
156             fCurrentIndex++;
157             try {
158                 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
159                 return fMostRecentByte;
160             } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
161                 return slowLoadNextByte();
162             }
163         } else {
164             if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
165                 return slowLoadNextByte();
166             else
167                 return(fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
168         }
169     }
170     //
171
//
172
//
173
private boolean atEOF(int offset) {
174         return(offset > fLength);
175     }
176     //
177
//
178
//
179
public XMLEntityHandler.EntityReader changeReaders() throws Exception JavaDoc {
180         XMLEntityHandler.EntityReader nextReader = super.changeReaders();
181         fCurrentChunk.releaseChunk();
182         fCurrentChunk = null;
183         fMostRecentData = null;
184         fMostRecentByte = 0;
185         return nextReader;
186     }
187     //
188
//
189
//
190
public boolean lookingAtChar(char ch, boolean skipPastChar) throws Exception JavaDoc {
191         int b0 = fMostRecentByte;
192         if (b0 != ch) {
193             if (b0 == 0) {
194                 if (atEOF(fCurrentOffset + 1)) {
195                     return changeReaders().lookingAtChar(ch, skipPastChar);
196                 }
197             }
198             if (ch == 0x0A && b0 == 0x0D) {
199                 if (skipPastChar) {
200                     fCarriageReturnCounter++;
201                     fCharacterCounter = 1;
202                     if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
203                         b0 = loadNextByte();
204                     } else {
205                         fCurrentOffset++;
206                         if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
207                             fCurrentIndex++;
208                             try {
209                                 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
210                                 b0 = fMostRecentByte;
211                             } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
212                                 b0 = slowLoadNextByte();
213                             }
214                         } else {
215                             if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
216                                 b0 = slowLoadNextByte();
217                             else
218                                 b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
219                         }
220                     }
221                     if (b0 == 0x0A) {
222                         fLinefeedCounter++;
223                         if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
224                             loadNextByte();
225                         } else {
226                             fCurrentOffset++;
227                             if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
228                                 fCurrentIndex++;
229                                 try {
230                                     fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
231                                 } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
232                                     slowLoadNextByte();
233                                 }
234                             } else {
235                                 if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
236                                     slowLoadNextByte();
237                                 else
238                                     fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
239                             }
240                         }
241                     }
242                 }
243                 return true;
244             }
245             return false;
246         }
247         if (ch == 0x0D)
248             return false;
249         if (skipPastChar) {
250             fCharacterCounter++;
251             if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
252                 loadNextByte();
253             } else {
254                 fCurrentOffset++;
255                 if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
256                     fCurrentIndex++;
257                     try {
258                         fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
259                     } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
260                         slowLoadNextByte();
261                     }
262                 } else {
263                     if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
264                         slowLoadNextByte();
265                     else
266                         fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
267                 }
268             }
269         }
270         return true;
271     }
272     //
273
//
274
//
275
public boolean lookingAtValidChar(boolean skipPastChar) throws Exception JavaDoc {
276         int b0 = fMostRecentByte;
277         if (b0 < 0x80) { // 0xxxxxxx
278
if (b0 >= 0x20 || b0 == 0x09) {
279                 if (skipPastChar) {
280                     fCharacterCounter++;
281                     if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
282                         loadNextByte();
283                     } else {
284                         fCurrentOffset++;
285                         if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
286                             fCurrentIndex++;
287                             try {
288                                 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
289                             } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
290                                 slowLoadNextByte();
291                             }
292                         } else {
293                             if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
294                                 slowLoadNextByte();
295                             else
296                                 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
297                         }
298                     }
299                 }
300                 return true;
301             }
302             if (b0 == 0x0A) {
303                 if (skipPastChar) {
304                     fLinefeedCounter++;
305                     fCharacterCounter = 1;
306                     if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
307                         loadNextByte();
308                     } else {
309                         fCurrentOffset++;
310                         if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
311                             fCurrentIndex++;
312                             try {
313                                 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
314                             } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
315                                 slowLoadNextByte();
316                             }
317                         } else {
318                             if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
319                                 slowLoadNextByte();
320                             else
321                                 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
322                         }
323                     }
324                 }
325                 return true;
326             }
327             if (b0 == 0x0D) {
328                 if (skipPastChar) {
329                     fCarriageReturnCounter++;
330                     fCharacterCounter = 1;
331                     if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
332                         b0 = loadNextByte();
333                     } else {
334                         fCurrentOffset++;
335                         if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
336                             fCurrentIndex++;
337                             try {
338                                 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
339                                 b0 = fMostRecentByte;
340                             } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
341                                 b0 = slowLoadNextByte();
342                             }
343                         } else {
344                             if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
345                                 b0 = slowLoadNextByte();
346                             else
347                                 b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
348                         }
349                     }
350                     if (b0 == 0x0A) {
351                         fLinefeedCounter++;
352                         if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
353                             loadNextByte();
354                         } else {
355                             fCurrentOffset++;
356                             if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
357                                 fCurrentIndex++;
358                                 try {
359                                     fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
360                                 } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
361                                     slowLoadNextByte();
362                                 }
363                             } else {
364                                 if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
365                                     slowLoadNextByte();
366                                 else
367                                     fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
368                             }
369                         }
370                     }
371                 }
372                 return true;
373             }
374             if (b0 == 0) {
375                 if (atEOF(fCurrentOffset + 1)) {
376                     return changeReaders().lookingAtValidChar(skipPastChar);
377                 }
378             }
379             return false;
380         }
381         //
382
// REVISIT - optimize this with in-buffer lookahead.
383
//
384
UTF8DataChunk saveChunk = fCurrentChunk;
385         int saveIndex = fCurrentIndex;
386         int saveOffset = fCurrentOffset;
387         int b1 = loadNextByte();
388         if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx (0x80 to 0x7ff)
389
if (skipPastChar) {
390                 fCharacterCounter++;
391                 loadNextByte();
392             } else {
393                 fCurrentChunk = saveChunk;
394                 fCurrentIndex = saveIndex;
395                 fCurrentOffset = saveOffset;
396                 fMostRecentData = saveChunk.toByteArray();
397                 fMostRecentByte = b0;
398             }
399             return true; // [#x20-#xD7FF]
400
}
401         int b2 = loadNextByte();
402         if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
403
// ch = ((0x0f & b0)<<12) + ((0x3f & b1)<<6) + (0x3f & b2); // zzzz yyyy yyxx xxxx (0x800 to 0xffff)
404
// if (!((ch >= 0xD800 && ch <= 0xDFFF) || ch >= 0xFFFE))
405
// if ((ch <= 0xD7FF) || (ch >= 0xE000 && ch <= 0xFFFD))
406
boolean result = false;
407             if (!((b0 == 0xED && b1 >= 0xA0) || (b0 == 0xEF && b1 == 0xBF && b2 >= 0xBE))) { // [#x20-#xD7FF] | [#xE000-#xFFFD]
408
if (skipPastChar) {
409                     fCharacterCounter++;
410                     loadNextByte();
411                     return true;
412                 }
413                 result = true;
414             }
415             fCurrentChunk = saveChunk;
416             fCurrentIndex = saveIndex;
417             fCurrentOffset = saveOffset;
418             fMostRecentData = saveChunk.toByteArray();
419             fMostRecentByte = b0;
420             return result;
421         }
422         int b3 = loadNextByte(); // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
423
// ch = ((0x0f & b0)<<18) + ((0x3f & b1)<<12) + ((0x3f & b2)<<6) + (0x3f & b3); // u uuuu zzzz yyyy yyxx xxxx (0x10000 to 0x1ffff)
424
// if (ch >= 0x110000)
425
boolean result = false;
426
427         //if (( 0xf8 & b0 ) == 0xf0 ) {
428
//if (!(b0 > 0xF4 || (b0 == 0xF4 && b1 >= 0x90))) { // [#x10000-#x10FFFF]
429
if ( ((b0&0xf8) == 0xf0) && ((b1&0xc0)==0x80) &&
430              ((b2&0xc0) == 0x80) && ((b3&0xc0)==0x80)){
431             if (!(b0 > 0xF4 || (b0 == 0xF4 && b1 >= 0x90))) { // [#x10000-#x10FFFF]
432

433                 if (skipPastChar) {
434                     fCharacterCounter++;
435                     loadNextByte();
436                     return true;
437                 }
438                 result = true;
439             }
440             fCurrentChunk = saveChunk;
441             fCurrentIndex = saveIndex;
442             fCurrentOffset = saveOffset;
443             fMostRecentData = saveChunk.toByteArray();
444             fMostRecentByte = b0;
445             return result;
446         } else{
447             fCurrentChunk = saveChunk;
448             fCurrentIndex = saveIndex;
449             fCurrentOffset = saveOffset;
450             fMostRecentData = saveChunk.toByteArray();
451             fMostRecentByte = b0;
452             return result;
453         }
454     }
455     //
456
//
457
//
458
public boolean lookingAtSpace(boolean skipPastChar) throws Exception JavaDoc {
459         int ch = fMostRecentByte;
460         if (ch > 0x20)
461             return false;
462         if (ch == 0x20 || ch == 0x09) {
463             if (!skipPastChar)
464                 return true;
465             fCharacterCounter++;
466         } else if (ch == 0x0A) {
467             if (!skipPastChar)
468                 return true;
469             fLinefeedCounter++;
470             fCharacterCounter = 1;
471         } else if (ch == 0x0D) {
472             if (!skipPastChar)
473                 return true;
474             fCarriageReturnCounter++;
475             fCharacterCounter = 1;
476             if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
477                 ch = loadNextByte();
478             } else {
479                 fCurrentOffset++;
480                 if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
481                     fCurrentIndex++;
482                     try {
483                         fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
484                         ch = fMostRecentByte;
485                     } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
486                         ch = slowLoadNextByte();
487                     }
488                 } else {
489                     if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
490                         ch = slowLoadNextByte();
491                     else
492                         ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
493                 }
494             }
495             if (ch != 0x0A)
496                 return true;
497             fLinefeedCounter++;
498         } else {
499             if (ch == 0) { // REVISIT - should we be checking this here ?
500
if (atEOF(fCurrentOffset + 1)) {
501                     return changeReaders().lookingAtSpace(skipPastChar);
502                 }
503             }
504             return false;
505         }
506         if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
507             loadNextByte();
508         } else {
509             fCurrentOffset++;
510             if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
511                 fCurrentIndex++;
512                 try {
513                     fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
514                 } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
515                     slowLoadNextByte();
516                 }
517             } else {
518                 if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
519                     slowLoadNextByte();
520                 else
521                     fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
522             }
523         }
524         return true;
525     }
526     //
527
//
528
//
529
public void skipToChar(char ch) throws Exception JavaDoc {
530         //
531
// REVISIT - this will skip invalid characters without reporting them.
532
//
533
int b0 = fMostRecentByte;
534         while (true) {
535             if (b0 == ch) // ch will always be an ascii character
536
return;
537             if (b0 == 0) {
538                 if (atEOF(fCurrentOffset + 1)) {
539                     changeReaders().skipToChar(ch);
540                     return;
541                 }
542                 fCharacterCounter++;
543             } else if (b0 == 0x0A) {
544                 fLinefeedCounter++;
545                 fCharacterCounter = 1;
546             } else if (b0 == 0x0D) {
547                 fCarriageReturnCounter++;
548                 fCharacterCounter = 1;
549                 b0 = loadNextByte();
550                 if (b0 != 0x0A)
551                     continue;
552                 fLinefeedCounter++;
553             } else if (b0 < 0x80) { // 0xxxxxxx
554
fCharacterCounter++;
555             } else {
556                 fCharacterCounter++;
557                 if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx
558
loadNextByte();
559                 } else if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
560
loadNextByte();
561                     loadNextByte();
562                 } else { // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
563
loadNextByte();
564                     loadNextByte();
565                     loadNextByte();
566                 }
567             }
568             b0 = loadNextByte();
569         }
570     }
571     //
572
//
573
//
574
public void skipPastSpaces() throws Exception JavaDoc {
575         int ch = fMostRecentByte;
576         while (true) {
577             if (ch == 0x20 || ch == 0x09) {
578                 fCharacterCounter++;
579             } else if (ch == 0x0A) {
580                 fLinefeedCounter++;
581                 fCharacterCounter = 1;
582             } else if (ch == 0x0D) {
583                 fCarriageReturnCounter++;
584                 fCharacterCounter = 1;
585                 if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
586                     ch = loadNextByte();
587                 } else {
588                     fCurrentOffset++;
589                     if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
590                         fCurrentIndex++;
591                         try {
592                             fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
593                             ch = fMostRecentByte;
594                         } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
595                             ch = slowLoadNextByte();
596                         }
597                     } else {
598                         if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
599                             ch = slowLoadNextByte();
600                         else
601                             ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
602                     }
603                 }
604                 if (ch != 0x0A)
605                     continue;
606                 fLinefeedCounter++;
607             } else {
608                 if (ch == 0 && atEOF(fCurrentOffset + 1))
609                     changeReaders().skipPastSpaces();
610                 return;
611             }
612             if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
613                 ch = loadNextByte();
614             } else {
615                 fCurrentOffset++;
616                 if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
617                     fCurrentIndex++;
618                     try {
619                         fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
620                         ch = fMostRecentByte;
621                     } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
622                         ch = slowLoadNextByte();
623                     }
624                 } else {
625                     if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
626                         ch = slowLoadNextByte();
627                     else
628                         ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
629                 }
630             }
631         }
632     }
633     //
634
//
635
//
636
protected boolean skippedMultiByteCharWithFlag(int b0, int flag) throws Exception JavaDoc {
637         UTF8DataChunk saveChunk = fCurrentChunk;
638         int saveOffset = fCurrentOffset;
639         int saveIndex = fCurrentIndex;
640         if (!fCalledCharPropInit) {
641             XMLCharacterProperties.initCharFlags();
642             fCalledCharPropInit = true;
643         }
644         int b1 = loadNextByte();
645         if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx
646
if ((XMLCharacterProperties.fgCharFlags[((0x1f & b0)<<6) + (0x3f & b1)] & flag) == 0) { // yyy yyxx xxxx (0x80 to 0x7ff)
647
fCurrentChunk = saveChunk;
648                 fCurrentIndex = saveIndex;
649                 fCurrentOffset = saveOffset;
650                 fMostRecentData = saveChunk.toByteArray();
651                 fMostRecentByte = b0;
652                 return false;
653             }
654             return true;
655         }
656         int b2 = loadNextByte();
657         if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
658
// if ((ch >= 0xD800 && ch <= 0xDFFF) || ch >= 0xFFFE)
659
if ((b0 == 0xED && b1 >= 0xA0) || (b0 == 0xEF && b1 == 0xBF && b2 >= 0xBE)) {
660                 fCurrentChunk = saveChunk;
661                 fCurrentIndex = saveIndex;
662                 fCurrentOffset = saveOffset;
663                 fMostRecentData = saveChunk.toByteArray();
664                 fMostRecentByte = b0;
665                 return false;
666             }
667             if ((XMLCharacterProperties.fgCharFlags[((0x0f & b0)<<12) + ((0x3f & b1)<<6) + (0x3f & b2)] & flag) == 0) { // zzzz yyyy yyxx xxxx (0x800 to 0xffff)
668
fCurrentChunk = saveChunk;
669                 fCurrentIndex = saveIndex;
670                 fCurrentOffset = saveOffset;
671                 fMostRecentData = saveChunk.toByteArray();
672                 fMostRecentByte = b0;
673                 return false;
674             }
675             return true;
676         } else { // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
677
fCurrentChunk = saveChunk;
678             fCurrentIndex = saveIndex;
679             fCurrentOffset = saveOffset;
680             fMostRecentData = saveChunk.toByteArray();
681             fMostRecentByte = b0;
682             return false;
683         }
684     }
685     public void skipPastName(char fastcheck) throws Exception JavaDoc {
686         int b0 = fMostRecentByte;
687         if (b0 < 0x80) {
688             if (XMLCharacterProperties.fgAsciiInitialNameChar[b0] == 0)
689                 return;
690         } else {
691             if (!fCalledCharPropInit) {
692                 XMLCharacterProperties.initCharFlags();
693                 fCalledCharPropInit = true;
694             }
695             if (!skippedMultiByteCharWithFlag(b0, XMLCharacterProperties.E_InitialNameCharFlag))
696                 return;
697         }
698         while (true) {
699             fCharacterCounter++;
700             b0 = loadNextByte();
701             if (fastcheck == b0)
702                 return;
703             if (b0 < 0x80) {
704                 if (XMLCharacterProperties.fgAsciiNameChar[b0] == 0)
705                     return;
706             } else {
707                 if (!fCalledCharPropInit) {
708                     XMLCharacterProperties.initCharFlags();
709                     fCalledCharPropInit = true;
710                 }
711                 if (!skippedMultiByteCharWithFlag(b0, XMLCharacterProperties.E_NameCharFlag))
712                     return;
713             }
714         }
715     }
716     //
717
//
718
//
719
public void skipPastNmtoken(char fastcheck) throws Exception JavaDoc {
720         int b0 = fMostRecentByte;
721         while (true) {
722             if (fastcheck == b0)
723                 return;
724             if (b0 < 0x80) {
725                 if (XMLCharacterProperties.fgAsciiNameChar[b0] == 0)
726                     return;
727             } else {
728                 if (!skippedMultiByteCharWithFlag(b0, XMLCharacterProperties.E_NameCharFlag))
729                     return;
730             }
731             fCharacterCounter++;
732             b0 = loadNextByte();
733         }
734     }
735     //
736
//
737
//
738
public boolean skippedString(char[] s) throws Exception JavaDoc {
739         int length = s.length;
740         byte[] data = fMostRecentData;
741         int index = fCurrentIndex + length;
742         int sindex = length;
743         try {
744             while (sindex-- > 0) {
745                 if (data[--index] != s[sindex])
746                     return false;
747             }
748             fCurrentIndex += length;
749         } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
750             int i = 0;
751             index = fCurrentIndex;
752             while (index < UTF8DataChunk.CHUNK_SIZE) {
753                 if (data[index++] != s[i++])
754                     return false;
755             }
756             UTF8DataChunk dataChunk = fCurrentChunk;
757             int savedOffset = fCurrentOffset;
758             int savedIndex = fCurrentIndex;
759             slowLoadNextByte();
760             data = fMostRecentData;
761             index = 0;
762             while (i < length) {
763                 if (data[index++] != s[i++]) {
764                     fCurrentChunk = dataChunk;
765                     fCurrentIndex = savedIndex;
766                     fCurrentOffset = savedOffset;
767                     fMostRecentData = fCurrentChunk.toByteArray();
768                     fMostRecentByte = fMostRecentData[savedIndex] & 0xFF;
769                     return false;
770                 }
771             }
772             fCurrentIndex = index;
773         }
774         fCharacterCounter += length;
775         fCurrentOffset += length;
776         try {
777             fMostRecentByte = data[fCurrentIndex] & 0xFF;
778         } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
779             slowLoadNextByte();
780         }
781         return true;
782     }
783     //
784
//
785
//
786
public int scanInvalidChar() throws Exception JavaDoc {
787         int b0 = fMostRecentByte;
788         int ch = b0;
789         if (ch == 0x0A) {
790             fLinefeedCounter++;
791             fCharacterCounter = 1;
792         } else if (ch == 0x0D) {
793             fCarriageReturnCounter++;
794             fCharacterCounter = 1;
795             ch = loadNextByte();
796             if (ch != 0x0A)
797                 return 0x0A;
798             fLinefeedCounter++;
799         } else if (ch == 0) {
800             if (atEOF(fCurrentOffset + 1)) {
801                 return changeReaders().scanInvalidChar();
802             }
803             fCharacterCounter++;
804         } else if (b0 >= 0x80) {
805             fCharacterCounter++;
806             int b1 = loadNextByte();
807             int b2 = 0;
808             if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx
809
ch = ((0x1f & b0)<<6) + (0x3f & b1);
810             } else if ( (0xf0 & b0) == 0xe0 ) {
811                 b2 = loadNextByte();
812                 ch = ((0x0f & b0)<<12) + ((0x3f & b1)<<6) + (0x3f & b2);
813             } else if (( 0xf8 & b0 ) == 0xf0 ){
814                 b2 = loadNextByte();
815                 int b3 = loadNextByte(); // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
816
ch = ((0x0f & b0)<<18) + ((0x3f & b1)<<12)
817                      + ((0x3f & b2)<<6) + (0x3f & b3);
818             }
819         }
820         loadNextByte();
821         return ch;
822     }
823     //
824
//
825
//
826
public int scanCharRef(boolean hex) throws Exception JavaDoc {
827         int ch = fMostRecentByte;
828         if (ch == 0) {
829             if (atEOF(fCurrentOffset + 1)) {
830                 return changeReaders().scanCharRef(hex);
831             }
832             return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR;
833         }
834         int num = 0;
835         if (hex) {
836             if (ch > 'f' || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0)
837                 return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR;
838             num = ch - (ch < 'A' ? '0' : (ch < 'a' ? 'A' : 'a') - 10);
839         } else {
840             if (ch < '0' || ch > '9')
841                 return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR;
842             num = ch - '0';
843         }
844         fCharacterCounter++;
845         loadNextByte();
846         boolean toobig = false;
847         while (true) {
848             ch = fMostRecentByte;
849             if (ch == 0)
850                 break;
851             if (hex) {
852                 if (ch > 'f' || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0)
853                     break;
854             } else {
855                 if (ch < '0' || ch > '9')
856                     break;
857             }
858             fCharacterCounter++;
859             loadNextByte();
860             if (hex) {
861                 int dig = ch - (ch < 'A' ? '0' : (ch < 'a' ? 'A' : 'a') - 10);
862                 num = (num << 4) + dig;
863             } else {
864                 int dig = ch - '0';
865                 num = (num * 10) + dig;
866             }
867             if (num > 0x10FFFF) {
868                 toobig = true;
869                 num = 0;
870             }
871         }
872         if (ch != ';')
873             return XMLEntityHandler.CHARREF_RESULT_SEMICOLON_REQUIRED;
874         fCharacterCounter++;
875         loadNextByte();
876         if (toobig)
877             return XMLEntityHandler.CHARREF_RESULT_OUT_OF_RANGE;
878         return num;
879     }
880     //
881
//
882
//
883
public int scanStringLiteral() throws Exception JavaDoc {
884         boolean single;
885         if (!(single = lookingAtChar('\'', true)) && !lookingAtChar('\"', true)) {
886             return XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED;
887         }
888         int offset = fCurrentOffset;
889         char qchar = single ? '\'' : '\"';
890         while (!lookingAtChar(qchar, false)) {
891             if (!lookingAtValidChar(true)) {
892                 return XMLEntityHandler.STRINGLIT_RESULT_INVALID_CHAR;
893             }
894         }
895         int stringIndex = fCurrentChunk.addString(offset, fCurrentOffset - offset);
896         lookingAtChar(qchar, true); // move past qchar
897
return stringIndex;
898     }
899     //
900
// [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
901
// | "'" ([^<&'] | Reference)* "'"
902
//
903
// The values in the following table are defined as:
904
//
905
// 0 - not special
906
// 1 - quote character
907
// 2 - complex
908
// 3 - less than
909
// 4 - invalid
910
//
911
public static final byte fgAsciiAttValueChar[] = {
912         4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 2, 4, 4, // tab is 0x09, LF is 0x0A, CR is 0x0D
913
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
914         0, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, // '\"' is 0x22, '&' is 0x26, '\'' is 0x27
915
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, // '<' is 0x3C
916
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
917         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
918         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
919         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
920     };
921     public int scanAttValue(char qchar, boolean asSymbol) throws Exception JavaDoc
922     {
923         int offset = fCurrentOffset;
924         int b0 = fMostRecentByte;
925         while (true) {
926             if (b0 < 0x80) {
927                 switch (fgAsciiAttValueChar[b0]) {
928                 case 1: // quote char
929
if (b0 == qchar) {
930                         int length = fCurrentOffset - offset;
931                         int result = length == 0 ? StringPool.EMPTY_STRING : (asSymbol ? fCurrentChunk.addSymbol(offset, length, 0) : fCurrentChunk.addString(offset, length));
932                         fCharacterCounter++;
933                         if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
934                             loadNextByte();
935                         } else {
936                             fCurrentOffset++;
937                             if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
938                                 fCurrentIndex++;
939                                 try {
940                                     fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
941                                 } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
942                                     slowLoadNextByte();
943                                 }
944                             } else {
945                                 if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
946                                     slowLoadNextByte();
947                                 else
948                                     fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
949                             }
950                         }
951                         return result;
952                     }
953                     // the other quote character is not special
954
// fall through
955
case 0: // non-special char
956
fCharacterCounter++;
957                     if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
958                         b0 = loadNextByte();
959                     } else {
960                         fCurrentOffset++;
961                         if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
962                             fCurrentIndex++;
963                             try {
964                                 b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
965                             } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
966                                 b0 = slowLoadNextByte();
967                             }
968                         } else {
969                             if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
970                                 b0 = slowLoadNextByte();
971                             else
972                                 b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
973                         }
974                     }
975                     continue;
976                 case 2: // complex
977
return XMLEntityHandler.ATTVALUE_RESULT_COMPLEX;
978                 case 3: // less than
979
return XMLEntityHandler.ATTVALUE_RESULT_LESSTHAN;
980                 case 4: // invalid
981
return XMLEntityHandler.ATTVALUE_RESULT_INVALID_CHAR;
982                 }
983             } else {
984                 if (!skipMultiByteCharData(b0))
985                     return XMLEntityHandler.ATTVALUE_RESULT_INVALID_CHAR;
986                 b0 = fMostRecentByte;
987             }
988         }
989     }
990     //
991
// [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
992
// | "'" ([^%&'] | PEReference | Reference)* "'"
993
//
994
// The values in the following table are defined as:
995
//
996
// 0 - not special
997
// 1 - quote character
998
// 2 - reference
999
// 3 - peref
1000
// 4 - invalid
1001
// 5 - linefeed
1002
// 6 - carriage-return
1003
// 7 - end of input
1004
//
1005
public static final byte fgAsciiEntityValueChar[] = {
1006        7, 4, 4, 4, 4, 4, 4, 4, 4, 0, 5, 4, 4, 6, 4, 4, // tab is 0x09, LF is 0x0A, CR is 0x0D
1007
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
1008        0, 0, 1, 0, 0, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, // '\"', '%', '&', '\''
1009
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1010        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1011        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1012        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1013        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1014    };
1015    public int scanEntityValue(int qchar, boolean createString) throws Exception JavaDoc
1016    {
1017        int offset = fCurrentOffset;
1018        int b0 = fMostRecentByte;
1019        while (true) {
1020            if (b0 < 0x80) {
1021                switch (fgAsciiEntityValueChar[b0]) {
1022                case 1: // quote char
1023
if (b0 == qchar) {
1024                        if (!createString)
1025                            return XMLEntityHandler.ENTITYVALUE_RESULT_FINISHED;
1026                        int length = fCurrentOffset - offset;
1027                        int result = length == 0 ? StringPool.EMPTY_STRING : fCurrentChunk.addString(offset, length);
1028                        fCharacterCounter++;
1029                        if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1030                            loadNextByte();
1031                        } else {
1032                            fCurrentOffset++;
1033                            if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1034                                fCurrentIndex++;
1035                                try {
1036                                    fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
1037                                } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1038                                    slowLoadNextByte();
1039                                }
1040                            } else {
1041                                if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1042                                    slowLoadNextByte();
1043                                else
1044                                    fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
1045                            }
1046                        }
1047                        return result;
1048                    }
1049                    // the other quote character is not special
1050
// fall through
1051
case 0: // non-special char
1052
fCharacterCounter++;
1053                    if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1054                        b0 = loadNextByte();
1055                    } else {
1056                        fCurrentOffset++;
1057                        if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1058                            fCurrentIndex++;
1059                            try {
1060                                b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1061                            } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1062                                b0 = slowLoadNextByte();
1063                            }
1064                        } else {
1065                            if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1066                                b0 = slowLoadNextByte();
1067                            else
1068                                b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1069                        }
1070                    }
1071                    continue;
1072                case 5: // linefeed
1073
fLinefeedCounter++;
1074                    fCharacterCounter = 1;
1075                    if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1076                        b0 = loadNextByte();
1077                    } else {
1078                        fCurrentOffset++;
1079                        if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1080                            fCurrentIndex++;
1081                            try {
1082                                b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1083                            } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1084                                b0 = slowLoadNextByte();
1085                            }
1086                        } else {
1087                            if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1088                                b0 = slowLoadNextByte();
1089                            else
1090                                b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1091                        }
1092                    }
1093                    continue;
1094                case 6: // carriage-return
1095
fCarriageReturnCounter++;
1096                    fCharacterCounter = 1;
1097                    if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1098                        b0 = loadNextByte();
1099                    } else {
1100                        fCurrentOffset++;
1101                        if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1102                            fCurrentIndex++;
1103                            try {
1104                                b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1105                            } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1106                                b0 = slowLoadNextByte();
1107                            }
1108                        } else {
1109                            if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1110                                b0 = slowLoadNextByte();
1111                            else
1112                                b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1113                        }
1114                    }
1115                    if (b0 != 0x0A) {
1116                        continue;
1117                    }
1118                    fLinefeedCounter++;
1119                    if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1120                        b0 = loadNextByte();
1121                    } else {
1122                        fCurrentOffset++;
1123                        if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1124                            fCurrentIndex++;
1125                            try {
1126                                b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1127                            } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1128                                b0 = slowLoadNextByte();
1129                            }
1130                        } else {
1131                            if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1132                                b0 = slowLoadNextByte();
1133                            else
1134                                b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1135                        }
1136                    }
1137                    continue;
1138                case 2: // reference
1139
return XMLEntityHandler.ENTITYVALUE_RESULT_REFERENCE;
1140                case 3: // peref
1141
return XMLEntityHandler.ENTITYVALUE_RESULT_PEREF;
1142                case 7:
1143                    if (atEOF(fCurrentOffset + 1)) {
1144                        changeReaders(); // do not call next reader, our caller may need to change the parameters
1145
return XMLEntityHandler.ENTITYVALUE_RESULT_END_OF_INPUT;
1146                    }
1147                    // fall into...
1148
case 4: // invalid
1149
return XMLEntityHandler.ENTITYVALUE_RESULT_INVALID_CHAR;
1150                }
1151            } else {
1152                if (!skipMultiByteCharData(b0))
1153                    return XMLEntityHandler.ENTITYVALUE_RESULT_INVALID_CHAR;
1154                b0 = fMostRecentByte;
1155            }
1156        }
1157    }
1158    //
1159
//
1160
//
1161
public boolean scanExpectedName(char fastcheck, StringPool.CharArrayRange expectedName) throws Exception JavaDoc {
1162        char[] expected = expectedName.chars;
1163        int offset = expectedName.offset;
1164        int len = expectedName.length;
1165        int b0 = fMostRecentByte;
1166        int ch = 0;
1167        int i = 0;
1168        while (true) {
1169            if (b0 < 0x80) {
1170                ch = b0;
1171                if (i == len)
1172                    break;
1173                if (ch != expected[offset]) {
1174                    skipPastNmtoken(fastcheck);
1175                    return false;
1176                }
1177            } else {
1178                //
1179
// REVISIT - optimize this with in-buffer lookahead.
1180
//
1181
UTF8DataChunk saveChunk = fCurrentChunk;
1182                int saveIndex = fCurrentIndex;
1183                int saveOffset = fCurrentOffset;
1184                int b1;
1185                if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1186                    b1 = loadNextByte();
1187                } else {
1188                    fCurrentOffset++;
1189                    if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1190                        fCurrentIndex++;
1191                        try {
1192                            b1 = fMostRecentData[fCurrentIndex] & 0xFF;
1193                        } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1194                            b1 = slowLoadNextByte();
1195                        }
1196                    } else {
1197                        if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1198                            b1 = slowLoadNextByte();
1199                        else
1200                            b1 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1201                    }
1202                }
1203                if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx
1204
ch = ((0x1f & b0)<<6) + (0x3f & b1);
1205                    if (i == len)
1206                        break;
1207                    if (ch != expected[offset]) {
1208                        fCurrentChunk = saveChunk;
1209                        fCurrentIndex = saveIndex;
1210                        fCurrentOffset = saveOffset;
1211                        fMostRecentData = saveChunk.toByteArray();
1212                        fMostRecentByte = b0;
1213                        skipPastNmtoken(fastcheck);
1214                        return false;
1215                    }
1216                } else {
1217                    int b2;
1218                    if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1219                        b2 = loadNextByte();
1220                    } else {
1221                        fCurrentOffset++;
1222                        if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1223                            fCurrentIndex++;
1224                            try {
1225                                b2 = fMostRecentData[fCurrentIndex] & 0xFF;
1226                            } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1227                                b2 = slowLoadNextByte();
1228                            }
1229                        } else {
1230                            if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1231                                b2 = slowLoadNextByte();
1232                            else
1233                                b2 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1234                        }
1235                    }
1236                    if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
1237
// if ((ch >= 0xD800 && ch <= 0xDFFF) || ch >= 0xFFFE)
1238
if ((b0 == 0xED && b1 >= 0xA0) || (b0 == 0xEF && b1 == 0xBF && b2 >= 0xBE)) {
1239                            fCurrentChunk = saveChunk;
1240                            fCurrentIndex = saveIndex;
1241                            fCurrentOffset = saveOffset;
1242                            fMostRecentData = saveChunk.toByteArray();
1243                            fMostRecentByte = b0;
1244                            return false;
1245                        }
1246                        ch = ((0x0f & b0)<<12) + ((0x3f & b1)<<6) + (0x3f & b2);
1247                        if (i == len)
1248                            break;
1249                        if (ch != expected[offset]) {
1250                            fCurrentChunk = saveChunk;
1251                            fCurrentIndex = saveIndex;
1252                            fCurrentOffset = saveOffset;
1253                            fMostRecentData = saveChunk.toByteArray();
1254                            fMostRecentByte = b0;
1255                            skipPastNmtoken(fastcheck);
1256                            return false;
1257                        }
1258                    } else { // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
1259
fCurrentChunk = saveChunk;
1260                        fCurrentIndex = saveIndex;
1261                        fCurrentOffset = saveOffset;
1262                        fMostRecentData = saveChunk.toByteArray();
1263                        fMostRecentByte = b0;
1264                        return false;
1265                    }
1266                }
1267            }
1268            i++;
1269            offset++;
1270            fCharacterCounter++;
1271            fCurrentOffset++;
1272            if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1273                fCurrentIndex++;
1274                try {
1275                    b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1276                } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1277                    b0 = slowLoadNextByte();
1278                }
1279            } else {
1280                if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1281                    b0 = slowLoadNextByte();
1282                else
1283                    b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1284            }
1285        }
1286        if (ch == fastcheck)
1287            return true;
1288        if (ch < 0x80) {
1289            if (XMLCharacterProperties.fgAsciiNameChar[ch] == 0)
1290                return true;
1291        } else {
1292            if (!fCalledCharPropInit) {
1293                XMLCharacterProperties.initCharFlags();
1294                fCalledCharPropInit = true;
1295            }
1296            if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0)
1297                return true;
1298        }
1299        skipPastNmtoken(fastcheck);
1300        return false;
1301    }
1302
1303    public void scanQName(char fastcheck, QName qname) throws Exception JavaDoc {
1304        int offset = fCurrentOffset;
1305        int ch = fMostRecentByte;
1306        if (ch < 0x80) {
1307            if (XMLCharacterProperties.fgAsciiInitialNameChar[ch] == 0) {
1308                qname.clear();
1309                return;
1310            }
1311            if (ch == ':') {
1312                qname.clear();
1313                return;
1314            }
1315        } else {
1316            if (!fCalledCharPropInit) {
1317                XMLCharacterProperties.initCharFlags();
1318                fCalledCharPropInit = true;
1319            }
1320            ch = getMultiByteSymbolChar(ch);
1321            fCurrentIndex--;
1322            fCurrentOffset--;
1323            if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0) {
1324                qname.clear();
1325                return;
1326            }
1327        }
1328        int index = fCurrentIndex;
1329        byte[] data = fMostRecentData;
1330        int prefixend = -1;
1331        while (true) {
1332            fCharacterCounter++;
1333            fCurrentOffset++;
1334            index++;
1335            try {
1336                ch = data[index] & 0xFF;
1337            } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1338                ch = slowLoadNextByte();
1339                index = 0;
1340                data = fMostRecentData;
1341            }
1342            if (fastcheck == ch)
1343                break;
1344            if (ch < 0x80) {
1345                if (XMLCharacterProperties.fgAsciiNameChar[ch] == 0)
1346                    break;
1347                if (ch == ':') {
1348                    if (prefixend != -1)
1349                        break;
1350                    prefixend = fCurrentOffset;
1351                    //
1352
// We need to peek ahead one character. If the next character is not a
1353
// valid initial name character, or is another colon, then we cannot meet
1354
// both the Prefix and LocalPart productions for the QName production,
1355
// which means that there is no Prefix and we need to terminate the QName
1356
// at the first colon.
1357
//
1358
try {
1359                        ch = data[index + 1] & 0xFF;
1360                    } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1361                        UTF8DataChunk savedChunk = fCurrentChunk;
1362                        int savedOffset = fCurrentOffset;
1363                        ch = slowLoadNextByte();
1364                        fCurrentChunk = savedChunk;
1365                        fCurrentOffset = savedOffset;
1366                        fMostRecentData = fCurrentChunk.toByteArray();
1367                    }
1368                    boolean lpok = true;
1369                    if (ch < 0x80) {
1370                        if (XMLCharacterProperties.fgAsciiInitialNameChar[ch] == 0 || ch == ':')
1371                            lpok = false;
1372                    } else {
1373                        if (!fCalledCharPropInit) {
1374                            XMLCharacterProperties.initCharFlags();
1375                            fCalledCharPropInit = true;
1376                        }
1377                        if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0)
1378                            lpok = false;
1379                    }
1380                    ch = ':';
1381                    if (!lpok) {
1382                        prefixend = -1;
1383                        break;
1384                    }
1385                }
1386            } else {
1387                if (!fCalledCharPropInit) {
1388                    XMLCharacterProperties.initCharFlags();
1389                    fCalledCharPropInit = true;
1390                }
1391                fCurrentIndex = index;
1392                fMostRecentByte = ch;
1393                ch = getMultiByteSymbolChar(ch);
1394                fCurrentIndex--;
1395                fCurrentOffset--;
1396                index = fCurrentIndex;
1397                if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0)
1398                    break;
1399            }
1400        }
1401        fCurrentIndex = index;
1402        fMostRecentByte = ch;
1403        int length = fCurrentOffset - offset;
1404        qname.rawname = addSymbol(offset, length);
1405        qname.prefix = prefixend == -1 ? -1 : addSymbol(offset, prefixend - offset);
1406        qname.localpart = prefixend == -1 ? qname.rawname : addSymbol(prefixend + 1, fCurrentOffset - (prefixend + 1));
1407        qname.uri = StringPool.EMPTY_STRING;
1408
1409    } // scanQName(char,QName)
1410

1411    private int getMultiByteSymbolChar(int b0) throws Exception JavaDoc {
1412        //
1413
// REVISIT - optimize this with in-buffer lookahead.
1414
//
1415
UTF8DataChunk saveChunk = fCurrentChunk;
1416        int saveIndex = fCurrentIndex;
1417        int saveOffset = fCurrentOffset;
1418        if (!fCalledCharPropInit) {
1419            XMLCharacterProperties.initCharFlags();
1420            fCalledCharPropInit = true;
1421        }
1422        int b1;
1423        if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1424            b1 = loadNextByte();
1425        } else {
1426            fCurrentOffset++;
1427            if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1428                fCurrentIndex++;
1429                try {
1430                    b1 = fMostRecentData[fCurrentIndex] & 0xFF;
1431                } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1432                    b1 = slowLoadNextByte();
1433                }
1434            } else {
1435                if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1436                    b1 = slowLoadNextByte();
1437                else
1438                    b1 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1439            }
1440        }
1441        if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx
1442
int ch = ((0x1f & b0)<<6) + (0x3f & b1);
1443            if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0) { // yyy yyxx xxxx (0x80 to 0x7ff)
1444
fCurrentChunk = saveChunk;
1445                fCurrentIndex = saveIndex;
1446                fCurrentOffset = saveOffset;
1447                fMostRecentData = saveChunk.toByteArray();
1448                fMostRecentByte = b0;
1449                return -1;
1450            }
1451            loadNextByte();
1452            return ch;
1453        }
1454        int b2;
1455        if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1456            b2 = loadNextByte();
1457        } else {
1458            fCurrentOffset++;
1459            if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1460                fCurrentIndex++;
1461                try {
1462                    b2 = fMostRecentData[fCurrentIndex] & 0xFF;
1463                } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1464                    b2 = slowLoadNextByte();
1465                }
1466            } else {
1467                if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1468                    b2 = slowLoadNextByte();
1469                else
1470                    b2 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1471            }
1472        }
1473        if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
1474
// if ((ch >= 0xD800 && ch <= 0xDFFF) || ch >= 0xFFFE)
1475
if ((b0 == 0xED && b1 >= 0xA0) || (b0 == 0xEF && b1 == 0xBF && b2 >= 0xBE)) {
1476                fCurrentChunk = saveChunk;
1477                fCurrentIndex = saveIndex;
1478                fCurrentOffset = saveOffset;
1479                fMostRecentData = saveChunk.toByteArray();
1480                fMostRecentByte = b0;
1481                return -1;
1482            }
1483            int ch = ((0x0f & b0)<<12) + ((0x3f & b1)<<6) + (0x3f & b2);
1484            if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0) { // zzzz yyyy yyxx xxxx (0x800 to 0xffff)
1485
fCurrentChunk = saveChunk;
1486                fCurrentIndex = saveIndex;
1487                fCurrentOffset = saveOffset;
1488                fMostRecentData = saveChunk.toByteArray();
1489                fMostRecentByte = b0;
1490                return -1;
1491            }
1492            loadNextByte();
1493            return ch;
1494        }
1495        // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
1496
fCurrentChunk = saveChunk;
1497        fCurrentIndex = saveIndex;
1498        fCurrentOffset = saveOffset;
1499        fMostRecentData = saveChunk.toByteArray();
1500        fMostRecentByte = b0;
1501        return -1;
1502    }
1503    public int scanName(char fastcheck) throws Exception JavaDoc {
1504        int b0 = fMostRecentByte;
1505        int ch;
1506        if (b0 < 0x80) {
1507            if (XMLCharacterProperties.fgAsciiInitialNameChar[b0] == 0) {
1508                if (b0 == 0 && atEOF(fCurrentOffset + 1)) {
1509                    return changeReaders().scanName(fastcheck);
1510                }
1511                return -1;
1512            }
1513            ch = b0;
1514        } else {
1515            //
1516
// REVISIT - optimize this with in-buffer lookahead.
1517
//
1518
UTF8DataChunk saveChunk = fCurrentChunk;
1519            int saveIndex = fCurrentIndex;
1520            int saveOffset = fCurrentOffset;
1521            if (!fCalledCharPropInit) {
1522                XMLCharacterProperties.initCharFlags();
1523                fCalledCharPropInit = true;
1524            }
1525            int b1;
1526            if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1527                b1 = loadNextByte();
1528            } else {
1529                fCurrentOffset++;
1530                if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1531                    fCurrentIndex++;
1532                    try {
1533                        b1 = fMostRecentData[fCurrentIndex] & 0xFF;
1534                    } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1535                        b1 = slowLoadNextByte();
1536                    }
1537                } else {
1538                    if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1539                        b1 = slowLoadNextByte();
1540                    else
1541                        b1 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1542                }
1543            }
1544            if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx
1545
ch = ((0x1f & b0)<<6) + (0x3f & b1);
1546                if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0) { // yyy yyxx xxxx (0x80 to 0x7ff)
1547
fCurrentChunk = saveChunk;
1548                    fCurrentIndex = saveIndex;
1549                    fCurrentOffset = saveOffset;
1550                    fMostRecentData = saveChunk.toByteArray();
1551                    fMostRecentByte = b0;
1552                    return -1;
1553                }
1554            } else {
1555                int b2;
1556                if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1557                    b2 = loadNextByte();
1558                } else {
1559                    fCurrentOffset++;
1560                    if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1561                        fCurrentIndex++;
1562                        try {
1563                            b2 = fMostRecentData[fCurrentIndex] & 0xFF;
1564                        } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1565                            b2 = slowLoadNextByte();
1566                        }
1567                    } else {
1568                        if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1569                            b2 = slowLoadNextByte();
1570                        else
1571                            b2 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1572                    }
1573                }
1574                if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
1575
// if ((ch >= 0xD800 && ch <= 0xDFFF) || ch >= 0xFFFE)
1576
if ((b0 == 0xED && b1 >= 0xA0) || (b0 == 0xEF && b1 == 0xBF && b2 >= 0xBE)) {
1577                        fCurrentChunk = saveChunk;
1578                        fCurrentIndex = saveIndex;
1579                        fCurrentOffset = saveOffset;
1580                        fMostRecentData = saveChunk.toByteArray();
1581                        fMostRecentByte = b0;
1582                        return -1;
1583                    }
1584                    ch = ((0x0f & b0)<<12) + ((0x3f & b1)<<6) + (0x3f & b2);
1585                    if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0) { // zzzz yyyy yyxx xxxx (0x800 to 0xffff)
1586
fCurrentChunk = saveChunk;
1587                        fCurrentIndex = saveIndex;
1588                        fCurrentOffset = saveOffset;
1589                        fMostRecentData = saveChunk.toByteArray();
1590                        fMostRecentByte = b0;
1591                        return -1;
1592                    }
1593                } else { // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
1594
fCurrentChunk = saveChunk;
1595                    fCurrentIndex = saveIndex;
1596                    fCurrentOffset = saveOffset;
1597                    fMostRecentData = saveChunk.toByteArray();
1598                    fMostRecentByte = b0;
1599                    return -1;
1600                }
1601            }
1602        }
1603        fCharacterCounter++;
1604        if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1605            b0 = loadNextByte();
1606        } else {
1607            fCurrentOffset++;
1608            if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1609                fCurrentIndex++;
1610                try {
1611                    b0 = fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
1612                } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1613                    b0 = slowLoadNextByte();
1614                }
1615            } else {
1616                if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1617                    b0 = slowLoadNextByte();
1618                else
1619                    b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1620            }
1621        }
1622        return scanMatchingName(ch, b0, fastcheck);
1623    }
1624    private int scanMatchingName(int ch, int b0, int fastcheck) throws Exception JavaDoc {
1625        SymbolCache cache = fStringPool.getSymbolCache();
1626        int[][] cacheLines = cache.fCacheLines;
1627        char[] symbolChars = cache.fSymbolChars;
1628        boolean lengthOfOne = fastcheck == fMostRecentByte;
1629        int startOffset = cache.fSymbolCharsOffset;
1630        int entry = 0;
1631        int[] entries = cacheLines[entry];
1632        int offset = 1 + ((entries[0] - 1) * SymbolCache.CACHE_RECORD_SIZE);
1633        int totalMisses = 0;
1634        if (lengthOfOne) {
1635            while (offset > 0) {
1636                if (entries[offset + SymbolCache.CHAR_OFFSET] == ch) {
1637                    if (entries[offset + SymbolCache.INDEX_OFFSET] != -1) {
1638                        int symbolIndex = entries[offset + SymbolCache.INDEX_OFFSET];
1639                        if (totalMisses > 3)
1640                            fStringPool.updateCacheLine(symbolIndex, totalMisses, 1);
1641                        return symbolIndex;
1642                    }
1643                    break;
1644                }
1645                offset -= SymbolCache.CACHE_RECORD_SIZE;
1646                totalMisses++;
1647            }
1648            try {
1649                symbolChars[cache.fSymbolCharsOffset] = (char)ch;
1650            } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1651                symbolChars = new char[cache.fSymbolCharsOffset * 2];
1652                System.arraycopy(cache.fSymbolChars, 0, symbolChars, 0, cache.fSymbolCharsOffset);
1653                cache.fSymbolChars = symbolChars;
1654                symbolChars[cache.fSymbolCharsOffset] = (char)ch;
1655            }
1656            cache.fSymbolCharsOffset++;
1657            if (offset < 0) {
1658                offset = 1 + (entries[0] * SymbolCache.CACHE_RECORD_SIZE);
1659                entries[0]++;
1660                try {
1661                    entries[offset + SymbolCache.CHAR_OFFSET] = ch;
1662                } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1663                    int newSize = 1 + ((offset - 1) * 2);
1664                    entries = new int[newSize];
1665                    System.arraycopy(cacheLines[entry], 0, entries, 0, offset);
1666                    cacheLines[entry] = entries;
1667                    entries[offset + SymbolCache.CHAR_OFFSET] = ch;
1668                }
1669                entries[offset + SymbolCache.NEXT_OFFSET] = -1;
1670            }
1671            int result = fStringPool.createNonMatchingSymbol(startOffset, entry, entries, offset);
1672            return result;
1673        }
1674        try {
1675            symbolChars[cache.fSymbolCharsOffset] = (char)ch;
1676        } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1677            symbolChars = new char[cache.fSymbolCharsOffset * 2];
1678            System.arraycopy(cache.fSymbolChars, 0, symbolChars, 0, cache.fSymbolCharsOffset);
1679            cache.fSymbolChars = symbolChars;
1680            symbolChars[cache.fSymbolCharsOffset] = (char)ch;
1681        }
1682        cache.fSymbolCharsOffset++;
1683        int depth = 1;
1684        while (true) {
1685            if (offset < 0)
1686                break;
1687            if (entries[offset + SymbolCache.CHAR_OFFSET] != ch) {
1688                offset -= SymbolCache.CACHE_RECORD_SIZE;
1689                totalMisses++;
1690                continue;
1691            }
1692            if (b0 >= 0x80) {
1693                ch = getMultiByteSymbolChar(b0);
1694                b0 = fMostRecentByte;
1695            } else if (b0 == fastcheck || XMLCharacterProperties.fgAsciiNameChar[b0] == 0) {
1696                ch = -1;
1697            } else {
1698                ch = b0;
1699                fCharacterCounter++;
1700                if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1701                    b0 = loadNextByte();
1702                } else {
1703                    fCurrentOffset++;
1704                    if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1705                        fCurrentIndex++;
1706                        try {
1707                            b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1708                        } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1709                            b0 = slowLoadNextByte();
1710                        }
1711                    } else {
1712                        if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1713                            b0 = slowLoadNextByte();
1714                        else
1715                            b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1716                    }
1717                }
1718            }
1719            if (ch == -1) {
1720                if (entries[offset + SymbolCache.INDEX_OFFSET] == -1) {
1721                    return fStringPool.createNonMatchingSymbol(startOffset, entry, entries, offset);
1722                }
1723                cache.fSymbolCharsOffset = startOffset;
1724                int symbolIndex = entries[offset + SymbolCache.INDEX_OFFSET];
1725                if (totalMisses > (depth * 3))
1726                    fStringPool.updateCacheLine(symbolIndex, totalMisses, depth);
1727                return symbolIndex;
1728            }
1729            try {
1730                symbolChars[cache.fSymbolCharsOffset] = (char)ch;
1731            } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1732                symbolChars = new char[cache.fSymbolCharsOffset * 2];
1733                System.arraycopy(cache.fSymbolChars, 0, symbolChars, 0, cache.fSymbolCharsOffset);
1734                cache.fSymbolChars = symbolChars;
1735                symbolChars[cache.fSymbolCharsOffset] = (char)ch;
1736            }
1737            cache.fSymbolCharsOffset++;
1738            entry = entries[offset + SymbolCache.NEXT_OFFSET];
1739            try {
1740                entries = cacheLines[entry];
1741            } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1742                if (entry == -1) {
1743                    entry = cache.fCacheLineCount++;
1744                    entries[offset + SymbolCache.NEXT_OFFSET] = entry;
1745                    entries = new int[1+(SymbolCache.INITIAL_CACHE_RECORD_COUNT*SymbolCache.CACHE_RECORD_SIZE)];
1746                    try {
1747                        cacheLines[entry] = entries;
1748                    } catch (ArrayIndexOutOfBoundsException JavaDoc ex2) {
1749                        cacheLines = new int[entry * 2][];
1750                        System.arraycopy(cache.fCacheLines, 0, cacheLines, 0, entry);
1751                        cache.fCacheLines = cacheLines;
1752                        cacheLines[entry] = entries;
1753                    }
1754                } else {
1755                    entries = cacheLines[entry];
1756                    throw new RuntimeException JavaDoc("RDR001 untested"); // REVISIT
1757
}
1758            }
1759            offset = 1 + ((entries[0] - 1) * SymbolCache.CACHE_RECORD_SIZE);
1760            depth++;
1761        }
1762        if (offset < 0)
1763            offset = 1 + (entries[0] * SymbolCache.CACHE_RECORD_SIZE);
1764        while (true) {
1765            entries[0]++;
1766            try {
1767                entries[offset + SymbolCache.CHAR_OFFSET] = ch;
1768            } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1769                int newSize = 1 + ((offset - 1) * 2);
1770                entries = new int[newSize];
1771                System.arraycopy(cacheLines[entry], 0, entries, 0, offset);
1772                cacheLines[entry] = entries;
1773                entries[offset + SymbolCache.CHAR_OFFSET] = ch;
1774            }
1775            if (b0 >= 0x80) {
1776                ch = getMultiByteSymbolChar(b0);
1777                b0 = fMostRecentByte;
1778            } else if (b0 == fastcheck || XMLCharacterProperties.fgAsciiNameChar[b0] == 0) {
1779                ch = -1;
1780            } else {
1781                ch = b0;
1782                fCharacterCounter++;
1783                if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1784                    b0 = loadNextByte();
1785                } else {
1786                    fCurrentOffset++;
1787                    if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1788                        fCurrentIndex++;
1789                        try {
1790                            b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1791                        } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1792                            b0 = slowLoadNextByte();
1793                        }
1794                    } else {
1795                        if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1796                            b0 = slowLoadNextByte();
1797                        else
1798                            b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1799                    }
1800                }
1801            }
1802            if (ch == -1) {
1803                entries[offset + SymbolCache.NEXT_OFFSET] = -1;
1804                break;
1805            }
1806            entry = cache.fCacheLineCount++;
1807            entries[offset + SymbolCache.INDEX_OFFSET] = -1;
1808            entries[offset + SymbolCache.NEXT_OFFSET] = entry;
1809            entries = new int[1+(SymbolCache.INITIAL_CACHE_RECORD_COUNT*SymbolCache.CACHE_RECORD_SIZE)];
1810            try {
1811                cacheLines[entry] = entries;
1812            } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1813                cacheLines = new int[entry * 2][];
1814                System.arraycopy(cache.fCacheLines, 0, cacheLines, 0, entry);
1815                cache.fCacheLines = cacheLines;
1816                cacheLines[entry] = entries;
1817            }
1818            offset = 1;
1819            try {
1820                symbolChars[cache.fSymbolCharsOffset] = (char)ch;
1821            } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1822                symbolChars = new char[cache.fSymbolCharsOffset * 2];
1823                System.arraycopy(cache.fSymbolChars, 0, symbolChars, 0, cache.fSymbolCharsOffset);
1824                cache.fSymbolChars = symbolChars;
1825                symbolChars[cache.fSymbolCharsOffset] = (char)ch;
1826            }
1827            cache.fSymbolCharsOffset++;
1828        }
1829
1830        int result = fStringPool.createNonMatchingSymbol(startOffset, entry, entries, offset);
1831        return result;
1832    }
//
// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
//
private int recognizeMarkup(int b0, QName element) throws Exception JavaDoc {
1837        switch (b0) {
1838        case 0:
1839            return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
1840        case '?':
1841            fCharacterCounter++;
1842            loadNextByte();
1843            return XMLEntityHandler.CONTENT_RESULT_START_OF_PI;
1844        case '!':
1845            fCharacterCounter++;
1846            b0 = loadNextByte();
1847            if (b0 == 0) {
1848                fCharacterCounter--;
1849                fCurrentOffset--;
1850                return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
1851            }
1852            if (b0 == '-') {
1853                fCharacterCounter++;
1854                b0 = loadNextByte();
1855                if (b0 == 0) {
1856                    fCharacterCounter -= 2;
1857                    fCurrentOffset -= 2;
1858                    return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
1859                }
1860                if (b0 == '-') {
1861                    fCharacterCounter++;
1862                    b0 = loadNextByte();
1863                    return XMLEntityHandler.CONTENT_RESULT_START_OF_COMMENT;
1864                }
1865                break;
1866            }
1867            if (b0 == '[') {
1868                for (int i = 0; i < 6; i++) {
1869                    fCharacterCounter++;
1870                    b0 = loadNextByte();
1871                    if (b0 == 0) {
1872                        fCharacterCounter -= (2 + i);
1873                        fCurrentOffset -= (2 + i);
1874                        return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
1875                    }
1876                    if (b0 != cdata_string[i]) {
1877                        return XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED;
1878                    }
1879                }
1880                fCharacterCounter++;
1881                loadNextByte();
1882                return XMLEntityHandler.CONTENT_RESULT_START_OF_CDSECT;
1883            }
1884            break;
1885        case '/':
1886            fCharacterCounter++;
1887            if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1888                b0 = loadNextByte();
1889            } else {
1890                fCurrentOffset++;
1891                if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1892                    fCurrentIndex++;
1893                    try {
1894                        b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1895                    } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1896                        b0 = slowLoadNextByte();
1897                    }
1898                } else {
1899                    if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1900                        b0 = slowLoadNextByte();
1901                    else
1902                        b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1903                }
1904            }
1905            int expectedName = element.rawname;
1906            fStringPool.getCharArrayRange(expectedName, fCharArrayRange);
1907            char[] expected = fCharArrayRange.chars;
1908            int offset = fCharArrayRange.offset;
1909            int len = fCharArrayRange.length;
1910            //
1911
// DEFECT !! - needs UTF8 multibyte support...
1912
//
1913
if (b0 == expected[offset++]) {
1914                UTF8DataChunk savedChunk = fCurrentChunk;
1915                int savedIndex = fCurrentIndex;
1916                int savedOffset = fCurrentOffset;
1917                for (int i = 1; i < len; i++) {
1918                    if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1919                        b0 = loadNextByte();
1920                    } else {
1921                        fCurrentOffset++;
1922                        if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1923                            fCurrentIndex++;
1924                            try {
1925                                b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1926                            } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1927                                b0 = slowLoadNextByte();
1928                            }
1929                        } else {
1930                            if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1931                                b0 = slowLoadNextByte();
1932                            else
1933                                b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1934                        }
1935                    }
1936                    //
1937
// DEFECT !! - needs UTF8 multibyte support...
1938
//
1939
if (b0 != expected[offset++]) {
1940                        fCurrentChunk = savedChunk;
1941                        fCurrentIndex = savedIndex;
1942                        fCurrentOffset = savedOffset;
1943                        fMostRecentData = fCurrentChunk.toByteArray();
1944                        fMostRecentByte = fMostRecentData[savedIndex] & 0xFF;
1945                        return XMLEntityHandler.CONTENT_RESULT_START_OF_ETAG;
1946                    }
1947                }
1948                fCharacterCounter += len; // REVISIT - double check this...
1949
fCharacterCounter++;
1950                if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1951                    b0 = loadNextByte();
1952                } else {
1953                    fCurrentOffset++;
1954                    if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1955                        fCurrentIndex++;
1956                        try {
1957                            b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1958                        } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1959                            b0 = slowLoadNextByte();
1960                        }
1961                    } else {
1962                        if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1963                            b0 = slowLoadNextByte();
1964                        else
1965                            b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1966                    }
1967                }
1968                if (b0 == '>') {
1969                    fCharacterCounter++;
1970                    if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1971                        loadNextByte();
1972                    } else {
1973                        fCurrentOffset++;
1974                        if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1975                            fCurrentIndex++;
1976                            try {
1977                                fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
1978                            } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
1979                                slowLoadNextByte();
1980                            }
1981                        } else {
1982                            if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1983                                slowLoadNextByte();
1984                            else
1985                                fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
1986                        }
1987                    }
1988                    return XMLEntityHandler.CONTENT_RESULT_MATCHING_ETAG;
1989                }
1990                while (b0 == 0x20 || b0 == 0x09 || b0 == 0x0A || b0 == 0x0D) {
1991                    if (b0 == 0x0A) {
1992                        fLinefeedCounter++;
1993                        fCharacterCounter = 1;
1994                        b0 = loadNextByte();
1995                    } else if (b0 == 0x0D) {
1996                        fCarriageReturnCounter++;
1997                        fCharacterCounter = 1;
1998                        b0 = loadNextByte();
1999                        if (b0 == 0x0A) {
2000                            fLinefeedCounter++;
2001                            b0 = loadNextByte();
2002                        }
2003                    } else {
2004                        fCharacterCounter++;
2005                        b0 = loadNextByte();
2006                    }
2007                    if (b0 == '>') {
2008                        fCharacterCounter++;
2009                        if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
2010                            loadNextByte();
2011                        } else {
2012                            fCurrentOffset++;
2013                            if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
2014                                fCurrentIndex++;
2015                                try {
2016                                    fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
2017                                } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
2018                                    slowLoadNextByte();
2019                                }
2020                            } else {
2021                                if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
2022                                    slowLoadNextByte();
2023                                else
2024                                    fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
2025                            }
2026                        }
2027                        return XMLEntityHandler.CONTENT_RESULT_MATCHING_ETAG;
2028                    }
2029                }
2030                fCurrentChunk = savedChunk;
2031                fCurrentIndex = savedIndex;
2032                fCurrentOffset = savedOffset;
2033                fMostRecentData = fCurrentChunk.toByteArray();
2034                fMostRecentByte = fMostRecentData[savedIndex] & 0xFF;
2035            }
2036            return XMLEntityHandler.CONTENT_RESULT_START_OF_ETAG;
2037        default:
2038            return XMLEntityHandler.CONTENT_RESULT_START_OF_ELEMENT;
2039        }
2040        return XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED;
2041    }
2042    private int recognizeReference(int ch) throws Exception JavaDoc {
2043        if (ch == 0) {
2044            return XMLEntityHandler.CONTENT_RESULT_REFERENCE_END_OF_INPUT;
2045        }
2046        //
2047
// [67] Reference ::= EntityRef | CharRef
2048
// [68] EntityRef ::= '&' Name ';'
2049
// [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2050
//
2051
if (ch == '#') {
2052            fCharacterCounter++;
2053            loadNextByte();
2054            return XMLEntityHandler.CONTENT_RESULT_START_OF_CHARREF;
2055        } else {
2056            return XMLEntityHandler.CONTENT_RESULT_START_OF_ENTITYREF;
2057        }
2058    }
2059    public int scanContent(QName element) throws Exception JavaDoc {
2060        if (fCallClearPreviousChunk && fCurrentChunk.clearPreviousChunk())
2061            fCallClearPreviousChunk = false;
2062        fCharDataLength = 0;
2063        int charDataOffset = fCurrentOffset;
2064        int ch = fMostRecentByte;
2065        if (ch < 0x80) {
2066            switch (XMLCharacterProperties.fgAsciiWSCharData[ch]) {
2067            case 0:
2068                if (fSendCharDataAsCharArray) {
2069                    try {
2070                        fCharacters[fCharDataLength] = (char)ch;
2071                        fCharDataLength++;
2072                    } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
2073                        slowAppendCharData(ch);
2074                    }
2075                }
2076                fCharacterCounter++;
2077                if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
2078                    ch = loadNextByte();
2079                } else {
2080                    fCurrentOffset++;
2081                    if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
2082                        fCurrentIndex++;
2083                        try {
2084                            ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
2085                        } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
2086                            ch = slowLoadNextByte();
2087                        }
2088                    } else {
2089                        if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
2090                            ch = slowLoadNextByte();
2091                        else
2092                            ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
2093                    }
2094                }
2095                break;
2096            case 1: // '<'
2097
fCharacterCounter++;
2098                if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
2099                    ch = loadNextByte();
2100                } else {
2101                    fCurrentOffset++;
2102                    if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
2103                        fCurrentIndex++;
2104                        try {
2105                            ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
2106                        } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
2107                            ch = slowLoadNextByte();
2108                        }
2109                    } else {
2110                        if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
2111                            ch = slowLoadNextByte();
2112                        else
2113                            ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
2114                    }
2115                }
2116                if (!fInCDSect) {
2117                    return recognizeMarkup(ch, element);
2118                }
2119                if (fSendCharDataAsCharArray)
2120                    appendCharData('<');
2121                break;
2122            case 2: // '&'
2123
fCharacterCounter++;
2124                ch = loadNextByte();
2125                if (!fInCDSect) {
2126                    return recognizeReference(ch);
2127                }
2128                if (fSendCharDataAsCharArray)
2129                    appendCharData('&');
2130                break;
2131            case 3: // ']'
2132
fCharacterCounter++;
2133                ch = loadNextByte();
2134                if (ch != ']') {
2135                    if (fSendCharDataAsCharArray)
2136                        appendCharData(']');
2137                    break;
2138                }
2139                if (fCurrentIndex + 1 == UTF8DataChunk.CHUNK_SIZE) {
2140                    UTF8DataChunk saveChunk = fCurrentChunk;
2141                    int saveIndex = fCurrentIndex;
2142                    int saveOffset = fCurrentOffset;
2143                    if (loadNextByte() != '>') {
2144                        fCurrentChunk = saveChunk;
2145                        fCurrentIndex = saveIndex;
2146                        fCurrentOffset = saveOffset;
2147                        fMostRecentData = fCurrentChunk.toByteArray();
2148                        fMostRecentByte = ']';
2149                        if (fSendCharDataAsCharArray)
2150                            appendCharData(']');
2151                        break;
2152                    }
2153                } else {
2154                    if (fMostRecentData[fCurrentIndex + 1] != '>') {
2155                        if (fSendCharDataAsCharArray)
2156                            appendCharData(']');
2157                        break;
2158                    }
2159                    fCurrentIndex++;
2160                    fCurrentOffset++;
2161                }
2162                loadNextByte();
2163                fCharacterCounter += 2;
2164                return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
2165            case 4: // invalid char
2166
if (ch == 0 && atEOF(fCurrentOffset + 1)) {
2167                    changeReaders();
2168                    return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; // REVISIT - not quite...
2169
}
2170                return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
2171            case 5:
2172                do {
2173                    if (ch == 0x0A) {
2174                        fLinefeedCounter++;
2175                        fCharacterCounter = 1;
2176                    } else if (ch == 0x0D) {
2177                        fCarriageReturnCounter++;
2178                        fCharacterCounter = 1;
2179                        if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
2180                            ch = loadNextByte();
2181                        } else {
2182                            fCurrentOffset++;
2183                            if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
2184                                fCurrentIndex++;
2185                                try {
2186                                    ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
2187                                } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
2188                                    ch = slowLoadNextByte();
2189                                }
2190                            } else {
2191                                if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
2192                                    ch = slowLoadNextByte();
2193                                else
2194                                    ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
2195                            }
2196                        }
2197                        if (ch != 0x0A) {
2198                            if (fSendCharDataAsCharArray)
2199                                appendCharData(0x0A);
2200                            if (ch == 0x20 || ch == 0x09 || ch == 0x0D)
2201                                continue;
2202                            break;
2203                        }
2204                        fLinefeedCounter++;
2205                    } else {
2206                        fCharacterCounter++;
2207                    }
2208                    if (fSendCharDataAsCharArray) {
2209                        try {
2210                            fCharacters[fCharDataLength] = (char)ch;
2211                            fCharDataLength++;
2212                        } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
2213                            slowAppendCharData(ch);
2214                        }
2215                    }
2216                    if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
2217                        ch = loadNextByte();
2218                    } else {
2219                        fCurrentOffset++;
2220                        if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
2221                            fCurrentIndex++;
2222                            try {
2223                                ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
2224                            } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
2225                                ch = slowLoadNextByte();
2226                            }
2227                        } else {
2228                            if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
2229                                ch = slowLoadNextByte();
2230                            else
2231                                ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
2232                        }
2233                    }
2234                } while (ch == 0x20 || ch == 0x09 || ch == 0x0A || ch == 0x0D);
2235                if (ch < 0x80) {
2236                    switch (XMLCharacterProperties.fgAsciiCharData[ch]) {
2237                    case 0:
2238                        if (fSendCharDataAsCharArray)
2239                            appendCharData(ch);
2240                        fCharacterCounter++;
2241                        ch = loadNextByte();
2242                        break;
2243                    case 1: // '<'
2244
if (!fInCDSect) {
2245                            if (fSendCharDataAsCharArray) {
2246                                fCharDataHandler.processWhitespace(fCharacters, 0, fCharDataLength);
2247                            } else {
2248                                int stringIndex = addString(charDataOffset, fCurrentOffset - charDataOffset);
2249                                fCharDataHandler.processWhitespace(stringIndex);
2250                            }
2251                            fCharacterCounter++;
2252                            if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
2253                                ch = loadNextByte();
2254                            } else {
2255                                fCurrentOffset++;
2256                                if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
2257                                    fCurrentIndex++;
2258                                    try {
2259                                        ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
2260                                    } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
2261                                        ch = slowLoadNextByte();
2262                                    }
2263                                } else {
2264                                    if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
2265                                        ch = slowLoadNextByte();
2266                                    else
2267                                        ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
2268                                }
2269                            }
2270                            return recognizeMarkup(ch, element);
2271                        }
2272                        if (fSendCharDataAsCharArray)
2273                            appendCharData('<');
2274                        fCharacterCounter++;
2275                        ch = loadNextByte();
2276                        break;
2277                    case 2: // '&'
2278
if (!fInCDSect) {
2279                            whitespace(charDataOffset, fCurrentOffset);
2280                            fCharacterCounter++;
2281                            ch = loadNextByte();
2282                            return recognizeReference(ch);
2283                        }
2284                        if (fSendCharDataAsCharArray)
2285                            appendCharData('&');
2286                        fCharacterCounter++;
2287                        ch = loadNextByte();
2288                        break;
2289                    case 3: // ']'
2290
int endOffset = fCurrentOffset;
2291                        ch = loadNextByte();
2292                        if (ch != ']') {
2293                            fCharacterCounter++;
2294                            if (fSendCharDataAsCharArray)
2295                                appendCharData(']');
2296                            break;
2297                        }
2298                        if (fCurrentIndex + 1 == UTF8DataChunk.CHUNK_SIZE) {
2299                            UTF8DataChunk saveChunk = fCurrentChunk;
2300                            int saveIndex = fCurrentIndex;
2301                            int saveOffset = fCurrentOffset;
2302                            if (loadNextByte() != '>') {
2303                                fCurrentChunk = saveChunk;
2304                                fCurrentIndex = saveIndex;
2305                                fCurrentOffset = saveOffset;
2306                                fMostRecentData = fCurrentChunk.toByteArray();
2307                                fMostRecentByte = ']';
2308                                fCharacterCounter++;
2309                                if (fSendCharDataAsCharArray)
2310                                    appendCharData(']');
2311                                break;
2312                            }
2313                        } else {
2314                            if (fMostRecentData[fCurrentIndex + 1] != '>') {
2315                                fCharacterCounter++;
2316                                if (fSendCharDataAsCharArray)
2317                                    appendCharData(']');
2318                                break;
2319                            }
2320                            fCurrentIndex++;
2321                            fCurrentOffset++;
2322                        }
2323                        loadNextByte();
2324                        whitespace(charDataOffset, endOffset);
2325                        fCharacterCounter += 3;
2326                        return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
2327                    case 4: // invalid char
2328
whitespace(charDataOffset, fCurrentOffset);
2329                        if (ch == 0 && atEOF(fCurrentOffset + 1)) {
2330                            changeReaders();
2331                            return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; // REVISIT - not quite...
2332
}
2333                        return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
2334                    }
2335                } else {
2336                    if (fSendCharDataAsCharArray) {
2337                        if (!copyMultiByteCharData(ch)) {
2338                            whitespace(charDataOffset, fCurrentOffset);
2339                            return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
2340                        }
2341                    } else if (!skipMultiByteCharData(ch)) {
2342                        whitespace(charDataOffset, fCurrentOffset);
2343                        return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
2344                    }
2345                }
2346                break;
2347            }
2348        } else {
2349            if (fSendCharDataAsCharArray) {
2350                if (!copyMultiByteCharData(ch)) {
2351                    return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
2352                }
2353            } else {
2354                if (!skipMultiByteCharData(ch)) {
2355                    return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
2356                }
2357            }
2358        }
2359        if (fSendCharDataAsCharArray)
2360            ch = copyAsciiCharData();
2361        else
2362            ch = skipAsciiCharData();
2363        while (true) {
2364            if (ch < 0x80) {
2365                switch (XMLCharacterProperties.fgAsciiCharData[ch]) {
2366                case 0:
2367                    if (fSendCharDataAsCharArray)
2368                        appendCharData(ch);
2369                    fCharacterCounter++;
2370                    ch = loadNextByte();
2371                    break;
2372                case 1: // '<'
2373
if (!fInCDSect) {
2374                        if (fSendCharDataAsCharArray) {
2375                            fCharDataHandler.processCharacters(fCharacters, 0, fCharDataLength);
2376                        } else {
2377                            int stringIndex = addString(charDataOffset, fCurrentOffset - charDataOffset);
2378                            fCharDataHandler.processCharacters(stringIndex);
2379                        }
2380                        fCharacterCounter++;
2381                        if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
2382                            ch = loadNextByte();
2383                        } else {
2384                            fCurrentOffset++;
2385                            if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
2386                                fCurrentIndex++;
2387                                try {
2388                                    ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
2389                                } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
2390                                    ch = slowLoadNextByte();
2391                                }
2392                            } else {
2393                                if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
2394                                    ch = slowLoadNextByte();
2395                                else
2396                                    ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
2397                            }
2398                        }
2399                        return recognizeMarkup(ch, element);
2400                    }
2401                    if (fSendCharDataAsCharArray)
2402                        appendCharData('<');
2403                    fCharacterCounter++;
2404                    ch = loadNextByte();
2405                    break;
2406                case 2: // '&'
2407
if (!fInCDSect) {
2408                        characters(charDataOffset, fCurrentOffset);
2409                        fCharacterCounter++;
2410                        ch = loadNextByte();
2411                        return recognizeReference(ch);
2412                    }
2413                    if (fSendCharDataAsCharArray)
2414                        appendCharData('&');
2415                    fCharacterCounter++;
2416                    ch = loadNextByte();
2417                    break;
2418                case 3: // ']'
2419
int endOffset = fCurrentOffset;
2420                    ch = loadNextByte();
2421                    if (ch != ']') {
2422                        fCharacterCounter++;
2423                        if (fSendCharDataAsCharArray)
2424                            appendCharData(']');
2425                        break;
2426                    }
2427                    if (fCurrentIndex + 1 == UTF8DataChunk.CHUNK_SIZE) {
2428                        UTF8DataChunk saveChunk = fCurrentChunk;
2429                        int saveIndex = fCurrentIndex;
2430                        int saveOffset = fCurrentOffset;
2431                        if (loadNextByte() != '>') {
2432                            fCurrentChunk = saveChunk;
2433                            fCurrentIndex = saveIndex;
2434                            fCurrentOffset = saveOffset;
2435                            fMostRecentData = fCurrentChunk.toByteArray();
2436                            fMostRecentByte = ']';
2437                            fCharacterCounter++;
2438                            if (fSendCharDataAsCharArray)
2439                                appendCharData(']');
2440                            break;
2441                        }
2442                    } else {
2443                        if (fMostRecentData[fCurrentIndex + 1] != '>') {
2444                            fCharacterCounter++;
2445                            if (fSendCharDataAsCharArray)
2446                                appendCharData(']');
2447                            break;
2448                        }
2449                        fCurrentIndex++;
2450                        fCurrentOffset++;
2451                    }
2452                    loadNextByte();
2453                    characters(charDataOffset, endOffset);
2454                    fCharacterCounter += 3;
2455                    return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
2456                case 4: // invalid char
2457
if (ch == 0x0A) {
2458                        if (fSendCharDataAsCharArray)
2459                            appendCharData(ch);
2460                        fLinefeedCounter++;
2461                        fCharacterCounter = 1;
2462                        ch = loadNextByte();
2463                        break;
2464                    }
2465                    if (ch == 0x0D) {
2466                        if (fSendCharDataAsCharArray)
2467                            appendCharData(0x0A);
2468                        fCarriageReturnCounter++;
2469                        fCharacterCounter = 1;
2470                        ch = loadNextByte();
2471                        if (ch == 0x0A) {
2472                            fLinefeedCounter++;
2473                            ch = loadNextByte();
2474                        }
2475                        break;
2476                    }
2477                    characters(charDataOffset, fCurrentOffset);
2478                    if (ch == 0 && atEOF(fCurrentOffset + 1)) {
2479                        changeReaders();
2480                        return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; // REVISIT - not quite...
2481
}
2482                    return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
2483                }
2484            } else {
2485                if (fSendCharDataAsCharArray) {
2486                    if (!copyMultiByteCharData(ch)) {
2487                        characters(charDataOffset, fCurrentOffset);
2488                        return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
2489                    }
2490                } else if (!skipMultiByteCharData(ch)) {
2491                    characters(charDataOffset, fCurrentOffset);
2492                    return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
2493                }
2494                ch = fMostRecentByte;
2495            }
2496        }
2497    }
2498    private boolean copyMultiByteCharData(int b0) throws Exception JavaDoc {
2499        UTF8DataChunk saveChunk = fCurrentChunk;
2500        int saveOffset = fCurrentOffset;
2501        int saveIndex = fCurrentIndex;
2502        int b1 = loadNextByte();
2503        if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx (0x80 to 0x7ff)
2504
int ch = ((0x1f & b0)<<6) + (0x3f & b1);
2505            appendCharData(ch); // yyy yyxx xxxx (0x80 to 0x7ff)
2506
loadNextByte();
2507            return true;
2508        }
2509        int b2 = loadNextByte();
2510        if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
2511
// ch = ((0x0f & b0)<<12) + ((0x3f & b1)<<6) + (0x3f & b2); // zzzz yyyy yyxx xxxx (0x800 to 0xffff)
2512
// if ((ch >= 0xD800 && ch <= 0xDFFF) || ch >= 0xFFFE)
2513
if ((b0 == 0xED && b1 >= 0xA0) || (b0 == 0xEF && b1 == 0xBF && b2 >= 0xBE)) {
2514                fCurrentChunk = saveChunk;
2515                fCurrentIndex = saveIndex;
2516                fCurrentOffset = saveOffset;
2517                fMostRecentData = saveChunk.toByteArray();
2518                fMostRecentByte = b0;
2519                return false;
2520            }
2521            int ch = ((0x0f & b0)<<12) + ((0x3f & b1)<<6) + (0x3f & b2);
2522            appendCharData(ch); // zzzz yyyy yyxx xxxx (0x800 to 0xffff)
2523
loadNextByte();
2524            return true;
2525        }
2526
2527        int b3 = loadNextByte(); // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
2528
// ch = ((0x0f & b0)<<18) + ((0x3f & b1)<<12) + ((0x3f & b2)<<6) + (0x3f & b3); // u uuuu zzzz yyyy yyxx xxxx (0x10000 to 0x1ffff)
2529
// if (ch >= 0x110000)
2530
if (( 0xf8 & b0 ) == 0xf0 ) {
2531            if (b0 > 0xF4 || (b0 == 0xF4 && b1 >= 0x90)) {
2532                fCurrentChunk = saveChunk;
2533                fCurrentIndex = saveIndex;
2534                fCurrentOffset = saveOffset;
2535                fMostRecentData = saveChunk.toByteArray();
2536                fMostRecentByte = b0;
2537                return false;
2538            }
2539            int ch = ((0x0f & b0)<<18) + ((0x3f & b1)<<12) + ((0x3f & b2)<<6) + (0x3f & b3);
2540            if (ch < 0x10000) {
2541                appendCharData(ch);
2542            } else {
2543                appendCharData(((ch-0x00010000)>>10)+0xd800);
2544                appendCharData(((ch-0x00010000)&0x3ff)+0xdc00);
2545            }
2546            loadNextByte();
2547            return true;
2548        } else {
2549            fCurrentChunk = saveChunk;
2550            fCurrentIndex = saveIndex;
2551            fCurrentOffset = saveOffset;
2552            fMostRecentData = saveChunk.toByteArray();
2553            fMostRecentByte = b0;
2554            return false;
2555        }
2556    }
2557    private boolean skipMultiByteCharData(int b0) throws Exception JavaDoc {
2558        UTF8DataChunk saveChunk = fCurrentChunk;
2559        int saveOffset = fCurrentOffset;
2560        int saveIndex = fCurrentIndex;
2561        int b1 = loadNextByte();
2562        if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx (0x80 to 0x7ff)
2563
loadNextByte();
2564            return true;
2565        }
2566        int b2 = loadNextByte();
2567        if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
2568
// ch = ((0x0f & b0)<<12) + ((0x3f & b1)<<6) + (0x3f & b2); // zzzz yyyy yyxx xxxx (0x800 to 0xffff)
2569
// if ((ch >= 0xD800 && ch <= 0xDFFF) || ch >= 0xFFFE)
2570
if ((b0 == 0xED && b1 >= 0xA0) || (b0 == 0xEF && b1 == 0xBF && b2 >= 0xBE)) {
2571                fCurrentChunk = saveChunk;
2572                fCurrentIndex = saveIndex;
2573                fCurrentOffset = saveOffset;
2574                fMostRecentData = saveChunk.toByteArray();
2575                fMostRecentByte = b0;
2576                return false;
2577            }
2578            loadNextByte();
2579            return true;
2580        }
2581        int b3 = loadNextByte(); // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
2582
// ch = ((0x0f & b0)<<18) + ((0x3f & b1)<<12) + ((0x3f & b2)<<6) + (0x3f & b3); // u uuuu zzzz yyyy yyxx xxxx (0x10000 to 0x1ffff)
2583
// if (ch >= 0x110000)
2584
if (b0 > 0xF4 || (b0 == 0xF4 && b1 >= 0x90)) {
2585            fCurrentChunk = saveChunk;
2586            fCurrentIndex = saveIndex;
2587            fCurrentOffset = saveOffset;
2588            fMostRecentData = saveChunk.toByteArray();
2589            fMostRecentByte = b0;
2590            return false;
2591        }
2592        loadNextByte();
2593        return true;
2594    }
2595    private int copyAsciiCharData() throws Exception JavaDoc {
2596        int srcIndex = fCurrentIndex;
2597        int offset = fCurrentOffset - srcIndex;
2598        byte[] data = fMostRecentData;
2599        int dstIndex = fCharDataLength;
2600        boolean skiplf = false;
2601        while (true) {
2602            int ch;
2603            try {
2604                ch = data[srcIndex] & 0xFF;
2605            } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
2606                offset += srcIndex;
2607                slowLoadNextByte();
2608                srcIndex = 0;
2609                data = fMostRecentData;
2610                ch = data[srcIndex] & 0xFF;
2611            }
2612            if (ch >= 0x80) {
2613                fCurrentOffset = offset + srcIndex;
2614                fCurrentIndex = srcIndex;
2615                fMostRecentByte = ch;
2616                return ch;
2617            }
2618            if (XMLCharacterProperties.fgAsciiCharData[ch] == 0) {
2619                fCharacterCounter++;
2620                skiplf = false;
2621            } else if (ch == 0x0A) {
2622                fLinefeedCounter++;
2623                if (skiplf) {
2624                    skiplf = false;
2625                    srcIndex++;
2626                    continue;
2627                }
2628                fCharacterCounter = 1;
2629            } else if (ch == 0x0D) {
2630                fCarriageReturnCounter++;
2631                fCharacterCounter = 1;
2632                skiplf = true;
2633                ch = 0x0A;
2634            } else {
2635                fCurrentOffset = offset + srcIndex;
2636                fCurrentIndex = srcIndex;
2637                fMostRecentByte = ch;
2638                return ch;
2639            }
2640            srcIndex++;
2641            try {
2642                fCharacters[fCharDataLength] = (char)ch;
2643                fCharDataLength++;
2644            } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
2645                slowAppendCharData(ch);
2646            }
2647        }
2648    }
2649    private int skipAsciiCharData() throws Exception JavaDoc {
2650        int srcIndex = fCurrentIndex;
2651        int offset = fCurrentOffset - srcIndex;
2652        byte[] data = fMostRecentData;
2653        while (true) {
2654            int ch;
2655            try {
2656                ch = data[srcIndex] & 0xFF;
2657            } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
2658                offset += srcIndex;
2659                slowLoadNextByte();
2660                srcIndex = 0;
2661                data = fMostRecentData;
2662                ch = data[srcIndex] & 0xFF;
2663            }
2664            if (ch >= 0x80) {
2665                fCurrentOffset = offset + srcIndex;
2666                fCurrentIndex = srcIndex;
2667                fMostRecentByte = ch;
2668                return ch;
2669            }
2670            if (XMLCharacterProperties.fgAsciiCharData[ch] == 0) {
2671                fCharacterCounter++;
2672            } else if (ch == 0x0A) {
2673                fLinefeedCounter++;
2674                fCharacterCounter = 1;
2675            } else if (ch == 0x0D) {
2676                fCarriageReturnCounter++;
2677                fCharacterCounter = 1;
2678            } else {
2679                fCurrentOffset = offset + srcIndex;
2680                fCurrentIndex = srcIndex;
2681                fMostRecentByte = ch;
2682                return ch;
2683            }
2684            srcIndex++;
2685        }
2686    }
2687    private char[] fCharacters = new char[UTF8DataChunk.CHUNK_SIZE];
2688    private int fCharDataLength = 0;
2689    private void appendCharData(int ch) throws Exception JavaDoc {
2690        try {
2691            fCharacters[fCharDataLength] = (char)ch;
2692            fCharDataLength++;
2693        } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
2694            slowAppendCharData(ch);
2695        }
2696    }
2697    private void slowAppendCharData(int ch) throws Exception JavaDoc {
2698        // flush the buffer...
2699
characters(0, fCharDataLength); /* DEFECT !! whitespace this long is unlikely, but possible */
2700        fCharDataLength = 0;
2701        fCharacters[fCharDataLength++] = (char)ch;
2702    }
2703    private void characters(int offset, int endOffset) throws Exception JavaDoc {
2704        //
2705
// REVISIT - need more up front bounds checking code of params...
2706
//
2707
if (!fSendCharDataAsCharArray) {
2708            int stringIndex = addString(offset, endOffset - offset);
2709            fCharDataHandler.processCharacters(stringIndex);
2710            return;
2711        }
2712        fCharDataHandler.processCharacters(fCharacters, 0, fCharDataLength);
2713    }
2714    private void whitespace(int offset, int endOffset) throws Exception JavaDoc {
2715        //
2716
// REVISIT - need more up front bounds checking code of params...
2717
//
2718
if (!fSendCharDataAsCharArray) {
2719            int stringIndex = addString(offset, endOffset - offset);
2720            fCharDataHandler.processWhitespace(stringIndex);
2721            return;
2722        }
2723        fCharDataHandler.processWhitespace(fCharacters, 0, fCharDataLength);
2724    }
2725    //
2726
//
2727
//
2728
// The characters "CDATA[". NOTE(review): presumably matched after "<![" when
// scanning a CDATA section start; the use site is outside this part of the file.
private static final char[] cdata_string = { 'C','D','A','T','A','['};
// NOTE(review): not referenced in this part of the file; role to be confirmed.
2729    private StringPool.CharArrayRange fCharArrayRange = null;
// Stream that fillCurrentChunk() reads from; closed and set to null once a read
// reports end of stream.
2730    private InputStream JavaDoc fInputStream = null;
// NOTE(review): not referenced in this part of the file; role to be confirmed.
2731    private StringPool fStringPool = null;
// Chunk whose byte array fillCurrentChunk() obtains, fills and stores back.
2732    private UTF8DataChunk fCurrentChunk = null;
// Index into the current chunk's data; reset to 0 by fillCurrentChunk().
2733    private int fCurrentIndex = 0;
// Byte array most recently installed by fillCurrentChunk().
2734    private byte[] fMostRecentData = null;
// Most recently returned byte as an unsigned value (masked with 0xFF in fillCurrentChunk()).
2735    private int fMostRecentByte = 0;
// Running total of bytes read from the stream, increased by fillCurrentChunk().
2736    private int fLength = 0;
// NOTE(review): not referenced in this part of the file; role to be confirmed.
2737    private boolean fCalledCharPropInit = false;
// NOTE(review): not referenced in this part of the file; role to be confirmed.
2738    private boolean fCallClearPreviousChunk = true;
2739    //
2740
//
2741
//
2742
private int fillCurrentChunk() throws Exception JavaDoc {
2743        byte[] buf = fCurrentChunk.toByteArray();
2744        if (fInputStream == null) {
2745            if (buf == null)
2746                buf = new byte[1];
2747            buf[0] = 0;
2748            fMostRecentData = buf;
2749            fCurrentIndex = 0;
2750            fCurrentChunk.setByteArray(fMostRecentData);
2751            return(fMostRecentByte = fMostRecentData[0] & 0xFF);
2752        }
2753        if (buf == null)
2754            buf = new byte[UTF8DataChunk.CHUNK_SIZE];
2755        int offset = 0;
2756        int capacity = UTF8DataChunk.CHUNK_SIZE;
2757        int result = 0;
2758        do {
2759            try {
2760                result = fInputStream.read(buf, offset, capacity);
2761            } catch (java.io.IOException JavaDoc ex) {
2762                result = -1;
2763            }
2764            if (result == -1) {
2765                //
2766
// We have reached the end of the stream.
2767
//
2768
fInputStream.close();
2769                fInputStream = null;
2770                try {
2771                    buf[offset] = 0;
2772                } catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
2773                }
2774                break;
2775            }
2776            if (result > 0) {
2777                offset += result;
2778                capacity -= result;
2779            }
2780        } while (capacity > 0);
2781        fMostRecentData = buf;
2782        fLength += offset;
2783        fCurrentIndex = 0;
2784        fCurrentChunk.setByteArray(fMostRecentData);
2785        return(fMostRecentByte = fMostRecentData[0] & 0xFF);
2786    }
2787}
2788
Popular Tags