KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > apache > xerces > readers > UCSReader


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 1999,2000 The Apache Software Foundation. All rights
6  * reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

57
58 package org.enhydra.apache.xerces.readers;
59
60 import java.io.IOException JavaDoc;
61
62 import org.enhydra.apache.xerces.framework.XMLErrorReporter;
63 import org.enhydra.apache.xerces.utils.ChunkyByteArray;
64 import org.enhydra.apache.xerces.utils.ChunkyCharArray;
65 import org.enhydra.apache.xerces.utils.QName;
66 import org.enhydra.apache.xerces.utils.StringHasher;
67 import org.enhydra.apache.xerces.utils.StringPool;
68 import org.enhydra.apache.xerces.utils.XMLCharacterProperties;
69
70 /**
71  * Reader for UCS-2 and UCS-4 encodings.
72  * <p>
73  * This reader is created by the UCSRecognizer class when it decides that the
74  * byte stream is encoded in a format supported by this class. This class
75  * was intended to be another example of an encoding sensitive reader that
76  * could take advantage of the system design to improve performance and reduce
77  * resource consumption, but the actual performance tuning remains to be done.
78  *
79  * @version $Id: UCSReader.java,v 1.2 2005/01/26 08:28:44 jkjome Exp $
80  */

81 final class UCSReader extends XMLEntityReader implements StringPool.StringProducer {
82
83     //
84
// Constants
85
//
86

87     // debugging
88

89     /** Set to true to debug UTF-16, big-endian. */
90     private static final boolean DEBUG_UTF16_BIG = false;
91
92     //
93
// Scanner encoding enumeration
94
//
95
static final int
96         E_UCS4B = 0, // UCS-4 big endian
97
E_UCS4L = 1, // UCS-4 little endian
98
E_UCS2B = 2, // UCS-2 big endian with byte order mark
99
E_UCS2L = 3, // UCS-2 little endian with byte order mark
100
E_UCS2B_NOBOM = 4, // UCS-2 big endian without byte order mark
101
E_UCS2L_NOBOM = 5; // UCS-2 little endian without byte order mark
102
//
103
//
104
//
105
private ChunkyByteArray fData = null;
106     private int fEncoding = -1;
107     private StringPool fStringPool = null;
108     private int fBytesPerChar = -1;
109     private boolean fBigEndian = true;
110     private ChunkyCharArray fStringCharArray = null;
111     private boolean fCalledCharPropInit = false;
112     //
113
//
114
//
115
UCSReader(XMLEntityHandler entityHandler, XMLErrorReporter errorReporter, boolean sendCharDataAsCharArray, ChunkyByteArray data, int encoding, StringPool stringPool) throws Exception JavaDoc {
116         super(entityHandler, errorReporter, sendCharDataAsCharArray);
117         fCurrentOffset = (encoding == E_UCS2B || encoding == E_UCS2L) ? 2 : 0;
118         fData = data;
119         fEncoding = encoding;
120         fStringPool = stringPool;
121         fBytesPerChar = (fEncoding == E_UCS4B || fEncoding == E_UCS4L) ? 4 : 2;
122         fBigEndian = fEncoding == E_UCS4B || fEncoding == E_UCS2B || fEncoding == E_UCS2B_NOBOM;
123     }
124     //
125
//
126
//
127
private int getChar(int offset) throws IOException JavaDoc {
128         int b0 = fData.byteAt(offset++) & 0xff;
129         if (b0 == 0xff && fData.atEOF(offset))
130             return -1;
131         int b1 = fData.byteAt(offset++) & 0xff;
132         if (fBytesPerChar == 4) {
133             int b2 = fData.byteAt(offset++) & 0xff;
134             int b3 = fData.byteAt(offset++) & 0xff;
135             if (fBigEndian)
136                 return (b0<<24)+(b1<<16)+(b2<<8)+b3;
137             else
138                 return (b3<<24)+(b2<<16)+(b1<<8)+b0;
139         } else {
140             if (fBigEndian)
141                 return (b0<<8)+b1;
142             else
143                 return (b1<<8)+b0;
144         }
145     }
146     /**
147      *
148      */

149     public int addString(int offset, int length) {
150         if (length == 0)
151             return 0;
152         return fStringPool.addString(this, offset, length);
153     }
154     /**
155      *
156      */

157     public int addSymbol(int offset, int length) {
158         if (length == 0)
159             return 0;
160         return fStringPool.addSymbol(this, offset, length, getHashcode(offset, length));
161     }
162     //
163
//
164
//
165
public void append(XMLEntityHandler.CharBuffer charBuffer, int offset, int length) {
166         int endOffset = offset + length;
167         while (offset < endOffset) {
168             int ch;
169             try {
170                 ch = getChar(offset);
171             }
172             catch (IOException JavaDoc ex) {
173                 ch = 0; // REVISIT
174
}
175             charBuffer.append((char)ch);
176             offset += fBytesPerChar;
177         }
178     }
179     //
180
//
181
//
182
public void releaseString(int offset, int length) {
183         // nothing to do...
184
}
185     //
186
//
187
//
188
public String JavaDoc toString(int offset, int length) {
189         //
190
// REVISIT - we need to cache this operation !!
191
//
192
if (fStringCharArray == null)
193             fStringCharArray = new ChunkyCharArray(fStringPool);
194         int newOffset = fStringCharArray.length();
195         append(fStringCharArray, offset, length);
196         int newLength = fStringCharArray.length() - newOffset;
197         int stringIndex = fStringCharArray.addString(newOffset, newLength);
198         return fStringPool.toString(stringIndex);
199     }
200     //
201
//
202
//
203
private int getHashcode(int offset, int length) {
204         int endOffset = offset + length;
205         int hashcode = 0;
206         while (offset < endOffset) {
207             int ch;
208             try {
209                 ch = getChar(offset);
210             }
211             catch (IOException JavaDoc ex) {
212                 ch = 0; // REVISIT
213
}
214             hashcode = StringHasher.hashChar(hashcode, ch);
215             offset += fBytesPerChar;
216         }
217         return StringHasher.finishHash(hashcode);
218     }
219     //
220
public boolean equalsString(int offset, int length, char[] strChars, int strOffset, int strLength) {
221         int endOffset = offset + length;
222         int slen = strLength;
223         while (offset < endOffset) {
224             if (slen-- == 0)
225                 return false;
226             int ch;
227             try {
228                 ch = getChar(offset);
229             }
230             catch (IOException JavaDoc ex) {
231                 ch = 0; // REVISIT
232
}
233             if (ch != strChars[strOffset++])
234                 return false;
235             offset += fBytesPerChar;
236         }
237         return slen == 0;
238     }
239     //
240
//
241
//
242
private static char[] fCharacters = new char[256];
243     private int fCharDataLength = 0;
244     private void appendCharData(int ch) {
245         if (fCharacters.length == fCharDataLength) {
246             char[] newchars = new char[fCharacters.length * 2];
247             System.arraycopy(fCharacters, 0, newchars, 0, fCharacters.length);
248             fCharacters = newchars;
249         }
250         fCharacters[fCharDataLength++] = (char)ch;
251     }
252     public void callCharDataHandler(int offset, int length, boolean isWhitespace) throws Exception JavaDoc {
253         int endOffset = offset + length;
254         boolean skiplf = false;
255         while (offset < endOffset) {
256             int ch = getChar(offset);
257             // fix for Bug23: Element Data not normalized...
258
if (skiplf) {
259                 skiplf = false;
260                 if (ch == 0x0A) {
261                     offset += fBytesPerChar;
262                     continue;
263                 }
264             }
265             if (ch == 0x0D) {
266                 skiplf = true;
267                 ch = 0x0A;
268             }
269             appendCharData(ch);
270             offset += fBytesPerChar;
271         }
272         if (fSendCharDataAsCharArray) {
273             if (isWhitespace)
274                 fCharDataHandler.processWhitespace(fCharacters, 0, fCharDataLength);
275             else
276                 fCharDataHandler.processCharacters(fCharacters, 0, fCharDataLength);
277         } else {
278             int stringIndex = fStringPool.addString(new String JavaDoc(fCharacters, 0, fCharDataLength));
279             if (isWhitespace)
280                 fCharDataHandler.processWhitespace(stringIndex);
281             else
282                 fCharDataHandler.processCharacters(stringIndex);
283         }
284         fCharDataLength = 0;
285     }
286     //
287
//
288
//
289
public boolean lookingAtChar(char ch, boolean skipPastChar) throws Exception JavaDoc {
290         int ch2 = getChar(fCurrentOffset);
291         if (ch2 == ch) {
292             if (skipPastChar) {
293                 fCharacterCounter++;
294                 fCurrentOffset += fBytesPerChar;
295             }
296             return true;
297         }
298         return false;
299     }
300     //
301
//
302
//
303
public boolean lookingAtValidChar(boolean skipPastChar) throws Exception JavaDoc {
304         int ch = getChar(fCurrentOffset);
305         if (ch < 0x20) {
306             if (ch == 0x09) {
307                 if (!skipPastChar)
308                     return true;
309                 fCharacterCounter++;
310             } else if (ch == 0x0A) {
311                 if (!skipPastChar)
312                     return true;
313                 fLinefeedCounter++;
314                 fCharacterCounter = 1;
315             } else if (ch == 0x0D) {
316                 if (!skipPastChar)
317                     return true;
318                 fCarriageReturnCounter++;
319                 fCharacterCounter = 1;
320             } else {
321                 if (ch == -1) {
322                     return changeReaders().lookingAtValidChar(skipPastChar);
323                 }
324                 return false;
325             }
326             fCurrentOffset += fBytesPerChar;
327             return true;
328         }
329         if (ch <= 0xD7FF) {
330             if (skipPastChar) {
331                 fCharacterCounter++;
332                 fCurrentOffset += fBytesPerChar;
333             }
334             return true;
335         }
336         if (ch <= 0xDFFF) {
337             // REVISIT - check that the surrogate pair is valid
338
if (skipPastChar) {
339                 fCharacterCounter++;
340                 fCurrentOffset += fBytesPerChar;
341             }
342             return true;
343         }
344         if (ch <= 0xFFFD) {
345             if (skipPastChar) {
346                 fCharacterCounter++;
347                 fCurrentOffset += fBytesPerChar;
348             }
349             return true;
350         }
351         return false;
352     }
353     //
354
//
355
//
356
public boolean lookingAtSpace(boolean skipPastChar) throws Exception JavaDoc {
357         int ch = getChar(fCurrentOffset);
358         if (ch > 0x20)
359             return false;
360         if (ch == 0x20 || ch == 0x09) {
361             if (!skipPastChar)
362                 return true;
363             fCharacterCounter++;
364         } else if (ch == 0x0A) {
365             if (!skipPastChar)
366                 return true;
367             fLinefeedCounter++;
368             fCharacterCounter = 1;
369         } else if (ch == 0x0D) {
370             if (!skipPastChar)
371                 return true;
372             fCarriageReturnCounter++;
373             fCharacterCounter = 1;
374         } else {
375             if (ch == -1) { // REVISIT - should we be checking this here ?
376
return changeReaders().lookingAtSpace(skipPastChar);
377             }
378             return false;
379         }
380         fCurrentOffset += fBytesPerChar;
381         return true;
382     }
383     //
384
//
385
//
386
public void skipToChar(char chr) throws Exception JavaDoc {
387         while (true) {
388             int ch = getChar(fCurrentOffset);
389             if (ch == chr)
390                 return;
391             if (ch == -1) {
392                 changeReaders().skipToChar(chr);
393                 return;
394             }
395             if (ch == 0x0A) {
396                 fLinefeedCounter++;
397                 fCharacterCounter = 1;
398             } else if (ch == 0x0D) {
399                 fCarriageReturnCounter++;
400                 fCharacterCounter = 1;
401             } else if (ch >= 0xD800 && ch < 0xDC00) {
402                 fCharacterCounter++;
403                 fCurrentOffset += fBytesPerChar;
404                 ch = getChar(fCurrentOffset);
405                 if (ch < 0xDC00 || ch >= 0xE000)
406                     continue;
407             } else
408                 fCharacterCounter++;
409             fCurrentOffset += fBytesPerChar;
410         }
411     }
412     //
413
//
414
//
415
public void skipPastSpaces() throws Exception JavaDoc {
416         while (true) {
417             int ch = getChar(fCurrentOffset);
418             if (ch > 0x20)
419                 return;
420             if (ch == 0x20 || ch == 0x09) {
421                 fCharacterCounter++;
422             } else if (ch == 0x0A) {
423                 fLinefeedCounter++;
424                 fCharacterCounter = 1;
425             } else if (ch == 0x0D) {
426                 fCarriageReturnCounter++;
427                 fCharacterCounter = 1;
428             } else {
429                 if (ch == -1)
430                     changeReaders().skipPastSpaces();
431                 return;
432             }
433             fCurrentOffset += fBytesPerChar;
434         }
435     }
436     //
437
//
438
//
439
public void skipPastName(char fastcheck) throws Exception JavaDoc {
440         int ch = getChar(fCurrentOffset);
441         if (!fCalledCharPropInit) {
442             XMLCharacterProperties.initCharFlags();
443             fCalledCharPropInit = true;
444         }
445         if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0)
446             return;
447         while (true) {
448             fCurrentOffset += fBytesPerChar;
449             fCharacterCounter++;
450             ch = getChar(fCurrentOffset);
451             if (fastcheck == ch)
452                 return;
453             if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0)
454                 return;
455         }
456     }
457     //
458
//
459
//
460
public void skipPastNmtoken(char fastcheck) throws Exception JavaDoc {
461         int ch = getChar(fCurrentOffset);
462         if (!fCalledCharPropInit) {
463             XMLCharacterProperties.initCharFlags();
464             fCalledCharPropInit = true;
465         }
466         while (true) {
467             if (fastcheck == ch)
468                 return;
469             if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0)
470                 return;
471             fCurrentOffset += fBytesPerChar;
472             fCharacterCounter++;
473             ch = getChar(fCurrentOffset);
474         }
475     }
476     //
477
//
478
//
479
public boolean skippedString(char[] s) throws Exception JavaDoc {
480         int offset = fCurrentOffset;
481         for (int i = 0; i < s.length; i++) {
482             if (getChar(offset) != s[i])
483                 return false;
484             offset += fBytesPerChar;
485         }
486         fCurrentOffset = offset;
487         fCharacterCounter += s.length;
488         return true;
489     }
490     //
491
//
492
//
493
public int scanInvalidChar() throws Exception JavaDoc {
494         int ch = getChar(fCurrentOffset);
495         if (ch == -1) {
496             return changeReaders().scanInvalidChar();
497         }
498         fCurrentOffset += fBytesPerChar;
499         if (ch == 0x0A) {
500             fLinefeedCounter++;
501             fCharacterCounter = 1;
502         } else if (ch == 0x0D) {
503             fCarriageReturnCounter++;
504             fCharacterCounter = 1;
505         } else {
506             fCharacterCounter++;
507             if (ch >= 0xD800 && ch < 0xDC00) {
508                 int ch2 = getChar(fCurrentOffset);
509                 if (ch2 >= 0xDC00 && ch2 < 0xE000) {
510                     ch = ((ch-0xD800)<<10)+(ch2-0xDC00)+0x10000;
511                     fCurrentOffset += fBytesPerChar;
512                 }
513             }
514         }
515         return ch;
516     }
517     //
518
//
519
//
520
public int scanCharRef(boolean hex) throws Exception JavaDoc {
521         int ch = getChar(fCurrentOffset);
522         if (ch == -1) {
523             return changeReaders().scanCharRef(hex);
524         }
525         int num = 0;
526         if (hex) {
527             if (ch > 'f' || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0)
528                 return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR;
529             num = ch - (ch < 'A' ? '0' : (ch < 'a' ? 'A' : 'a') - 10);
530         } else {
531             if (ch < '0' || ch > '9')
532                 return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR;
533             num = ch - '0';
534         }
535         fCharacterCounter++;
536         fCurrentOffset += fBytesPerChar;
537         boolean toobig = false;
538         while (true) {
539             ch = getChar(fCurrentOffset);
540             if (ch == -1)
541                 break;
542             if (hex) {
543                 if (ch > 'f' || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0)
544                     break;
545             } else {
546                 if (ch < '0' || ch > '9')
547                     break;
548             }
549             fCharacterCounter++;
550             fCurrentOffset += fBytesPerChar;
551             if (hex) {
552                 int dig = ch - (ch < 'A' ? '0' : (ch < 'a' ? 'A' : 'a') - 10);
553                 num = (num << 4) + dig;
554             } else {
555                 int dig = ch - '0';
556                 num = (num * 10) + dig;
557             }
558             if (num > 0x10FFFF) {
559                 toobig = true;
560                 num = 0;
561             }
562         }
563         if (ch != ';')
564             return XMLEntityHandler.CHARREF_RESULT_SEMICOLON_REQUIRED;
565         fCharacterCounter++;
566         fCurrentOffset += fBytesPerChar;
567         if (toobig)
568             return XMLEntityHandler.CHARREF_RESULT_OUT_OF_RANGE;
569         return num;
570     }
571     //
572
//
573
//
574
public int scanStringLiteral() throws Exception JavaDoc {
575         boolean single;
576         if (!(single = lookingAtChar('\'', true)) && !lookingAtChar('\"', true)) {
577             return XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED;
578         }
579         int offset = fCurrentOffset;
580         char qchar = single ? '\'' : '\"';
581         while (!lookingAtChar(qchar, false)) {
582             if (!lookingAtValidChar(true)) {
583                 return XMLEntityHandler.STRINGLIT_RESULT_INVALID_CHAR;
584             }
585         }
586         int stringIndex = addString(offset, fCurrentOffset - offset);
587         lookingAtChar(qchar, true); // move past qchar
588
return stringIndex;
589     }
590     //
591
// [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
592
// | "'" ([^<&'] | Reference)* "'"
593
//
594
public int scanAttValue(char qchar, boolean asSymbol) throws Exception JavaDoc
595     {
596         int offset = fCurrentOffset;
597         while (true) {
598             if (lookingAtChar(qchar, false)) {
599                 break;
600             }
601             if (lookingAtChar(' ', true)) {
602                 continue;
603             }
604             if (lookingAtSpace(false)) {
605                 return XMLEntityHandler.ATTVALUE_RESULT_COMPLEX;
606             }
607             if (lookingAtChar('&', false)) {
608                 return XMLEntityHandler.ATTVALUE_RESULT_COMPLEX;
609             }
610             if (lookingAtChar('<', false)) {
611                 return XMLEntityHandler.ATTVALUE_RESULT_LESSTHAN;
612             }
613             if (!lookingAtValidChar(true)) {
614                 return XMLEntityHandler.ATTVALUE_RESULT_INVALID_CHAR;
615             }
616         }
617         int result = asSymbol ? addSymbol(offset, fCurrentOffset - offset) : addString(offset, fCurrentOffset - offset);
618         lookingAtChar(qchar, true);
619         return result;
620     }
621     //
622
// [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
623
// | "'" ([^%&'] | PEReference | Reference)* "'"
624
//
625
public int scanEntityValue(int qchar, boolean createString) throws Exception JavaDoc
626     {
627         int offset = fCurrentOffset;
628         while (true) {
629             if (qchar != -1 && lookingAtChar((char)qchar, false)) {
630                 if (!createString)
631                     return XMLEntityHandler.ENTITYVALUE_RESULT_FINISHED;
632                 break;
633             }
634             if (lookingAtChar('&', false)) {
635                 return XMLEntityHandler.ENTITYVALUE_RESULT_REFERENCE;
636             }
637             if (lookingAtChar('%', false)) {
638                 return XMLEntityHandler.ENTITYVALUE_RESULT_PEREF;
639             }
640             if (!lookingAtValidChar(true)) {
641                 return XMLEntityHandler.ENTITYVALUE_RESULT_INVALID_CHAR;
642             }
643         }
644         int result = addString(offset, fCurrentOffset - offset);
645         lookingAtChar((char)qchar, true);
646         return result;
647     }
648     //
649
//
650
//
651
public boolean scanExpectedName(char fastcheck, StringPool.CharArrayRange expectedName) throws Exception JavaDoc {
652         int nameOffset = fCurrentOffset;
653         skipPastName(fastcheck);
654         int nameLength = fCurrentOffset - nameOffset;
655         if (nameLength == 0)
656             return false;
657         int nameIndex = addSymbol(nameOffset, nameLength);
658         // DEFECT !! check name against expected name
659
return true;
660     }
661
662     public void scanQName(char fastcheck, QName qname) throws Exception JavaDoc {
663
664         // REVISIT: possible bugs with surrogate characters -el
665
int nameOffset = fCurrentOffset;
666         int ch;
667         int prefixend=-1;
668         int offset=fCurrentOffset;
669         ch = getChar(fCurrentOffset);
670         if (ch < 0x80) {
671             if (XMLCharacterProperties.fgAsciiInitialNameChar[ch] == 0) {
672                 qname.clear();
673                 return;
674             }
675             if (ch == ':') {
676                 qname.clear();
677                 return;
678             }
679         }
680         else {
681             if (!fCalledCharPropInit) {
682                 XMLCharacterProperties.initCharFlags();
683                 fCalledCharPropInit = true;
684             }
685             if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0)
686                 return;
687         }
688
689         while (true) {
690             fCurrentOffset += fBytesPerChar;
691             fCharacterCounter++;
692             ch = getChar(fCurrentOffset);
693             if (fastcheck == ch) {
694                 break;
695             }
696             if (ch < 0x80) {
697                 if (XMLCharacterProperties.fgAsciiNameChar[ch] == 0) {
698                     break;
699                 }
700                 if (ch == ':') {
701                     if (prefixend != -1) {
702                         break;
703                     }
704                     prefixend = fCurrentOffset;
705                     //
706
// We need to peek ahead one character. If the next character is not a
707
// valid initial name character, or is another colon, then we cannot meet
708
// both the Prefix and LocalPart productions for the QName production,
709
// which means that there is no Prefix and we need to terminate the QName
710
// at the first colon. --JR's comments
711
//
712

713                     ch = getChar(fCurrentOffset+fBytesPerChar);
714                     boolean lpok = true;
715                     if (ch < 0x80) {
716                         if (XMLCharacterProperties.fgAsciiInitialNameChar[ch] == 0 || ch == ':') {
717                             lpok = false;
718                         }
719                     }
720                     else {
721                         if (!fCalledCharPropInit) {
722                             XMLCharacterProperties.initCharFlags();
723                             fCalledCharPropInit = true;
724                         }
725                         if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0) {
726                             lpok = false;
727                         }
728                     }
729                     if (!lpok) {
730                         prefixend = -1;
731                         break;
732                     }
733                 }
734             }
735             else {
736                 if (!fCalledCharPropInit) {
737                     XMLCharacterProperties.initCharFlags();
738                     fCalledCharPropInit = true;
739                 }
740                 if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0) {
741                     break;
742                 }
743             }
744         }//end while loop
745
int length = fCurrentOffset - offset;
746         qname.prefix = prefixend == -1 ? -1 : addSymbol(offset, prefixend - offset);
747         qname.rawname = addSymbol(offset, length);
748         qname.localpart = prefixend == -1 ? qname.rawname : addSymbol(prefixend + fBytesPerChar, fCurrentOffset - (prefixend + fBytesPerChar));
749         qname.uri = StringPool.EMPTY_STRING;
750
751
752
753     } // scanQName(char,QName)
754

755     public int scanName(char fastcheck) throws Exception JavaDoc {
756         int nameOffset = fCurrentOffset;
757         skipPastName(fastcheck);
758         int nameLength = fCurrentOffset - nameOffset;
759         if (nameLength == 0)
760             return -1;
761         int nameIndex = addSymbol(nameOffset, nameLength);
762         return nameIndex;
763     }
764     //
765
//
766
//
767
private static final char[] cdata_string = { 'C','D','A','T','A','[' };
768     private int recognizeMarkup() throws Exception JavaDoc {
769         int ch = getChar(fCurrentOffset);
770         switch (ch) {
771         case -1:
772             return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
773         case '?':
774             fCharacterCounter++;
775             fCurrentOffset += fBytesPerChar;
776             return XMLEntityHandler.CONTENT_RESULT_START_OF_PI;
777         case '!':
778             fCharacterCounter++;
779             fCurrentOffset += fBytesPerChar;
780             ch = getChar(fCurrentOffset);
781             if (ch == -1) {
782                 fCharacterCounter--;
783                 fCurrentOffset -= fBytesPerChar;;
784                 return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
785             }
786             if (ch == '-') {
787                 fCharacterCounter++;
788                 fCurrentOffset += fBytesPerChar;
789                 ch = getChar(fCurrentOffset);
790                 if (ch == -1) {
791                     fCharacterCounter -= 2;
792                     fCurrentOffset -= 2;
793                     return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
794                 }
795                 if (ch == '-') {
796                     fCharacterCounter++;
797                     fCurrentOffset += fBytesPerChar;
798                     return XMLEntityHandler.CONTENT_RESULT_START_OF_COMMENT;
799                 }
800                 break;
801             }
802             if (ch == '[') {
803                 fCharacterCounter++;
804                 fCurrentOffset += fBytesPerChar;
805                 for (int i = 0; i < 6; i++) {
806                     ch = getChar(fCurrentOffset);
807                     if (ch == -1) {
808                         fCharacterCounter -= (2 + i);
809                         fCurrentOffset -= ((2 + i) * fBytesPerChar);
810                         return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
811                     }
812                     if (ch != cdata_string[i]) {
813                         return XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED;
814                     }
815                     fCharacterCounter++;
816                     fCurrentOffset += fBytesPerChar;
817                 }
818                 return XMLEntityHandler.CONTENT_RESULT_START_OF_CDSECT;
819             }
820             break;
821         case '/':
822             fCharacterCounter++;
823             fCurrentOffset += fBytesPerChar;
824             return XMLEntityHandler.CONTENT_RESULT_START_OF_ETAG;
825         default:
826             return XMLEntityHandler.CONTENT_RESULT_START_OF_ELEMENT;
827         }
828         return XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED;
829     }
830     private int recognizeReference() throws Exception JavaDoc {
831         int ch = getChar(fCurrentOffset);
832         if (ch == -1) {
833             return XMLEntityHandler.CONTENT_RESULT_REFERENCE_END_OF_INPUT;
834         }
835         //
836
// [67] Reference ::= EntityRef | CharRef
837
// [68] EntityRef ::= '&' Name ';'
838
// [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
839
//
840
if (ch == '#') {
841             fCharacterCounter++;
842             fCurrentOffset += fBytesPerChar;
843             return XMLEntityHandler.CONTENT_RESULT_START_OF_CHARREF;
844         } else {
845             return XMLEntityHandler.CONTENT_RESULT_START_OF_ENTITYREF;
846         }
847     }
848     public int scanContent(QName element) throws Exception JavaDoc {
849         int offset = fCurrentOffset;
850         int ch = getChar(fCurrentOffset);
851         fCurrentOffset += fBytesPerChar;
852         byte prop;
853         if (!fCalledCharPropInit) {
854             XMLCharacterProperties.initCharFlags();
855             fCalledCharPropInit = true;
856         }
857         if (ch < 0x80) {
858             if (ch == -1) {
859                 fCurrentOffset -= fBytesPerChar;
860                 return changeReaders().scanContent(element); // REVISIT - not quite...
861
}
862             prop = XMLCharacterProperties.fgCharFlags[ch];
863             if ((prop & XMLCharacterProperties.E_CharDataFlag) == 0 && ch != 0x0A && ch != 0x0D) {
864                 if (ch == '<') {
865                     fCharacterCounter++;
866                     if (!fInCDSect) {
867                         return recognizeMarkup();
868                     }
869                 } else if (ch == '&') {
870                     fCharacterCounter++;
871                     if (!fInCDSect) {
872                         return recognizeReference();
873                     }
874                 } else if (ch == ']') {
875                     if (getChar(fCurrentOffset) == ']' && getChar(fCurrentOffset + fBytesPerChar) == '>') {
876                         fCharacterCounter += 3;
877                         fCurrentOffset += (2 * fBytesPerChar);
878                         return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
879                     }
880                 } else {
881                     fCurrentOffset -= fBytesPerChar;
882                     return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
883                 }
884             } else if (ch == 0x20 || ch == 0x09 || ch == 0x0A || ch == 0x0D) {
885                 do {
886                     if (ch == 0x0A) {
887                         fLinefeedCounter++;
888                         fCharacterCounter = 1;
889                     } else if (ch == 0x0D) {
890                         fCarriageReturnCounter++;
891                         fCharacterCounter = 1;
892                     } else {
893                         fCharacterCounter++;
894                     }
895                     ch = getChar(fCurrentOffset);
896                     fCurrentOffset += fBytesPerChar;
897                 } while (ch == 0x20 || ch == 0x09 || ch == 0x0A || ch == 0x0D);
898                 if (ch < 0x80) {
899                     if (ch == -1) {
900                         fCurrentOffset -= fBytesPerChar;
901                         callCharDataHandler(offset, fCurrentOffset - offset, true);
902                         return changeReaders().scanContent(element); // REVISIT - not quite...
903
}
904                     prop = XMLCharacterProperties.fgCharFlags[ch];
905                     if ((prop & XMLCharacterProperties.E_CharDataFlag) == 0) {
906                         if (ch == '<') {
907                             if (!fInCDSect) {
908                                 callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, true);
909                                 fCharacterCounter++;
910                                 return recognizeMarkup();
911                             }
912                             fCharacterCounter++;
913                         } else if (ch == '&') {
914                             if (!fInCDSect) {
915                                 callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, true);
916                                 fCharacterCounter++;
917                                 return recognizeReference();
918                             }
919                             fCharacterCounter++;
920                         } else if (ch == ']') {
921                             if (getChar(fCurrentOffset) == ']' && getChar(fCurrentOffset + fBytesPerChar) == '>') {
922                                 callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, true);
923                                 fCharacterCounter += 3;
924                                 fCurrentOffset += (2 * fBytesPerChar);
925                                 return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
926                             }
927                         } else {
928                             fCurrentOffset -= fBytesPerChar;
929                             callCharDataHandler(offset, fCurrentOffset - offset, true);
930                             return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
931                         }
932                     }
933                 } else {
934                     if (ch >= 0xD800 && ch <= 0xDFFF) {
935                         fCurrentOffset += fBytesPerChar;
936                     } else if (ch == 0xFFFE || ch == 0xFFFF) {
937                         fCurrentOffset -= fBytesPerChar;
938                         callCharDataHandler(offset, fCurrentOffset - offset, true);
939                         return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
940                     }
941                 }
942             }
943         } else {
944             if (ch >= 0xD800 && ch <= 0xDFFF) {
945                 fCurrentOffset += fBytesPerChar;
946             } else if (ch == 0xFFFE || ch == 0xFFFF) {
947                 fCurrentOffset -= fBytesPerChar;
948                 return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
949             }
950         }
951         fCharacterCounter++;
952         while (true) {
953             ch = getChar(fCurrentOffset);
954             fCurrentOffset += fBytesPerChar;
955             if (ch >= 0x80 || ch < 0)
956                 break;
957             prop = XMLCharacterProperties.fgCharFlags[ch];
958             if ((prop & XMLCharacterProperties.E_CharDataFlag) == 0) {
959                 if (ch == 0x0A) {
960                     fLinefeedCounter++;
961                     fCharacterCounter = 1;
962                 } else if (ch == 0x0D) {
963                     fCarriageReturnCounter++;
964                     fCharacterCounter = 1;
965                 } else
966                     break;
967             } else
968                 fCharacterCounter++;
969         }
970         while (true) { // REVISIT - EOF check ?
971
if (ch < 0x80) {
972                 if (ch == -1) {
973                     fCurrentOffset -= fBytesPerChar;
974                     callCharDataHandler(offset, fCurrentOffset - offset, false);
975                     return changeReaders().scanContent(element); // REVISIT - not quite...
976
}
977                 prop = XMLCharacterProperties.fgCharFlags[ch];
978                 if ((prop & XMLCharacterProperties.E_CharDataFlag) == 0) {
979                     if (ch == '<') {
980                         if (!fInCDSect) {
981                             callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, false);
982                             fCharacterCounter++;
983                             return recognizeMarkup();
984                         }
985                         fCharacterCounter++;
986                     } else if (ch == '&') {
987                         if (!fInCDSect) {
988                             callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, false);
989                             fCharacterCounter++;
990                             return recognizeReference();
991                         }
992                         fCharacterCounter++;
993                     } else if (ch == 0x0A) {
994                         fLinefeedCounter++;
995                         fCharacterCounter = 1;
996                     } else if (ch == 0x0D) {
997                         fCarriageReturnCounter++;
998                         fCharacterCounter = 1;
999                     } else if (ch == ']') {
1000                        if (getChar(fCurrentOffset) == ']' && getChar(fCurrentOffset + fBytesPerChar) == '>') {
1001                            callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, false);
1002                            fCharacterCounter += 3;
1003                            fCurrentOffset += (2 * fBytesPerChar);
1004                            return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
1005                        }
1006                        fCharacterCounter++;
1007                    } else {
1008                        fCurrentOffset -= fBytesPerChar;
1009                        callCharDataHandler(offset, fCurrentOffset - offset, false);
1010                        return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
1011                    }
1012                } else {
1013                    fCharacterCounter++;
1014                }
1015            } else {
1016                if (ch >= 0xD800 && ch <= 0xDFFF) {
1017                    fCharacterCounter++;
1018                    fCurrentOffset += fBytesPerChar;
1019                } else if (ch == 0xFFFE || ch == 0xFFFF) {
1020                    fCurrentOffset -= fBytesPerChar;
1021                    callCharDataHandler(offset, fCurrentOffset - offset, false);
1022                    return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
1023                }
1024                fCharacterCounter++;
1025            }
1026            ch = getChar(fCurrentOffset);
1027            fCurrentOffset += fBytesPerChar;
1028        }
1029    }
1030}
1031
Popular Tags