KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > batik > xml > XMLScanner


1 /*
2
3    Copyright 2002-2003 The Apache Software Foundation
4
5    Licensed under the Apache License, Version 2.0 (the "License");
6    you may not use this file except in compliance with the License.
7    You may obtain a copy of the License at
8
9        http://www.apache.org/licenses/LICENSE-2.0
10
11    Unless required by applicable law or agreed to in writing, software
12    distributed under the License is distributed on an "AS IS" BASIS,
13    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14    See the License for the specific language governing permissions and
15    limitations under the License.
16
17  */

18 package org.apache.batik.xml;
19
20 import java.io.IOException JavaDoc;
21 import java.io.InputStream JavaDoc;
22 import java.io.Reader JavaDoc;
23 import java.util.Locale JavaDoc;
24 import java.util.MissingResourceException JavaDoc;
25
26 import org.apache.batik.i18n.Localizable;
27 import org.apache.batik.i18n.LocalizableSupport;
28 import org.apache.batik.util.io.NormalizingReader;
29 import org.apache.batik.util.io.StreamNormalizingReader;
30 import org.apache.batik.util.io.StringNormalizingReader;
31
32 /**
33  * This class represents a scanner for XML documents.
34  *
35  * @author <a HREF="mailto:stephane@hillion.org">Stephane Hillion</a>
36  * @version $Id: XMLScanner.java,v 1.8 2005/03/27 08:58:37 cam Exp $
37  */

38 public class XMLScanner implements Localizable {
39
40     /**
41      * The document start context.
42      */

43     public final static int DOCUMENT_START_CONTEXT = 0;
44
45     /**
46      * The top level context.
47      */

48     public final static int TOP_LEVEL_CONTEXT = 1;
49
50     /**
51      * The processing instruction context.
52      */

53     public final static int PI_CONTEXT = 2;
54
55     /**
56      * The XML declaration context.
57      */

58     public final static int XML_DECL_CONTEXT = 3;
59
60     /**
61      * The doctype context.
62      */

63     public final static int DOCTYPE_CONTEXT = 4;
64
65     /**
66      * The start tag context.
67      */

68     public final static int START_TAG_CONTEXT = 5;
69
70     /**
71      * The content context.
72      */

73     public final static int CONTENT_CONTEXT = 6;
74
75     /**
76      * The DTD declarations context.
77      */

78     public final static int DTD_DECLARATIONS_CONTEXT = 7;
79
80     /**
81      * The CDATA section context.
82      */

83     public final static int CDATA_SECTION_CONTEXT = 8;
84
85     /**
86      * The end tag context.
87      */

88     public final static int END_TAG_CONTEXT = 9;
89
90     /**
91      * The attribute value context.
92      */

93     public final static int ATTRIBUTE_VALUE_CONTEXT = 10;
94
95     /**
96      * The ATTLIST context.
97      */

98     public final static int ATTLIST_CONTEXT = 11;
99
100     /**
101      * The element declaration context.
102      */

103     public final static int ELEMENT_DECLARATION_CONTEXT = 12;
104
105     /**
106      * The entity context.
107      */

108     public final static int ENTITY_CONTEXT = 13;
109
110     /**
111      * The notation context.
112      */

113     public final static int NOTATION_CONTEXT = 14;
114
115     /**
116      * The notation type context.
117      */

118     public final static int NOTATION_TYPE_CONTEXT = 15;
119
120     /**
121      * The enumeration context.
122      */

123     public final static int ENUMERATION_CONTEXT = 16;
124
125     /**
126      * The entity value context.
127      */

128     public final static int ENTITY_VALUE_CONTEXT = 17;
129
130     /**
131      * The default resource bundle base name.
132      */

133     protected final static String JavaDoc BUNDLE_CLASSNAME =
134     "org.apache.batik.xml.resources.Messages";
135
136     /**
137      * The localizable support.
138      */

139     protected LocalizableSupport localizableSupport =
140         new LocalizableSupport(BUNDLE_CLASSNAME,
141                                XMLScanner.class.getClassLoader());
142
143     /**
144      * The reader.
145      */

146     protected NormalizingReader reader;
147
148     /**
149      * The current char.
150      */

151     protected int current;
152
153     /**
154      * The type of the current lexical unit.
155      */

156     protected int type;
157
158     /**
159      * The recording buffer.
160      */

161     protected char[] buffer = new char[1024];
162
163     /**
164      * The current position in the buffer.
165      */

166     protected int position;
167
168     /**
169      * The start offset of the last lexical unit.
170      */

171     protected int start;
172
173     /**
174      * The end offset of the last lexical unit.
175      */

176     protected int end;
177
178     /**
179      * The current scanning context.
180      */

181     protected int context;
182
183     /**
184      * The depth in the xml tree.
185      */

186     protected int depth;
187
188     /**
189      * A PI end has been previously read.
190      */

191     protected boolean piEndRead;
192
193     /**
194      * The scanner is in the internal DTD.
195      */

196     protected boolean inDTD;
197
198     /**
199      * The last attribute delimiter encountered.
200      */

201     protected char attrDelimiter;
202
203     /**
204      * A CDATA section end is the next token
205      */

206     protected boolean cdataEndRead;
207
208     /**
209      * Creates a new XML scanner.
210      * @param r The reader to scan.
211      */

212     public XMLScanner(Reader JavaDoc r) throws XMLException {
213         context = DOCUMENT_START_CONTEXT;
214         try {
215             reader = new StreamNormalizingReader(r);
216             current = nextChar();
217         } catch (IOException JavaDoc e) {
218             throw new XMLException(e);
219         }
220     }
221
222     /**
223      * Creates a new XML scanner.
224      * @param is The input stream to scan.
225      * @param enc The character encoding to use.
226      */

227     public XMLScanner(InputStream JavaDoc is, String JavaDoc enc) throws XMLException {
228         context = DOCUMENT_START_CONTEXT;
229         try {
230             reader = new StreamNormalizingReader(is, enc);
231             current = nextChar();
232         } catch (IOException JavaDoc e) {
233             throw new XMLException(e);
234         }
235     }
236
237     /**
238      * Creates a new XML scanner.
239      * @param s The string to parse.
240      */

241     public XMLScanner(String JavaDoc s) throws XMLException {
242         context = DOCUMENT_START_CONTEXT;
243         try {
244             reader = new StringNormalizingReader(s);
245             current = nextChar();
246         } catch (IOException JavaDoc e) {
247             throw new XMLException(e);
248         }
249     }
250
251     /**
252      * Implements {@link org.apache.batik.i18n.Localizable#setLocale(Locale)}.
253      */

254     public void setLocale(Locale JavaDoc l) {
255     localizableSupport.setLocale(l);
256     }
257
258     /**
259      * Implements {@link org.apache.batik.i18n.Localizable#getLocale()}.
260      */

261     public Locale JavaDoc getLocale() {
262         return localizableSupport.getLocale();
263     }
264
265     /**
266      * Implements {@link
267      * org.apache.batik.i18n.Localizable#formatMessage(String,Object[])}.
268      */

269     public String JavaDoc formatMessage(String JavaDoc key, Object JavaDoc[] args)
270         throws MissingResourceException JavaDoc {
271         return localizableSupport.formatMessage(key, args);
272     }
273
274     /**
275      * Sets the current depth in the XML tree.
276      */

277     public void setDepth(int i) {
278     depth = i;
279     }
280
281     /**
282      * Returns the current depth in the XML tree.
283      */

284     public int getDepth() {
285         return depth;
286     }
287
288     /**
289      * Sets the current context.
290      */

291     public void setContext(int c) {
292     context = c;
293     }
294
295     /**
296      * Returns the current context.
297      */

298     public int getContext() {
299         return context;
300     }
301
302     /**
303      * The current lexical unit type like defined in LexicalUnits.
304      */

305     public int getType() {
306         return type;
307     }
308
309     /**
310      * Returns the current line.
311      */

312     public int getLine() {
313         return reader.getLine();
314     }
315
316     /**
317      * Returns the current column.
318      */

319     public int getColumn() {
320         return reader.getColumn();
321     }
322
323     /**
324      * Returns the buffer used to store the chars.
325      */

326     public char[] getBuffer() {
327         return buffer;
328     }
329
330     /**
331      * Returns the start offset of the last lexical unit.
332      */

333     public int getStart() {
334         return start;
335     }
336
337     /**
338      * Returns the end offset of the last lexical unit.
339      */

340     public int getEnd() {
341         return end;
342     }
343
344     /**
345      * Returns the last encountered string delimiter.
346      */

347     public char getStringDelimiter() {
348         return attrDelimiter;
349     }
350
351     /**
352      * Returns the start offset of the current lexical unit.
353      */

354     public int getStartOffset() {
355         switch (type) {
356         case LexicalUnits.SECTION_END:
357             return -3;
358
359         case LexicalUnits.PI_END:
360             return -2;
361
362         case LexicalUnits.STRING:
363         case LexicalUnits.ENTITY_REFERENCE:
364         case LexicalUnits.PARAMETER_ENTITY_REFERENCE:
365         case LexicalUnits.START_TAG:
366         case LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT:
367             return 1;
368
369         case LexicalUnits.PI_START:
370         case LexicalUnits.END_TAG:
371         case LexicalUnits.CHARACTER_REFERENCE:
372             return 2;
373
374         case LexicalUnits.COMMENT:
375             return 4;
376
377         default:
378             return 0;
379         }
380     }
381
382     /**
383      * Returns the end offset of the current lexical unit.
384      */

385     public int getEndOffset() {
386         switch (type) {
387         case LexicalUnits.STRING:
388         case LexicalUnits.ENTITY_REFERENCE:
389         case LexicalUnits.CHARACTER_REFERENCE:
390         case LexicalUnits.PARAMETER_ENTITY_REFERENCE:
391         case LexicalUnits.LAST_ATTRIBUTE_FRAGMENT:
392             return -1;
393
394         case LexicalUnits.PI_DATA:
395             return -2;
396
397         case LexicalUnits.COMMENT:
398             return -3;
399
400         case LexicalUnits.CHARACTER_DATA:
401             if (cdataEndRead) {
402                 return -3;
403             }
404             return 0;
405
406         default:
407             return 0;
408         }
409     }
410
411     /**
412      * Clears the buffer.
413      */

414     public void clearBuffer() {
415         if (position <= 0) {
416             position = 0;
417         } else {
418             buffer[0] = buffer[position - 1];
419             position = 1;
420         }
421     }
422
423     /**
424      * Advances to the next lexical unit.
425      * @return The type of the lexical unit like defined in LexicalUnits.
426      */

427     public int next() throws XMLException {
428     return next(context);
429     }
430
431     /**
432      * Advances to the next lexical unit.
433      * @param ctx The context to use for scanning.
434      * @return The type of the lexical unit like defined in LexicalUnits.
435      */

436     public int next(int ctx) throws XMLException {
437         start = position - 1;
438         try {
439             switch (ctx) {
440             case DOCUMENT_START_CONTEXT:
441                 type = nextInDocumentStart();
442                 break;
443
444         case TOP_LEVEL_CONTEXT:
445         type = nextInTopLevel();
446                 break;
447
448         case PI_CONTEXT:
449         type = nextInPI();
450                 break;
451
452         case START_TAG_CONTEXT:
453         type = nextInStartTag();
454                 break;
455
456         case ATTRIBUTE_VALUE_CONTEXT:
457         type = nextInAttributeValue();
458                 break;
459
460         case CONTENT_CONTEXT:
461         type = nextInContent();
462                 break;
463
464         case END_TAG_CONTEXT:
465         type = nextInEndTag();
466                 break;
467
468         case CDATA_SECTION_CONTEXT:
469         type = nextInCDATASection();
470                 break;
471
472         case XML_DECL_CONTEXT:
473         type = nextInXMLDecl();
474                 break;
475
476         case DOCTYPE_CONTEXT:
477         type = nextInDoctype();
478                 break;
479
480         case DTD_DECLARATIONS_CONTEXT:
481         type = nextInDTDDeclarations();
482                 break;
483
484         case ELEMENT_DECLARATION_CONTEXT:
485         type = nextInElementDeclaration();
486                 break;
487
488         case ATTLIST_CONTEXT:
489         type = nextInAttList();
490                 break;
491
492         case NOTATION_CONTEXT:
493         type = nextInNotation();
494                 break;
495
496         case ENTITY_CONTEXT:
497         type = nextInEntity();
498                 break;
499
500         case ENTITY_VALUE_CONTEXT:
501         return nextInEntityValue();
502
503             case NOTATION_TYPE_CONTEXT:
504                 return nextInNotationType();
505
506             case ENUMERATION_CONTEXT:
507                 return nextInEnumeration();
508
509             default:
510                 throw new InternalError JavaDoc();
511             }
512         } catch (IOException JavaDoc e) {
513             throw new XMLException(e);
514         }
515         end = position - ((current == -1) ? 0 : 1);
516         return type;
517     }
518
519     /**
520      * Reads the first token in the stream.
521      */

522     protected int nextInDocumentStart() throws IOException JavaDoc, XMLException {
523         switch (current) {
524     case 0x9:
525     case 0xA:
526     case 0xD:
527     case 0x20:
528         do {
529         nextChar();
530         } while (current != -1 && XMLUtilities.isXMLSpace((char)current));
531         context = (depth == 0) ? TOP_LEVEL_CONTEXT : CONTENT_CONTEXT;
532         return LexicalUnits.S;
533
534         case '<':
535             switch (nextChar()) {
536             case '?':
537                 int c1 = nextChar();
538         if (c1 == -1 ||
539                     !XMLUtilities.isXMLNameFirstCharacter((char)c1)) {
540             throw createXMLException("invalid.pi.target");
541         }
542                 context = PI_CONTEXT;
543                 int c2 = nextChar();
544         if (c2 == -1 || !XMLUtilities.isXMLNameCharacter((char)c2)) {
545             return LexicalUnits.PI_START;
546         }
547                 int c3 = nextChar();
548         if (c3 == -1 || !XMLUtilities.isXMLNameCharacter((char)c3)) {
549             return LexicalUnits.PI_START;
550         }
551                 int c4 = nextChar();
552         if (c4 != -1 && XMLUtilities.isXMLNameCharacter((char)c4)) {
553             do {
554             nextChar();
555             } while (current != -1 &&
556                              XMLUtilities.isXMLNameCharacter((char)current));
557             return LexicalUnits.PI_START;
558         }
559         if (c1 == 'x' && c2 == 'm' && c3 == 'l') {
560             context = XML_DECL_CONTEXT;
561             return LexicalUnits.XML_DECL_START;
562         }
563         if ((c1 == 'x' || c1 == 'X') &&
564             (c2 == 'm' || c2 == 'M') &&
565             (c3 == 'l' || c3 == 'L')) {
566             throw createXMLException("xml.reserved");
567         }
568         return LexicalUnits.PI_START;
569                 
570             case '!':
571                 switch (nextChar()) {
572                 case '-':
573                     return readComment();
574
575                 case 'D':
576                     context = DOCTYPE_CONTEXT;
577                     return readIdentifier("OCTYPE",
578                                           LexicalUnits.DOCTYPE_START,
579                                           -1);
580
581                 default:
582                     throw createXMLException("invalid.doctype");
583                 }
584
585             default:
586                 context = START_TAG_CONTEXT;
587                 depth++;
588                 return readName(LexicalUnits.START_TAG);
589             }
590
591         case -1:
592             return LexicalUnits.EOF;
593             
594         default:
595             if (depth == 0) {
596                 throw createXMLException("invalid.character");
597             } else {
598                 return nextInContent();
599             }
600         }
601     }
602
603     /**
604      * Advances to the next lexical unit in the top level context.
605      * @return The type of the lexical unit like defined in LexicalUnits.
606      */

607     protected int nextInTopLevel() throws IOException JavaDoc, XMLException {
608     switch (current) {
609     case 0x9:
610     case 0xA:
611     case 0xD:
612     case 0x20:
613         do {
614         nextChar();
615         } while (current != -1 &&
616                      XMLUtilities.isXMLSpace((char)current));
617         return LexicalUnits.S;
618
619     case '<':
620         switch (nextChar()) {
621         case '?':
622         context = PI_CONTEXT;
623         return readPIStart();
624
625         case '!':
626         switch (nextChar()) {
627         case '-':
628             return readComment();
629
630         case 'D':
631             context = DOCTYPE_CONTEXT;
632             return readIdentifier("OCTYPE",
633                                           LexicalUnits.DOCTYPE_START,
634                                           -1);
635
636         default:
637             throw createXMLException("invalid.character");
638         }
639         default:
640         context = START_TAG_CONTEXT;
641         depth++;
642         return readName(LexicalUnits.START_TAG);
643         }
644
645     case -1:
646         return LexicalUnits.EOF;
647
648     default:
649         throw createXMLException("invalid.character");
650     }
651     }
652
653     /**
654      * Returns the next lexical unit in the context of a processing
655      * instruction.
656      */

657     protected int nextInPI() throws IOException JavaDoc, XMLException {
658     if (piEndRead) {
659         piEndRead = false;
660         context = (depth == 0) ? TOP_LEVEL_CONTEXT : CONTENT_CONTEXT;
661         return LexicalUnits.PI_END;
662     }
663
664     switch (current) {
665     case 0x9:
666     case 0xA:
667     case 0xD:
668     case 0x20:
669         do {
670         nextChar();
671         } while (current != -1 &&
672                      XMLUtilities.isXMLSpace((char)current));
673         return LexicalUnits.S;
674     case '?':
675         if (nextChar() != '>') {
676         throw createXMLException("pi.end.expected");
677         }
678         nextChar();
679         if (inDTD) {
680         context = DTD_DECLARATIONS_CONTEXT;
681         } else if (depth == 0) {
682         context = TOP_LEVEL_CONTEXT;
683         } else {
684         context = CONTENT_CONTEXT;
685         }
686         return LexicalUnits.PI_END;
687
688     default:
689         do {
690         do {
691             nextChar();
692         } while (current != -1 && current != '?');
693         nextChar();
694         } while (current != -1 && current != '>');
695         nextChar();
696         piEndRead = true;
697         return LexicalUnits.PI_DATA;
698     }
699     }
700
701     /**
702      * Returns the next lexical unit in the context of a start tag.
703      */

704     protected int nextInStartTag() throws IOException JavaDoc, XMLException {
705     switch (current) {
706     case 0x9:
707     case 0xA:
708     case 0xD:
709     case 0x20:
710         do {
711         nextChar();
712         } while (current != -1 && XMLUtilities.isXMLSpace((char)current));
713         return LexicalUnits.S;
714
715     case '/':
716         if (nextChar() != '>') {
717         throw createXMLException("malformed.tag.end");
718         }
719         nextChar();
720         context = (--depth == 0) ? TOP_LEVEL_CONTEXT : CONTENT_CONTEXT;
721         return LexicalUnits.EMPTY_ELEMENT_END;
722
723     case '>':
724         nextChar();
725         context = CONTENT_CONTEXT;
726         return LexicalUnits.END_CHAR;
727
728     case '=':
729         nextChar();
730         return LexicalUnits.EQ;
731
732     case '"':
733         attrDelimiter = '"';
734         nextChar();
735
736         for (;;) {
737         switch (current) {
738         case '"':
739                     nextChar();
740                     return LexicalUnits.STRING;
741
742         case '&':
743                     context = ATTRIBUTE_VALUE_CONTEXT;
744                     return LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT;
745                     
746         case '<':
747                     throw createXMLException("invalid.character");
748
749         case -1:
750                     throw createXMLException("unexpected.eof");
751         }
752         nextChar();
753         }
754
755     case '\'':
756         attrDelimiter = '\'';
757             nextChar();
758
759         for (;;) {
760         switch (current) {
761         case '\'':
762                     nextChar();
763                     return LexicalUnits.STRING;
764
765         case '&':
766                     context = ATTRIBUTE_VALUE_CONTEXT;
767                     return LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT;
768                     
769         case '<':
770                     throw createXMLException("invalid.character");
771
772         case -1:
773                     throw createXMLException("unexpected.eof");
774         }
775         nextChar();
776         }
777
778     default:
779         return readName(LexicalUnits.NAME);
780     }
781     }
782
783     /**
784      * Returns the next lexical unit in the context of an attribute value.
785      */

786     protected int nextInAttributeValue()
787         throws IOException JavaDoc, XMLException {
788     if (current == -1) {
789         return LexicalUnits.EOF;
790     }
791
792     if (current == '&') {
793         return readReference();
794
795     } else {
796         loop: for (;;) {
797         switch (current) {
798         case '&':
799         case '<':
800         case -1:
801             break loop;
802         case '"':
803         case '\'':
804             if (current == attrDelimiter) {
805             break loop;
806             }
807         }
808         nextChar();
809         }
810
811         switch (current) {
812         case -1:
813         break;
814
815         case '<':
816         throw createXMLException("invalid.character");
817
818             case '&':
819                 return LexicalUnits.ATTRIBUTE_FRAGMENT;
820
821         case '\'':
822         case '"':
823         nextChar();
824         if (inDTD) {
825             context = ATTLIST_CONTEXT;
826         } else {
827             context = START_TAG_CONTEXT;
828         }
829         }
830         return LexicalUnits.LAST_ATTRIBUTE_FRAGMENT;
831     }
832     }
833
834     /**
835      * Returns the next lexical unit in the context of an element content.
836      */

837     protected int nextInContent() throws IOException JavaDoc, XMLException {
838     switch (current) {
839     case -1:
840         return LexicalUnits.EOF;
841
842     case '&':
843         return readReference();
844
845     case '<':
846         switch (nextChar()) {
847         case '?':
848         context = PI_CONTEXT;
849         return readPIStart();
850
851         case '!':
852         switch (nextChar()) {
853         case '-':
854             return readComment();
855         case '[':
856             context = CDATA_SECTION_CONTEXT;
857             return readIdentifier("CDATA[",
858                                           LexicalUnits.CDATA_START,
859                                           -1);
860         default:
861             throw createXMLException("invalid.character");
862         }
863
864         case '/':
865         nextChar();
866         context = END_TAG_CONTEXT;
867         return readName(LexicalUnits.END_TAG);
868
869         default:
870         depth++;
871         context = START_TAG_CONTEXT;
872         return readName(LexicalUnits.START_TAG);
873         }
874
875     default:
876             loop: for (;;) {
877                 switch (current) {
878                 default:
879                     nextChar();
880                     break;
881
882                 case -1:
883                 case '&':
884                 case '<':
885                     break loop;
886                 }
887         }
888         return LexicalUnits.CHARACTER_DATA;
889     }
890     }
891
892     /**
893      * Returns the next lexical unit in the context of a end tag.
894      */

895     protected int nextInEndTag() throws IOException JavaDoc, XMLException {
896     switch (current) {
897     case 0x9:
898     case 0xA:
899     case 0xD:
900     case 0x20:
901         do {
902                 nextChar();
903         } while (current != -1 &&
904                      XMLUtilities.isXMLSpace((char)current));
905         return LexicalUnits.S;
906
907     case '>':
908         if (--depth < 0) {
909         throw createXMLException("unexpected.end.tag");
910         } else if (depth == 0) {
911         context = TOP_LEVEL_CONTEXT;
912         } else {
913         context = CONTENT_CONTEXT;
914         }
915         nextChar();
916         return LexicalUnits.END_CHAR;
917
918     default:
919         throw createXMLException("invalid.character");
920     }
921     }
922
923     /**
924      * Returns the next lexical unit in the context of a CDATA section.
925      */

926     protected int nextInCDATASection() throws IOException JavaDoc, XMLException {
927     if (cdataEndRead) {
928         cdataEndRead = false;
929         context = CONTENT_CONTEXT;
930         return LexicalUnits.SECTION_END;
931     }
932     
933     while (current != -1) {
934         while (current != ']' && current != -1) {
935         nextChar();
936         }
937         if (current != -1) {
938         nextChar();
939         if (current == ']') {
940             nextChar();
941             if (current == '>') {
942             break;
943             }
944         }
945         }
946     }
947     if (current == -1) {
948         throw createXMLException("unexpected.eof");
949     }
950     nextChar();
951     cdataEndRead = true;
952     return LexicalUnits.CHARACTER_DATA;
953     }
954
955     /**
956      * Returns the next lexical unit in the context of an XML declaration.
957      */

958     protected int nextInXMLDecl() throws IOException JavaDoc, XMLException {
959     switch (current) {
960     case 0x9:
961     case 0xA:
962     case 0xD:
963     case 0x20:
964         do {
965         nextChar();
966         } while (current != -1 &&
967                      XMLUtilities.isXMLSpace((char)current));
968         return LexicalUnits.S;
969     case 'v':
970         return readIdentifier("ersion",
971                                   LexicalUnits.VERSION_IDENTIFIER,
972                                   -1);
973     case 'e':
974         return readIdentifier("ncoding",
975                                   LexicalUnits.ENCODING_IDENTIFIER,
976                                   -1);
977     case 's':
978         return readIdentifier("tandalone",
979                                   LexicalUnits.STANDALONE_IDENTIFIER,
980                                   -1);
981     case '=':
982         nextChar();
983         return LexicalUnits.EQ;
984
985     case '?':
986         nextChar();
987         if (current != '>') {
988         throw createXMLException("pi.end.expected");
989         }
990         nextChar();
991         context = TOP_LEVEL_CONTEXT;
992         return LexicalUnits.PI_END;
993
994     case '"':
995             attrDelimiter = '"';
996         return readString();
997
998     case '\'':
999             attrDelimiter = '\'';
1000        return readString();
1001
1002    default:
1003        throw createXMLException("invalid.character");
1004    }
1005    }
1006
1007    /**
1008     * Returns the next lexical unit in the context of a doctype.
1009     */

1010    protected int nextInDoctype() throws IOException JavaDoc, XMLException {
1011    switch (current) {
1012    case 0x9:
1013    case 0xA:
1014    case 0xD:
1015    case 0x20:
1016        do {
1017        nextChar();
1018        } while (current != -1 &&
1019                     XMLUtilities.isXMLSpace((char)current));
1020        return LexicalUnits.S;
1021
1022    case '>':
1023        nextChar();
1024        context = TOP_LEVEL_CONTEXT;
1025        return LexicalUnits.END_CHAR;
1026
1027    case 'S':
1028        return readIdentifier("YSTEM",
1029                  LexicalUnits.SYSTEM_IDENTIFIER,
1030                  LexicalUnits.NAME);
1031
1032    case 'P':
1033        return readIdentifier("UBLIC",
1034                  LexicalUnits.PUBLIC_IDENTIFIER,
1035                  LexicalUnits.NAME);
1036
1037    case '"':
1038            attrDelimiter = '"';
1039        return readString();
1040
1041    case '\'':
1042            attrDelimiter = '\'';
1043        return readString();
1044
1045    case '[':
1046        nextChar();
1047        context = DTD_DECLARATIONS_CONTEXT;
1048        inDTD = true;
1049        return LexicalUnits.LSQUARE_BRACKET;
1050
1051    default:
1052        return readName(LexicalUnits.NAME);
1053    }
1054    }
1055
1056    /**
1057     * Returns the next lexical unit in the context dtd declarations.
1058     */

1059    protected int nextInDTDDeclarations() throws IOException JavaDoc, XMLException {
1060    switch (current) {
1061    case 0x9:
1062    case 0xA:
1063    case 0xD:
1064    case 0x20:
1065        do {
1066        nextChar();
1067        } while (current != -1 &&
1068                     XMLUtilities.isXMLSpace((char)current));
1069        return LexicalUnits.S;
1070
1071    case ']':
1072        nextChar();
1073        context = DOCTYPE_CONTEXT;
1074        inDTD = false;
1075        return LexicalUnits.RSQUARE_BRACKET;
1076
1077    case '%':
1078        return readPEReference();
1079
1080    case '<':
1081        switch (nextChar()) {
1082        case '?':
1083        context = PI_CONTEXT;
1084        return readPIStart();
1085
1086        case '!':
1087        switch (nextChar()) {
1088        case '-':
1089            return readComment();
1090
1091        case 'E':
1092            switch (nextChar()) {
1093            case 'L':
1094            context = ELEMENT_DECLARATION_CONTEXT;
1095            return readIdentifier
1096                            ("EMENT",
1097                             LexicalUnits.ELEMENT_DECLARATION_START,
1098                             -1);
1099            case 'N':
1100            context = ENTITY_CONTEXT;
1101            return readIdentifier("TITY",
1102                          LexicalUnits.ENTITY_START,
1103                          -1);
1104            default:
1105            throw createXMLException("invalid.character");
1106            }
1107
1108        case 'A':
1109            context = ATTLIST_CONTEXT;
1110            return readIdentifier("TTLIST",
1111                      LexicalUnits.ATTLIST_START,
1112                      -1);
1113        case 'N':
1114            context = NOTATION_CONTEXT;
1115            return readIdentifier("OTATION",
1116                      LexicalUnits.NOTATION_START,
1117                      -1);
1118        default:
1119            throw createXMLException("invalid.character");
1120        }
1121        default:
1122        throw createXMLException("invalid.character");
1123        }
1124    default:
1125        throw createXMLException("invalid.character");
1126    }
1127    }
1128
1129    /**
1130     * Reads a simple string, like the ones used for version, encoding,
1131     * public/system identifiers...
1132     * The current character must be the string delimiter.
1133     * @return type.
1134     */

1135    protected int readString() throws IOException JavaDoc, XMLException {
1136        do {
1137        nextChar();
1138        } while (current != -1 && current != attrDelimiter);
1139    if (current == -1) {
1140        throw createXMLException("unexpected.eof");
1141    }
1142    nextChar();
1143    return LexicalUnits.STRING;
1144    }
1145
1146    /**
1147     * Reads a comment. '&lt;!-' must have been read.
1148     */

1149    protected int readComment() throws IOException JavaDoc, XMLException {
1150    if (nextChar() != '-') {
1151        throw createXMLException("malformed.comment");
1152    }
1153    int c = nextChar();
1154    while (c != -1) {
1155        while (c != -1 && c != '-') {
1156        c = nextChar();
1157        }
1158        c = nextChar();
1159        if (c == '-') {
1160        break;
1161        }
1162    }
1163    if (c == -1) {
1164        throw createXMLException("unexpected.eof");
1165    }
1166    c = nextChar();
1167    if (c != '>') {
1168        throw createXMLException("malformed.comment");
1169    }
1170    nextChar();
1171    return LexicalUnits.COMMENT;
1172    }
1173
1174    /**
1175     * Reads the given identifier.
1176     * @param s The portion of the identifier to read.
1177     * @param type The lexical unit type of the identifier.
1178     * @param ntype The lexical unit type to set if the identifier do not
1179     * match or -1 if an error must be signaled.
1180     */

1181    protected int readIdentifier(String JavaDoc s, int type, int ntype)
1182    throws IOException JavaDoc, XMLException {
1183    int len = s.length();
1184    for (int i = 0; i < len; i++) {
1185        nextChar();
1186        if (current != s.charAt(i)) {
1187        if (ntype == -1) {
1188            throw createXMLException("invalid.character");
1189        } else {
1190            while (current != -1 &&
1191                           XMLUtilities.isXMLNameCharacter((char)current)) {
1192            nextChar();
1193            }
1194            return ntype;
1195        }
1196        }
1197    }
1198    nextChar();
1199    return type;
1200    }
1201
1202    /**
1203     * Reads a name. The current character must be the first character.
1204     * @param type The lexical unit type to set.
1205     * @return type.
1206     */

1207    protected int readName(int type) throws IOException JavaDoc, XMLException {
1208    if (current == -1) {
1209        throw createXMLException("unexpected.eof");
1210    }
1211    if (!XMLUtilities.isXMLNameFirstCharacter((char)current)) {
1212        throw createXMLException("invalid.name");
1213    }
1214    do {
1215        nextChar();
1216    } while (current != -1 &&
1217                 XMLUtilities.isXMLNameCharacter((char)current));
1218    return type;
1219    }
1220
1221
1222    /**
1223     * Reads a processing instruction start.
1224     * @return type.
1225     */

1226    protected int readPIStart() throws IOException JavaDoc, XMLException {
1227    int c1 = nextChar();
1228    if (c1 == -1) {
1229        throw createXMLException("unexpected.eof");
1230    }
1231    if (!XMLUtilities.isXMLNameFirstCharacter((char)current)) {
1232        throw createXMLException("malformed.pi.target");
1233    }
1234    int c2 = nextChar();
1235    if (c2 == -1 || !XMLUtilities.isXMLNameCharacter((char)c2)) {
1236        return LexicalUnits.PI_START;
1237    }
1238    int c3 = nextChar();
1239    if (c3 == -1 || !XMLUtilities.isXMLNameCharacter((char)c3)) {
1240        return LexicalUnits.PI_START;
1241    }
1242    int c4 = nextChar();
1243    if (c4 != -1 && XMLUtilities.isXMLNameCharacter((char)c4)) {
1244        do {
1245        nextChar();
1246        } while (current != -1 &&
1247                     XMLUtilities.isXMLNameCharacter((char)current));
1248        return LexicalUnits.PI_START;
1249    }
1250    if ((c1 == 'x' || c1 == 'X') &&
1251        (c2 == 'm' || c2 == 'M') &&
1252        (c3 == 'l' || c3 == 'L')) {
1253        throw createXMLException("xml.reserved");
1254    }
1255    return LexicalUnits.PI_START;
1256    }
1257
1258    /**
1259     * Returns the next lexical unit in the context of a element declaration.
1260     */

1261    protected int nextInElementDeclaration() throws IOException JavaDoc, XMLException {
1262    switch (current) {
1263    case 0x9:
1264    case 0xA:
1265    case 0xD:
1266    case 0x20:
1267        do {
1268        nextChar();
1269        } while (current != -1 && XMLUtilities.isXMLSpace((char)current));
1270        return LexicalUnits.S;
1271
1272    case '>':
1273        nextChar();
1274        context = DTD_DECLARATIONS_CONTEXT;
1275        return LexicalUnits.END_CHAR;
1276
1277    case '%':
1278            nextChar();
1279        int t = readName(LexicalUnits.PARAMETER_ENTITY_REFERENCE);
1280        if (current != ';') {
1281        throw createXMLException("malformed.parameter.entity");
1282        }
1283        nextChar();
1284        return t;
1285
1286    case 'E':
1287        return readIdentifier("MPTY",
1288                  LexicalUnits.EMPTY_IDENTIFIER,
1289                  LexicalUnits.NAME);
1290
1291    case 'A':
1292        return readIdentifier("NY",
1293                  LexicalUnits.ANY_IDENTIFIER,
1294                  LexicalUnits.NAME);
1295
1296    case '?':
1297        nextChar();
1298        return LexicalUnits.QUESTION;
1299
1300    case '+':
1301        nextChar();
1302        return LexicalUnits.PLUS;
1303
1304    case '*':
1305        nextChar();
1306        return LexicalUnits.STAR;
1307
1308    case '(':
1309        nextChar();
1310        return LexicalUnits.LEFT_BRACE;
1311
1312    case ')':
1313        nextChar();
1314        return LexicalUnits.RIGHT_BRACE;
1315
1316    case '|':
1317        nextChar();
1318        return LexicalUnits.PIPE;
1319
1320    case ',':
1321        nextChar();
1322        return LexicalUnits.COMMA;
1323
1324    case '#':
1325        return readIdentifier("PCDATA",
1326                                  LexicalUnits.PCDATA_IDENTIFIER,
1327                                  -1);
1328
1329    default:
1330        return readName(LexicalUnits.NAME);
1331    }
1332    }
1333
1334    /**
1335     * Returns the next lexical unit in the context of an attribute list.
1336     */

1337    protected int nextInAttList() throws IOException JavaDoc, XMLException {
1338    switch (current) {
1339    case 0x9:
1340    case 0xA:
1341    case 0xD:
1342    case 0x20:
1343        do {
1344        nextChar();
1345        } while (current != -1 && XMLUtilities.isXMLSpace((char)current));
1346        return LexicalUnits.S;
1347
1348    case '>':
1349        nextChar();
1350        context = DTD_DECLARATIONS_CONTEXT;
1351        return type = LexicalUnits.END_CHAR;
1352        
1353    case '%':
1354        int t = readName(LexicalUnits.PARAMETER_ENTITY_REFERENCE);
1355        if (current != ';') {
1356        throw createXMLException("malformed.parameter.entity");
1357        }
1358        nextChar();
1359        return t;
1360
1361    case 'C':
1362        return readIdentifier("DATA",
1363                  LexicalUnits.CDATA_IDENTIFIER,
1364                  LexicalUnits.NAME);
1365
1366    case 'I':
1367        nextChar();
1368        if (current != 'D') {
1369        do {
1370            nextChar();
1371        } while (current != -1 &&
1372                         XMLUtilities.isXMLNameCharacter((char)current));
1373        return LexicalUnits.NAME;
1374        }
1375        nextChar();
1376        if (current == -1 ||
1377                !XMLUtilities.isXMLNameCharacter((char)current)) {
1378        return LexicalUnits.ID_IDENTIFIER;
1379        }
1380        if (current != 'R') {
1381        do {
1382            nextChar();
1383        } while (current != -1 &&
1384                         XMLUtilities.isXMLNameCharacter((char)current));
1385        return LexicalUnits.NAME;
1386        }
1387        nextChar();
1388        if (current == -1 ||
1389                !XMLUtilities.isXMLNameCharacter((char)current)) {
1390        return LexicalUnits.NAME;
1391        }
1392        if (current != 'E') {
1393        do {
1394            nextChar();
1395        } while (current != -1 &&
1396                         XMLUtilities.isXMLNameCharacter((char)current));
1397        return LexicalUnits.NAME;
1398        }
1399        nextChar();
1400        if (current == -1 ||
1401                !XMLUtilities.isXMLNameCharacter((char)current)) {
1402        return LexicalUnits.NAME;
1403        }
1404        if (current != 'F') {
1405        do {
1406            nextChar();
1407        } while (current != -1 &&
1408                         XMLUtilities.isXMLNameCharacter((char)current));
1409        return LexicalUnits.NAME;
1410        }
1411        nextChar();
1412        if (current == -1 ||
1413                !XMLUtilities.isXMLNameCharacter((char)current)) {
1414        return LexicalUnits.IDREF_IDENTIFIER;
1415        }
1416        if (current != 'S') {
1417        do {
1418            nextChar();
1419        } while (current != -1 &&
1420                         XMLUtilities.isXMLNameCharacter((char)current));
1421        return LexicalUnits.NAME;
1422        }
1423        nextChar();
1424        if (current == -1 ||
1425                !XMLUtilities.isXMLNameCharacter((char)current)) {
1426        return LexicalUnits.IDREFS_IDENTIFIER;
1427        }
1428        do {
1429        nextChar();
1430        } while (current != -1 &&
1431                     XMLUtilities.isXMLNameCharacter((char)current));
1432        return type = LexicalUnits.NAME;
1433
1434    case 'N':
1435            switch (nextChar()) {
1436            default:
1437        do {
1438            nextChar();
1439        } while (current != -1 &&
1440                         XMLUtilities.isXMLNameCharacter((char)current));
1441        return LexicalUnits.NAME;
1442
1443            case 'O':
1444                context = NOTATION_TYPE_CONTEXT;
1445                return readIdentifier("TATION",
1446                                      LexicalUnits.NOTATION_IDENTIFIER,
1447                                      LexicalUnits.NAME);
1448
1449            case 'M':
1450                nextChar();
1451                if (current == -1 ||
1452                    !XMLUtilities.isXMLNameCharacter((char)current)) {
1453                    return LexicalUnits.NAME;
1454                }
1455                if (current != 'T') {
1456                    do {
1457                        nextChar();
1458                    } while (current != -1 &&
1459                             XMLUtilities.isXMLNameCharacter((char)current));
1460                    return LexicalUnits.NAME;
1461                }
1462                nextChar();
1463                if (current == -1 ||
1464                    !XMLUtilities.isXMLNameCharacter((char)current)) {
1465                    return LexicalUnits.NAME;
1466                }
1467                if (current != 'O') {
1468                    do {
1469                        nextChar();
1470                    } while (current != -1 &&
1471                             XMLUtilities.isXMLNameCharacter((char)current));
1472                    return LexicalUnits.NAME;
1473                }
1474                nextChar();
1475                if (current == -1 ||
1476                    !XMLUtilities.isXMLNameCharacter((char)current)) {
1477                    return LexicalUnits.NAME;
1478                }
1479                if (current != 'K') {
1480                    do {
1481                        nextChar();
1482                    } while (current != -1 &&
1483                             XMLUtilities.isXMLNameCharacter((char)current));
1484                    return LexicalUnits.NAME;
1485                }
1486                nextChar();
1487                if (current == -1 ||
1488                    !XMLUtilities.isXMLNameCharacter((char)current)) {
1489                    return LexicalUnits.NAME;
1490                }
1491                if (current != 'E') {
1492                    do {
1493                        nextChar();
1494                    } while (current != -1 &&
1495                             XMLUtilities.isXMLNameCharacter((char)current));
1496                    return LexicalUnits.NAME;
1497                }
1498                nextChar();
1499                if (current == -1 ||
1500                    !XMLUtilities.isXMLNameCharacter((char)current)) {
1501                    return LexicalUnits.NAME;
1502                }
1503                if (current != 'N') {
1504                    do {
1505                        nextChar();
1506                    } while (current != -1 &&
1507                             XMLUtilities.isXMLNameCharacter((char)current));
1508                    return LexicalUnits.NAME;
1509                }
1510                nextChar();
1511                if (current == -1 ||
1512                    !XMLUtilities.isXMLNameCharacter((char)current)) {
1513                    return LexicalUnits.NMTOKEN_IDENTIFIER;
1514                }
1515                if (current != 'S') {
1516                    do {
1517                        nextChar();
1518                    } while (current != -1 &&
1519                             XMLUtilities.isXMLNameCharacter((char)current));
1520                    return LexicalUnits.NAME;
1521                }
1522                nextChar();
1523                if (current == -1 ||
1524                    !XMLUtilities.isXMLNameCharacter((char)current)) {
1525                    return LexicalUnits.NMTOKENS_IDENTIFIER;
1526                }
1527                do {
1528                    nextChar();
1529                } while (current != -1 &&
1530                         XMLUtilities.isXMLNameCharacter((char)current));
1531                return LexicalUnits.NAME;
1532            }
1533
1534    case 'E':
1535        nextChar();
1536        if (current != 'N') {
1537        do {
1538            nextChar();
1539        } while (current != -1 &&
1540                         XMLUtilities.isXMLNameCharacter((char)current));
1541        return LexicalUnits.NAME;
1542        }
1543        nextChar();
1544        if (current == -1 ||
1545                !XMLUtilities.isXMLNameCharacter((char)current)) {
1546        return LexicalUnits.NAME;
1547        }
1548        if (current != 'T') {
1549        do {
1550            nextChar();
1551        } while (current != -1 &&
1552                         XMLUtilities.isXMLNameCharacter((char)current));
1553        return LexicalUnits.NAME;
1554        }
1555        nextChar();
1556        if (current == -1 ||
1557                !XMLUtilities.isXMLNameCharacter((char)current)) {
1558        return LexicalUnits.NAME;
1559        }
1560        if (current != 'I') {
1561        do {
1562            nextChar();
1563        } while (current != -1 &&
1564                         XMLUtilities.isXMLNameCharacter((char)current));
1565        return LexicalUnits.NAME;
1566        }
1567        nextChar();
1568        if (current == -1 ||
1569                !XMLUtilities.isXMLNameCharacter((char)current)) {
1570        return LexicalUnits.NAME;
1571        }
1572        if (current != 'T') {
1573        do {
1574            nextChar();
1575        } while (current != -1 &&
1576                         XMLUtilities.isXMLNameCharacter((char)current));
1577        return type = LexicalUnits.NAME;
1578        }
1579        nextChar();
1580        if (current == -1 ||
1581                !XMLUtilities.isXMLNameCharacter((char)current)) {
1582        return LexicalUnits.NAME;
1583        }
1584        switch (current) {
1585        case 'Y':
1586        nextChar();
1587        if (current == -1 ||
1588                    !XMLUtilities.isXMLNameCharacter((char)current)) {
1589            return LexicalUnits.ENTITY_IDENTIFIER;
1590        }
1591        do {
1592            nextChar();
1593        } while (current != -1 &&
1594                         XMLUtilities.isXMLNameCharacter((char)current));
1595        return LexicalUnits.NAME;
1596        case 'I':
1597        nextChar();
1598        if (current == -1 ||
1599                    !XMLUtilities.isXMLNameCharacter((char)current)) {
1600            return LexicalUnits.NAME;
1601        }
1602        if (current != 'E') {
1603            do {
1604            nextChar();
1605            } while (current != -1 &&
1606                             XMLUtilities.isXMLNameCharacter((char)current));
1607            return LexicalUnits.NAME;
1608        }
1609        nextChar();
1610        if (current == -1 ||
1611                    !XMLUtilities.isXMLNameCharacter((char)current)) {
1612            return LexicalUnits.NAME;
1613        }
1614        if (current != 'S') {
1615            do {
1616            nextChar();
1617            } while (current != -1 &&
1618                             XMLUtilities.isXMLNameCharacter((char)current));
1619            return LexicalUnits.NAME;
1620        }
1621        return LexicalUnits.ENTITIES_IDENTIFIER;
1622
1623        default:
1624        if (current == -1 ||
1625                    !XMLUtilities.isXMLNameCharacter((char)current)) {
1626            return LexicalUnits.NAME;
1627        }
1628        do {
1629            nextChar();
1630        } while (current != -1 &&
1631                         XMLUtilities.isXMLNameCharacter((char)current));
1632        return LexicalUnits.NAME;
1633        }
1634
1635    case '"':
1636            attrDelimiter = '"';
1637        nextChar();
1638        if (current == -1) {
1639        throw createXMLException("unexpected.eof");
1640        }
1641        if (current != '"' && current != '&') {
1642        do {
1643            nextChar();
1644        } while (current != -1 && current != '"' && current != '&');
1645        }
1646        switch (current) {
1647        case '&':
1648        context = ATTRIBUTE_VALUE_CONTEXT;
1649                return LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT;
1650
1651        case '"':
1652        nextChar();
1653                return LexicalUnits.STRING;
1654
1655        default:
1656        throw createXMLException("invalid.character");
1657        }
1658
1659    case '\'':
1660            attrDelimiter = '\'';
1661        nextChar();
1662        if (current == -1) {
1663        throw createXMLException("unexpected.eof");
1664        }
1665        if (current != '\'' && current != '&') {
1666        do {
1667            nextChar();
1668        } while (current != -1 && current != '\'' && current != '&');
1669        }
1670        switch (current) {
1671        case '&':
1672        context = ATTRIBUTE_VALUE_CONTEXT;
1673                return LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT;
1674
1675        case '\'':
1676        nextChar();
1677                return LexicalUnits.STRING;
1678
1679        default:
1680        throw createXMLException("invalid.character");
1681        }
1682
1683    case '#':
1684        switch (nextChar()) {
1685        case 'R':
1686        return readIdentifier("EQUIRED",
1687                                      LexicalUnits.REQUIRED_IDENTIFIER,
1688                                      -1);
1689
1690        case 'I':
1691        return readIdentifier("MPLIED",
1692                                      LexicalUnits.IMPLIED_IDENTIFIER,
1693                                      -1);
1694        case 'F':
1695        return readIdentifier("IXED",
1696                                      LexicalUnits.FIXED_IDENTIFIER,
1697                                      -1);
1698        default:
1699        throw createXMLException("invalid.character");
1700        }
1701
1702    case '(':
1703        nextChar();
1704            context = ENUMERATION_CONTEXT;
1705        return LexicalUnits.LEFT_BRACE;
1706
1707    default:
1708        return readName(LexicalUnits.NAME);
1709    }
1710    }
1711
1712    /**
1713     * Returns the next lexical unit in the context of a notation.
1714     */

1715    protected int nextInNotation() throws IOException JavaDoc, XMLException {
1716    switch (current) {
1717    case 0x9:
1718    case 0xA:
1719    case 0xD:
1720    case 0x20:
1721        do {
1722        nextChar();
1723        } while (current != -1 &&
1724                     XMLUtilities.isXMLSpace((char)current));
1725        return LexicalUnits.S;
1726
1727    case '>':
1728        nextChar();
1729        context = DTD_DECLARATIONS_CONTEXT;
1730        return LexicalUnits.END_CHAR;
1731
1732    case '%':
1733        int t = readName(LexicalUnits.PARAMETER_ENTITY_REFERENCE);
1734        if (current != ';') {
1735        throw createXMLException("malformed.parameter.entity");
1736        }
1737        nextChar();
1738        return t;
1739    case 'S':
1740        return readIdentifier("YSTEM",
1741                  LexicalUnits.SYSTEM_IDENTIFIER,
1742                  LexicalUnits.NAME);
1743
1744    case 'P':
1745        return readIdentifier("UBLIC",
1746                  LexicalUnits.PUBLIC_IDENTIFIER,
1747                  LexicalUnits.NAME);
1748
1749    case '"':
1750            attrDelimiter = '"';
1751        return readString();
1752
1753    case '\'':
1754            attrDelimiter = '\'';
1755        return readString();
1756
1757    default:
1758        return readName(LexicalUnits.NAME);
1759    }
1760    }
1761
1762    /**
1763     * Returns the next lexical unit in the context of an entity.
1764     */

1765    protected int nextInEntity() throws IOException JavaDoc, XMLException {
1766    switch (current) {
1767    case 0x9:
1768    case 0xA:
1769    case 0xD:
1770    case 0x20:
1771        do {
1772        nextChar();
1773        } while (current != -1 &&
1774                     XMLUtilities.isXMLSpace((char)current));
1775        return LexicalUnits.S;
1776
1777    case '>':
1778        nextChar();
1779        context = DTD_DECLARATIONS_CONTEXT;
1780        return LexicalUnits.END_CHAR;
1781
1782    case '%':
1783        nextChar();
1784        return LexicalUnits.PERCENT;
1785
1786    case 'S':
1787        return readIdentifier("YSTEM",
1788                  LexicalUnits.SYSTEM_IDENTIFIER,
1789                  LexicalUnits.NAME);
1790
1791    case 'P':
1792        return readIdentifier("UBLIC",
1793                  LexicalUnits.PUBLIC_IDENTIFIER,
1794                  LexicalUnits.NAME);
1795
1796    case 'N':
1797        return readIdentifier("DATA",
1798                  LexicalUnits.NDATA_IDENTIFIER,
1799                  LexicalUnits.NAME);
1800
1801    case '"':
1802            attrDelimiter = '"';
1803        nextChar();
1804        if (current == -1) {
1805        throw createXMLException("unexpected.eof");
1806        }
1807            
1808        if (current != '"' && current != '&' && current != '%') {
1809        do {
1810            nextChar();
1811        } while (current != -1 &&
1812                         current != '"' &&
1813                         current != '&' &&
1814                         current != '%');
1815        }
1816        switch (current) {
1817        default:
1818        throw createXMLException("invalid.character");
1819
1820        case '&':
1821        case '%':
1822        context = ENTITY_VALUE_CONTEXT;
1823        break;
1824
1825        case '"':
1826        nextChar();
1827                return LexicalUnits.STRING;
1828        }
1829        return LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT;
1830
1831    case '\'':
1832            attrDelimiter = '\'';
1833        nextChar();
1834        if (current == -1) {
1835        throw createXMLException("unexpected.eof");
1836        }
1837            
1838        if (current != '\'' && current != '&' && current != '%') {
1839        do {
1840            nextChar();
1841        } while (current != -1 &&
1842                         current != '\'' &&
1843                         current != '&' &&
1844                         current != '%');
1845        }
1846        switch (current) {
1847        default:
1848        throw createXMLException("invalid.character");
1849
1850        case '&':
1851        case '%':
1852        context = ENTITY_VALUE_CONTEXT;
1853        break;
1854
1855        case '\'':
1856        nextChar();
1857                return LexicalUnits.STRING;
1858        }
1859        return LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT;
1860
1861    default:
1862        return readName(LexicalUnits.NAME);
1863    }
1864    }
1865
1866    /**
1867     * Returns the next lexical unit in the context of an entity value.
1868     */

1869    protected int nextInEntityValue() throws IOException JavaDoc, XMLException {
1870    switch (current) {
1871    case '&':
1872        return readReference();
1873
1874    case '%':
1875        int t = nextChar();
1876        readName(LexicalUnits.PARAMETER_ENTITY_REFERENCE);
1877        if (current != ';') {
1878        throw createXMLException("invalid.parameter.entity");
1879        }
1880        nextChar();
1881        return t;
1882
1883    default:
1884        while (current != -1 &&
1885                   current != attrDelimiter &&
1886                   current != '&' &&
1887                   current != '%') {
1888        nextChar();
1889        }
1890        switch (current) {
1891        case -1:
1892        throw createXMLException("unexpected.eof");
1893
1894        case '\'':
1895        case '"':
1896        nextChar();
1897        context = ENTITY_CONTEXT;
1898                return LexicalUnits.STRING;
1899        }
1900        return LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT;
1901    }
1902    }
1903    
1904    /**
1905     * Returns the next lexical unit in the context of a notation type.
1906     */

1907    protected int nextInNotationType() throws IOException JavaDoc, XMLException {
1908        switch (current) {
1909    case 0x9:
1910    case 0xA:
1911    case 0xD:
1912    case 0x20:
1913        do {
1914        nextChar();
1915        } while (current != -1 && XMLUtilities.isXMLSpace((char)current));
1916        return LexicalUnits.S;
1917
1918    case '|':
1919        nextChar();
1920        return LexicalUnits.PIPE;
1921
1922    case '(':
1923        nextChar();
1924        return LexicalUnits.LEFT_BRACE;
1925
1926    case ')':
1927        nextChar();
1928            context = ATTLIST_CONTEXT;
1929        return LexicalUnits.RIGHT_BRACE;
1930
1931        default:
1932            return readName(LexicalUnits.NAME);
1933        }
1934    }
1935
1936    /**
1937     * Returns the next lexical unit in the context of an enumeration.
1938     */

1939    protected int nextInEnumeration() throws IOException JavaDoc, XMLException {
1940        switch (current) {
1941    case 0x9:
1942    case 0xA:
1943    case 0xD:
1944    case 0x20:
1945        do {
1946        nextChar();
1947        } while (current != -1 && XMLUtilities.isXMLSpace((char)current));
1948        return LexicalUnits.S;
1949
1950    case '|':
1951        nextChar();
1952        return LexicalUnits.PIPE;
1953
1954    case ')':
1955        nextChar();
1956            context = ATTLIST_CONTEXT;
1957        return LexicalUnits.RIGHT_BRACE;
1958
1959        default:
1960            return readNmtoken();
1961        }
1962    }
1963
1964
1965    /**
1966     * Reads an entity or character reference. The current character
1967     * must be '&amp;'.
1968     * @return type.
1969     */

1970    protected int readReference() throws IOException JavaDoc, XMLException {
1971    nextChar();
1972    if (current == '#') {
1973        nextChar();
1974        int i = 0;
1975        switch (current) {
1976        case 'x':
1977        do {
1978            i++;
1979            nextChar();
1980        } while ((current >= '0' && current <= '9') ||
1981             (current >= 'a' && current <= 'f') ||
1982             (current >= 'A' && current <= 'F'));
1983        break;
1984
1985        default:
1986        do {
1987            i++;
1988            nextChar();
1989        } while (current >= '0' && current <= '9');
1990        break;
1991
1992        case -1:
1993        throw createXMLException("unexpected.eof");
1994        }
1995        if (i == 1 || current != ';') {
1996        throw createXMLException("character.reference");
1997        }
1998        nextChar();
1999        return LexicalUnits.CHARACTER_REFERENCE;
2000    } else {
2001        int t = readName(LexicalUnits.ENTITY_REFERENCE);
2002        if (current != ';') {
2003        throw createXMLException("character.reference");
2004        }
2005        nextChar();
2006        return t;
2007    }
2008    }
2009
2010    /**
2011     * Reads a parameter entity reference. The current character must be '%'.
2012     * @return type.
2013     */

2014    protected int readPEReference() throws IOException JavaDoc, XMLException {
2015    nextChar();
2016    if (current == -1) {
2017        throw createXMLException("unexpected.eof");
2018    }
2019    if (!XMLUtilities.isXMLNameFirstCharacter((char)current)) {
2020        throw createXMLException("invalid.parameter.entity");
2021    }
2022    do {
2023        nextChar();
2024    } while (current != -1 &&
2025                 XMLUtilities.isXMLNameCharacter((char)current));
2026    if (current != ';') {
2027        throw createXMLException("invalid.parameter.entity");
2028    }
2029    nextChar();
2030    return LexicalUnits.PARAMETER_ENTITY_REFERENCE;
2031    }
2032
2033    /**
2034     * Reads a Nmtoken. The current character must be the first character.
2035     * @return LexicalUnits.NMTOKEN.
2036     */

2037    protected int readNmtoken() throws IOException JavaDoc, XMLException {
2038    if (current == -1) {
2039        throw createXMLException("unexpected.eof");
2040    }
2041    while (XMLUtilities.isXMLNameCharacter((char)current)) {
2042        nextChar();
2043    }
2044    return LexicalUnits.NMTOKEN;
2045    }
2046
2047    /**
2048     * Sets the value of the current char to the next character or -1 if the
2049     * end of stream has been reached.
2050     */

2051    protected int nextChar() throws IOException JavaDoc {
2052        current = reader.read();
2053
2054        if (current == -1) {
2055            return current;
2056        }
2057
2058        if (position == buffer.length) {
2059            char[] t = new char[position * 3 / 2];
2060            for (int i = 0; i < position; i++) {
2061                t[i] = buffer[i];
2062            }
2063            buffer = t;
2064        }
2065
2066        return buffer[position++] = (char)current;
2067    }
2068
2069    /**
2070     * Returns an XMLException initialized with the given message key.
2071     */

2072    protected XMLException createXMLException(String JavaDoc message) {
2073        String JavaDoc m;
2074        try {
2075            m = formatMessage(message,
2076                              new Object JavaDoc[] {
2077                                  new Integer JavaDoc(reader.getLine()),
2078                                  new Integer JavaDoc(reader.getColumn())
2079                              });
2080        } catch (MissingResourceException JavaDoc e) {
2081            m = message;
2082        }
2083        return new XMLException(m);
2084    }
2085
2086}
2087
Popular Tags