KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > xmlpull > mxp1 > MXParserNonValidating


1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil; -*- //------100-columns-wide------>|*/
2 /*
3  * Copyright (c) 2002 Extreme! Lab, Indiana University. All rights reserved.
4  *
5  * This software is open source. See the bottom of this file for the licence.
6  *
7  * $Id: MXParserNonValidating.java,v 1.2 2002/08/23 19:50:54 aslom Exp $
8  */

9
10 package org.xmlpull.mxp1;
11
12 import java.io.IOException JavaDoc;
13 import org.xmlpull.v1.XmlPullParserException;
14
15 /**
16  * Extend MXP parser to be full non validating XML 1.0 parser
17  * (added internal DTD parsing and support for full XML 1.0 (or 1.1) character classes).
18  *
19  * @author <a HREF="http://www.extreme.indiana.edu/~aslom/">Aleksander Slominski</a>
20  */

21
22 public class MXParserNonValidating extends MXParserCachingStrings
23 {
24     private boolean processDocDecl;
25
26     public MXParserNonValidating() {
27         super();
28     }
29
30     /**
31      * This allows to change processing DOCDECL (controls if parser is non-validating).
32      */

33     public void setFeature(String JavaDoc name,
34                            boolean state) throws XmlPullParserException
35     {
36         if(FEATURE_PROCESS_DOCDECL.equals(name)) {
37             if(eventType != START_DOCUMENT) throw new XmlPullParserException(
38                     "process DOCDECL feature can only be changed before parsing", this, null);
39             processDocDecl = state;
40             if(state == false) {
41                 //
42
}
43         } else {
44             super.setFeature(name, state);
45         }
46     }
47
48     public boolean getFeature(String JavaDoc name)
49     {
50         if(FEATURE_PROCESS_DOCDECL.equals(name)) {
51             return processDocDecl;
52         } else {
53             return super.getFeature(name);
54         }
55     }
56
57
58     // will need to overwrite more() and processEntityRef ...
59
protected char more() throws IOException JavaDoc, XmlPullParserException {
60         return super.more();
61     }
62
63     protected char[] lookuEntityReplacement(int entitNameLen)
64         throws XmlPullParserException, IOException JavaDoc
65
66     {
67         if(!allStringsInterned) {
68             int hash = fastHash(buf, posStart, posEnd - posStart);
69             LOOP:
70             for (int i = entityEnd - 1; i >= 0; --i)
71             {
72                 if(hash == entityNameHash[ i ] && entitNameLen == entityNameBuf[ i ].length) {
73                     char[] entityBuf = entityNameBuf[ i ];
74                     for (int j = 0; j < entitNameLen; j++)
75                     {
76                         if(buf[posStart + j] != entityBuf[j]) continue LOOP;
77                     }
78                     if(tokenize) text = entityReplacement[ i ];
79                     return entityReplacementBuf[ i ];
80                 }
81             }
82         } else {
83             entityRefName = newString(buf, posStart, posEnd - posStart);
84             for (int i = entityEnd - 1; i >= 0; --i)
85             {
86                 // take advantage that interning for newStirng is enforced
87
if(entityRefName == entityName[ i ]) {
88                     if(tokenize) text = entityReplacement[ i ];
89                     return entityReplacementBuf[ i ];
90                 }
91             }
92         }
93         return null;
94     }
95
96
97     protected void parseDocdecl()
98         throws XmlPullParserException, IOException JavaDoc
99     {
100         //make sure that tokenize flag is disabled temporarily!!!!
101
boolean oldTokenize = tokenize;
102         try {
103             //ASSUMPTION: seen <!D
104
char ch = more();
105             if(ch != 'O') throw new XmlPullParserException(
106                     "expected <!DOCTYPE", this, null);
107             ch = more();
108             if(ch != 'C') throw new XmlPullParserException(
109                     "expected <!DOCTYPE", this, null);
110             ch = more();
111             if(ch != 'T') throw new XmlPullParserException(
112                     "expected <!DOCTYPE", this, null);
113             ch = more();
114             if(ch != 'Y') throw new XmlPullParserException(
115                     "expected <!DOCTYPE", this, null);
116             ch = more();
117             if(ch != 'P') throw new XmlPullParserException(
118                     "expected <!DOCTYPE", this, null);
119             ch = more();
120             if(ch != 'E') throw new XmlPullParserException(
121                     "expected <!DOCTYPE", this, null);
122             posStart = pos;
123             // do simple and crude scanning for end of doctype
124

125             // [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('['
126
// (markupdecl | DeclSep)* ']' S?)? '>'
127
ch = requireNextS();
128             int nameStart = pos;
129             ch = readName(ch);
130             int nameEnd = pos;
131             ch = skipS(ch);
132             // [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
133
if(ch == 'S' || ch == 'P') {
134                 ch = processExternalId(ch);
135                 ch = skipS(ch);
136             }
137             if(ch == '[') {
138                 processInternalSubset();
139             }
140             ch = skipS(ch);
141             if(ch != '>') {
142                 throw new XmlPullParserException(
143                     "expected > to finish <[DOCTYPE but got "+printable(ch), this, null);
144             }
145             posEnd = pos - 1;
146         } finally {
147             tokenize = oldTokenize;
148         }
149     }
150     protected char processExternalId(char ch)
151         throws XmlPullParserException, IOException JavaDoc
152     {
153         // [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
154
// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
155
// [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
156
// [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
157

158         //TODO
159

160         return ch;
161     }
162
163     protected void processInternalSubset()
164         throws XmlPullParserException, IOException JavaDoc
165     {
166         // [28] ... (markupdecl | DeclSep)* ']' // [WFC: External Subset]
167
// [28a] DeclSep ::= PEReference | S // [WFC: PE Between Declarations]
168

169         // [69] PEReference ::= '%' Name ';' //[WFC: No Recursion] [WFC: In DTD]
170
while(true) {
171             char ch = more(); // firs ttime called it will skip initial "["
172
if(ch == ']') break;
173             if(ch == '%') {
174                 processPEReference();
175             } else if(isS(ch)) {
176                 ch = skipS(ch);
177             } else {
178                 processMarkupDecl(ch);
179             }
180         }
181     }
182
183     protected void processPEReference()
184         throws XmlPullParserException, IOException JavaDoc
185     {
186         //TODO
187
}
188     protected void processMarkupDecl(char ch)
189         throws XmlPullParserException, IOException JavaDoc
190     {
191         // [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
192
// [WFC: PEs in Internal Subset]
193

194
195         //BIG SWITCH statement
196
if(ch != '<') {
197             throw new XmlPullParserException("expected < for markupdecl in DTD not "+printable(ch),
198                                              this, null);
199         }
200         ch = more();
201         if(ch == '?') {
202             parsePI();
203         } else if(ch == '!') {
204             ch = more();
205             if(ch == '-') {
206                 // note: if(tokenize == false) posStart/End is NOT changed!!!!
207
parseComment();
208             } else {
209                 ch = more();
210                 if(ch == 'A') {
211                     processAttlistDecl(ch); //A-TTLIST
212
} else if(ch == 'E') {
213                     ch = more();
214                     if(ch == 'L') {
215                         processElementDecl(ch); //EL-EMENT
216
} else if(ch == 'N') {
217                         processEntityDecl(ch); // EN-TITY
218
} else {
219                         throw new XmlPullParserException(
220                             "expected ELEMENT or ENTITY after <! in DTD not "+printable(ch),
221                             this, null);
222                     }
223                 } else if(ch == 'N') {
224                     processNotationDecl(ch); //N-OTATION
225
} else {
226                     throw new XmlPullParserException(
227                         "expected markupdecl after <! in DTD not "+printable(ch),this, null);
228                 }
229             }
230
231         } else {
232             throw new XmlPullParserException("expected markupdecl in DTD not "+printable(ch),
233                                              this, null);
234         }
235     }
236
237     protected void processElementDecl(char ch)
238         throws XmlPullParserException, IOException JavaDoc
239     {
240         //[45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
241
//???? [VC: Unique Element Type Declaration]
242
// [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
243
// [47] children ::= (choice | seq) ('?' | '*' | '+')?
244
// [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
245
// [49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
246
// [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
247
// [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
248
// | '(' S? '#PCDATA' S? ')'
249

250         //assert ch == 'L'
251
ch = requireNextS();
252         readName(ch);
253         ch = requireNextS();
254         // readContentSpec(ch);
255
}
256
257     protected void processAttlistDecl(char ch)
258         throws XmlPullParserException, IOException JavaDoc
259     {
260         // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
261
// [53] AttDef ::= S Name S AttType S DefaultDecl
262
// [54] AttType ::= StringType | TokenizedType | EnumeratedType
263
// [55] StringType ::= 'CDATA'
264
// [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN'
265
// | 'NMTOKENS'
266
// [57] EnumeratedType ::= NotationType | Enumeration
267
// [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
268
// [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
269
// [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
270
// [WFC: No < in Attribute Values]
271

272         //assert ch == 'A'
273

274     }
275
276
277     protected void processEntityDecl(char ch)
278         throws XmlPullParserException, IOException JavaDoc
279     {
280
281         // [70] EntityDecl ::= GEDecl | PEDecl
282
// [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
283
// [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
284
// [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
285
// [74] PEDef ::= EntityValue | ExternalID
286
// [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
287

288         //[9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
289
// | "'" ([^%&'] | PEReference | Reference)* "'"
290

291         //assert ch == 'N'
292

293     }
294
295     protected void processNotationDecl(char ch)
296         throws XmlPullParserException, IOException JavaDoc
297     {
298
299         // [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
300
// [83] PublicID ::= 'PUBLIC' S PubidLiteral
301

302         //assert ch == 'N'
303
}
304
305
306
307     protected char readName(char ch)
308         throws XmlPullParserException, IOException JavaDoc
309     {
310         if(isNameStartChar(ch)) {
311             throw new XmlPullParserException(
312                 "XML name must start with name start character not "+printable(ch), this, null);
313         }
314         while(isNameChar(ch)) {
315             ch = more();
316         }
317         return ch;
318     }
319 }
320
321
322 /*
323  * Indiana University Extreme! Lab Software License, Version 1.1.1
324  *
325  *
326  * Copyright (c) 2002 Extreme! Lab, Indiana University. All rights
327  * reserved.
328  *
329  * Redistribution and use in source and binary forms, with or without
330  * modification, are permitted provided that the following conditions
331  * are met:
332  *
333  * 1. Redistributions of source code must retain the above copyright
334  * notice, this list of conditions and the following disclaimer.
335  *
336  * 2. Redistributions in binary form must reproduce the above copyright
337  * notice, this list of conditions and the following disclaimer in
338  * the documentation and/or other materials provided with the
339  * distribution.
340  *
341  * 3. The end-user documentation included with the redistribution,
342  * if any, must include the following acknowledgment:
343  * "This product includes software developed by the Indiana
344  * University Extreme! Lab (http://www.extreme.indiana.edu/)."
345  * Alternately, this acknowledgment may appear in the software itself,
346  * if and wherever such third-party acknowledgments normally appear.
347  *
348  * 4. The names "Indiana University" and "Indiana University
349  * Extreme! Lab" must not be used to endorse or promote products
350  * derived from this software without prior written permission. For
351  * written permission, please contact http://www.extreme.indiana.edu/.
352  *
353  * 5. Products derived from this software may not use "Indiana
354  * University" name nor may "Indiana University" appear in their name,
355  * without prior written permission of the Indiana University.
356  *
357  *
358  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
359  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
360  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
361  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
362  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
363  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
364  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
365  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
366  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
367  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
368  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
369  * SUCH DAMAGE.
370  */

371
372
Popular Tags