KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > xmlpull > v1 > XmlPullParser


1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil; -*- //------100-columns-wide------>|*/
2 // for license please see accompanying LICENSE.txt file (available also at http://www.xmlpull.org/)
3

4 package org.xmlpull.v1;
5
6 import java.io.InputStream JavaDoc;
7 import java.io.IOException JavaDoc;
8 import java.io.Reader JavaDoc;
9
10 /**
11  * XML Pull Parser is an interface that defines parsing functionality provided
12  * in <a HREF="http://www.xmlpull.org/">XMLPULL V1 API</a> (visit this website to
13  * learn more about API and its implementations).
14  *
15  * <p>There are the following different
16  * kinds of parser, depending on which features are set:<ul>
17  * <li><b>non-validating</b> parser as defined in XML 1.0 spec when
18  * FEATURE_PROCESS_DOCDECL is set to true
19  * <li><b>validating parser</b> as defined in XML 1.0 spec when
20  * FEATURE_VALIDATION is true (and that implies that FEATURE_PROCESS_DOCDECL is true)
21  * <li>when FEATURE_PROCESS_DOCDECL is false (this is the default; if a
22  * different value is required it must be set before parsing is started)
23  * then the parser behaves like an XML 1.0 compliant non-validating parser under the condition that
24  * <em>no DOCDECL is present</em> in XML documents
25  * (internal entities can still be defined with defineEntityReplacementText()).
26  * This mode of operation is intended <b>for operation in constrained environments</b> such as J2ME.
27  * </ul>
28  *
29  *
30  * <p>There are two key methods: next() and nextToken(). While next() provides
31  * access to high level parsing events, nextToken() allows access to lower
32  * level tokens.
33  *
34  * <p>The current event state of the parser
35  * can be determined by calling the
36  * <a HREF="#getEventType()">getEventType()</a> method.
37  * Initially, the parser is in the <a HREF="#START_DOCUMENT">START_DOCUMENT</a>
38  * state.
39  *
40  * <p>The method <a HREF="#next()">next()</a> advances the parser to the
41  * next event. The int value returned from next determines the current parser
42  * state and is identical to the value returned from following calls to
43  * getEventType ().
44  *
45  * <p>The following event types are seen by next()<dl>
46  * <dt><a HREF="#START_TAG">START_TAG</a><dd> An XML start tag was read.
47  * <dt><a HREF="#TEXT">TEXT</a><dd> Text content was read;
48  * the text content can be retrieved using the getText() method.
49  * (when in validating mode next() will not report ignorable whitespaces, use nextToken() instead)
50  * <dt><a HREF="#END_TAG">END_TAG</a><dd> An end tag was read
51  * <dt><a HREF="#END_DOCUMENT">END_DOCUMENT</a><dd> No more events are available
52  * </dl>
53  *
54  * <p>After the first next() or nextToken() (or any other next*() method)
55  * is called, the user application can obtain the
56  * XML version, standalone and encoding from the XML declaration
57  * in the following ways:<ul>
58  * <li><b>version</b>:
59  * getProperty(&quot;<a HREF="http://xmlpull.org/v1/doc/properties.html#xmldecl-version">http://xmlpull.org/v1/doc/properties.html#xmldecl-version</a>&quot;)
60  * returns String ("1.0") or null if XMLDecl was not read or if property is not supported
61  * <li><b>standalone</b>:
62  * getProperty(&quot;<a HREF="http://xmlpull.org/v1/doc/features.html#xmldecl-standalone">http://xmlpull.org/v1/doc/features.html#xmldecl-standalone</a>&quot;)
63  * returns Boolean: null if there was no standalone declaration
64  * or if property is not supported;
65  * otherwise returns Boolean(true) if standalone="yes" and Boolean(false) when standalone="no"
66  * <li><b>encoding</b>: obtained from getInputEncoding()
67  * null if stream had unknown encoding (not set in setInputStream)
68  * and it was not declared in XMLDecl
69  * </ul>
70  *
71  * A minimal example for using this API may look as follows:
72  * <pre>
73  * import java.io.IOException;
74  * import java.io.StringReader;
75  *
76  * import org.xmlpull.v1.XmlPullParser;
77  * import org.xmlpull.v1.<a HREF="XmlPullParserException.html">XmlPullParserException</a>;
78  * import org.xmlpull.v1.<a HREF="XmlPullParserFactory.html">XmlPullParserFactory</a>;
79  *
80  * public class SimpleXmlPullApp
81  * {
82  *
83  * public static void main (String args[])
84  * throws XmlPullParserException, IOException
85  * {
86  * XmlPullParserFactory factory = XmlPullParserFactory.newInstance();
87  * factory.setNamespaceAware(true);
88  * XmlPullParser xpp = factory.newPullParser();
89  *
90  * xpp.<a HREF="#setInput">setInput</a>( new StringReader ( "&lt;foo>Hello World!&lt;/foo>" ) );
91  * int eventType = xpp.getEventType();
92  * while (eventType != xpp.END_DOCUMENT) {
93  * if(eventType == xpp.START_DOCUMENT) {
94  * System.out.println("Start document");
95  * } else if(eventType == xpp.END_DOCUMENT) {
96  * System.out.println("End document");
97  * } else if(eventType == xpp.START_TAG) {
98  * System.out.println("Start tag "+xpp.<a HREF="#getName()">getName()</a>);
99  * } else if(eventType == xpp.END_TAG) {
100  * System.out.println("End tag "+xpp.getName());
101  * } else if(eventType == xpp.TEXT) {
102  * System.out.println("Text "+xpp.<a HREF="#getText()">getText()</a>);
103  * }
104  * eventType = xpp.next();
105  * }
106  * }
107  * }
108  * </pre>
109  *
110  * <p>The above example will generate the following output:
111  * <pre>
112  * Start document
113  * Start tag foo
114  * Text Hello World!
115  * End tag foo
116  * </pre>
117  *
118  * <p>For more details on API usage, please refer to the
119  * quick Introduction available at <a HREF="http://www.xmlpull.org">http://www.xmlpull.org</a>
120  *
121  * @see XmlPullParserFactory
122  * @see #defineEntityReplacementText
123  * @see #getName
124  * @see #getNamespace
125  * @see #getText
126  * @see #next
127  * @see #nextToken
128  * @see #setInput
129  * @see #FEATURE_PROCESS_DOCDECL
130  * @see #FEATURE_VALIDATION
131  * @see #START_DOCUMENT
132  * @see #START_TAG
133  * @see #TEXT
134  * @see #END_TAG
135  * @see #END_DOCUMENT
136  *
137  * @author <a HREF="http://www-ai.cs.uni-dortmund.de/PERSONAL/haustein.html">Stefan Haustein</a>
138  * @author <a HREF="http://www.extreme.indiana.edu/~aslom/">Aleksander Slominski</a>
139  */

140
141 public interface XmlPullParser {
142
143     /** This constant represents the default namespace (empty string "") */
144     public static final String JavaDoc NO_NAMESPACE = "";
145
146     // ----------------------------------------------------------------------------
147
// EVENT TYPES as reported by next()
148

149     /**
150      * Signals that the parser is at the very beginning of the document
151      * and nothing was read yet.
152      * This event type can only be observed by calling getEventType()
153      * before the first call to next(), nextToken(), or nextTag().
154      *
155      * @see #next
156      * @see #nextToken
157      */

158     public final static int START_DOCUMENT = 0;
159
160     /**
161      * Logical end of the xml document. Returned from getEventType, next()
162      * and nextToken()
163      * when the end of the input document has been reached.
164      * <p><strong>NOTE:</strong> calling again
165      * <a HREF="#next()">next()</a> or <a HREF="#nextToken()">nextToken()</a>
166      * will result in exception being thrown.
167      *
168      * @see #next
169      * @see #nextToken
170      */

171     public final static int END_DOCUMENT = 1;
172
173     /**
174      * Returned from getEventType(),
175      * <a HREF="#next()">next()</a>, <a HREF="#nextToken()">nextToken()</a> when
176      * a start tag was read.
177      * The name of start tag is available from getName(), its namespace and prefix are
178      * available from getNamespace() and getPrefix()
179      * if <a HREF='#FEATURE_PROCESS_NAMESPACES'>namespaces are enabled</a>.
180      * See getAttribute* methods to retrieve element attributes.
181      * See getNamespace* methods to retrieve newly declared namespaces.
182      *
183      * @see #next
184      * @see #nextToken
185      * @see #getName
186      * @see #getPrefix
187      * @see #getNamespace
188      * @see #getAttributeCount
189      * @see #getDepth
190      * @see #getNamespaceCount
191      * @see #getNamespace
192      * @see #FEATURE_PROCESS_NAMESPACES
193      */

194     public final static int START_TAG = 2;
195
196     /**
197      * Returned from getEventType(), <a HREF="#next()">next()</a>, or
198      * <a HREF="#nextToken()">nextToken()</a> when an end tag was read.
199      * The name of the end tag is available from getName(), its
200      * namespace and prefix are
201      * available from getNamespace() and getPrefix().
202      *
203      * @see #next
204      * @see #nextToken
205      * @see #getName
206      * @see #getPrefix
207      * @see #getNamespace
208      * @see #FEATURE_PROCESS_NAMESPACES
209      */

210     public final static int END_TAG = 3;
211
212
213     /**
214      * Character data was read and is available by calling getText().
215      * <p><strong>Please note:</strong> <a HREF="#next()">next()</a> will
216      * accumulate multiple
217      * events into one TEXT event, skipping IGNORABLE_WHITESPACE,
218      * PROCESSING_INSTRUCTION and COMMENT events.
219      * In contrast, <a HREF="#nextToken()">nextToken()</a> will stop reading
220      * text when any other event is observed.
221      * Also, when the state was reached by calling next(), the text value will
222      * be normalized, whereas getText() will
223      * return unnormalized content in the case of nextToken(). This allows
224      * an exact roundtrip without changing line ends when examining low
225      * level events, whereas for high level applications the text is
226      * normalized appropriately.
227      *
228      * @see #next
229      * @see #nextToken
230      * @see #getText
231      */

232     public final static int TEXT = 4;
233
234     // ----------------------------------------------------------------------------
235
// additional events exposed by lower level nextToken()
236

237     /**
238      * A CDATA section was just read;
239      * this token is available only from calls to <a HREF="#nextToken()">nextToken()</a>.
240      * A call to next() will accumulate various text events into a single event
241      * of type TEXT. The text contained in the CDATA section is available
242      * by calling getText().
243      *
244      * @see #nextToken
245      * @see #getText
246      */

247     public final static int CDSECT = 5;
248
249     /**
250      * An entity reference was just read;
251      * this token is available from <a HREF="#nextToken()">nextToken()</a>
252      * only. The entity name is available by calling getName(). If available,
253      * the replacement text can be obtained by calling getText(); otherwise,
254      * the user is responsible for resolving the entity reference.
255      * This event type is never returned from next(); next() will
256      * accumulate the replacement text and other text
257      * events to a single TEXT event.
258      *
259      * @see #nextToken
260      * @see #getText
261      */

262     public final static int ENTITY_REF = 6;
263
264     /**
265      * Ignorable whitespace was just read.
266      * This token is available only from <a HREF="#nextToken()">nextToken()</a>.
267      * For non-validating
268      * parsers, this event is only reported by nextToken() when outside
269      * the root element.
270      * Validating parsers may be able to detect ignorable whitespace at
271      * other locations.
272      * The ignorable whitespace string is available by calling getText().
273      *
274      * <p><strong>NOTE:</strong> this is different from calling the
275      * isWhitespace() method, since text content
276      * may be whitespace but not ignorable.
277      *
278      * Ignorable whitespace is skipped by next() automatically; this event
279      * type is never returned from next().
280      *
281      * @see #nextToken
282      * @see #getText
283      */

284     public static final int IGNORABLE_WHITESPACE = 7;
285
286     /**
287      * An XML processing instruction declaration was just read. This
288      * event type is available only via <a HREF="#nextToken()">nextToken()</a>.
289      * getText() will return text that is inside the processing instruction.
290      * Calls to next() will skip processing instructions automatically.
291      * @see #nextToken
292      * @see #getText
293      */

294     public static final int PROCESSING_INSTRUCTION = 8;
295
296     /**
297      * An XML comment was just read. This token is
298      * available via <a HREF="#nextToken()">nextToken()</a> only;
299      * calls to next() will skip comments automatically.
300      * The content of the comment can be accessed using the getText()
301      * method.
302      *
303      * @see #nextToken
304      * @see #getText
305      */

306     public static final int COMMENT = 9;
307
308     /**
309      * An XML document type declaration was just read. This token is
310      * available from <a HREF="#nextToken()">nextToken()</a> only.
311      * The unparsed text inside the doctype is available via
312      * the getText() method.
313      *
314      * @see #nextToken
315      * @see #getText
316      */

317     public static final int DOCDECL = 10;
318
319     /**
320      * This array can be used to convert the event type integer constants
321      * such as START_TAG or TEXT
322      * to a string. For example, the value of TYPES[START_TAG] is
323      * the string "START_TAG".
324      *
325      * This array is intended for diagnostic output only. Relying
326      * on the contents of the array may be dangerous since malicious
327      * applications may alter the array, although it is final, due
328      * to limitations of the Java language.
329      */

330     public static final String JavaDoc [] TYPES = {
331         "START_DOCUMENT",
332             "END_DOCUMENT",
333             "START_TAG",
334             "END_TAG",
335             "TEXT",
336             "CDSECT",
337             "ENTITY_REF",
338             "IGNORABLE_WHITESPACE",
339             "PROCESSING_INSTRUCTION",
340             "COMMENT",
341             "DOCDECL"
342     };
343
344
345     // ----------------------------------------------------------------------------
346
// namespace related features
347

348     /**
349      * This feature determines whether the parser processes
350      * namespaces. As for all features, the default value is false.
351      * <p><strong>NOTE:</strong> The value can not be changed during
352      * parsing and must be set before parsing.
353      *
354      * @see #getFeature
355      * @see #setFeature
356      */

357     public static final String JavaDoc FEATURE_PROCESS_NAMESPACES =
358         "http://xmlpull.org/v1/doc/features.html#process-namespaces";
359
360     /**
361      * This feature determines whether namespace attributes are
362      * exposed via the attribute access methods. Like all features,
363      * the default value is false. This feature cannot be changed
364      * during parsing.
365      *
366      * @see #getFeature
367      * @see #setFeature
368      */

369     public static final String JavaDoc FEATURE_REPORT_NAMESPACE_ATTRIBUTES =
370         "http://xmlpull.org/v1/doc/features.html#report-namespace-prefixes";
371
372     /**
373      * This feature determines whether the document declaration
374      * is processed. If set to false,
375      * the DOCDECL event type is reported by nextToken()
376      * and ignored by next().
377      *
378      * If this feature is activated, then the document declaration
379      * must be processed by the parser.
380      *
381      * <p><strong>Please note:</strong> If the document type declaration
382      * was ignored, entity references may cause exceptions
383      * later in the parsing process.
384      * The default value of this feature is false. It cannot be changed
385      * during parsing.
386      *
387      * @see #getFeature
388      * @see #setFeature
389      */

390     public static final String JavaDoc FEATURE_PROCESS_DOCDECL =
391         "http://xmlpull.org/v1/doc/features.html#process-docdecl";
392
393     /**
394      * If this feature is activated, all validation errors as
395      * defined in the XML 1.0 specification are reported.
396      * This implies that FEATURE_PROCESS_DOCDECL is true and both, the
397      * internal and external document type declaration will be processed.
398      * <p><strong>Please Note:</strong> This feature can not be changed
399      * during parsing. The default value is false.
400      *
401      * @see #getFeature
402      * @see #setFeature
403      */

404     public static final String JavaDoc FEATURE_VALIDATION =
405         "http://xmlpull.org/v1/doc/features.html#validation";
406
407     /**
408      * Use this call to change the general behaviour of the parser,
409      * such as namespace processing or doctype declaration handling.
410      * This method must be called before the first call to next or
411      * nextToken. Otherwise, an exception is thrown.
412      * <p>Example: call setFeature(FEATURE_PROCESS_NAMESPACES, true) in order
413      * to switch on namespace processing. The initial settings correspond
414      * to the properties requested from the XML Pull Parser factory.
415      * If none were requested, all features are deactivated by default.
416      *
417      * @exception XmlPullParserException If the feature is not supported or can not be set
418      * @exception IllegalArgumentException If the feature string is null
419      */

420     public void setFeature(String JavaDoc name,
421                            boolean state) throws XmlPullParserException;
422
423     /**
424      * Returns the current value of the given feature.
425      * <p><strong>Please note:</strong> unknown features are
426      * <strong>always</strong> returned as false.
427      *
428      * @param name The name of feature to be retrieved.
429      * @return The value of the feature.
430      * @exception IllegalArgumentException if feature string is null
431      */

432
433     public boolean getFeature(String JavaDoc name);
434
435     /**
436      * Set the value of a property.
437      *
438      * The property name is any fully-qualified URI.
439      */

440     public void setProperty(String JavaDoc name,
441                             Object JavaDoc value) throws XmlPullParserException;
442
443     /**
444      * Look up the value of a property.
445      *
446      * The property name is any fully-qualified URI.
447      * <p><strong>NOTE:</strong> unknown properties are <strong>always</strong>
448      * returned as null.
449      *
450      * @param name The name of property to be retrieved.
451      * @return The value of named property.
452      */

453     public Object JavaDoc getProperty(String JavaDoc name);
454
455
456     /**
457      * Set the input source for parser to the given reader and
458      * resets the parser. The event type is set to the initial value
459      * START_DOCUMENT.
460      * Setting the reader to null will just stop parsing and
461      * reset parser state,
462      * allowing the parser to free internal resources
463      * such as parsing buffers.
464      */

465     public void setInput(Reader JavaDoc in) throws XmlPullParserException;
466
467
468     /**
469      * Sets the input stream the parser is going to process.
470      * This call resets the parser state and sets the event type
471      * to the initial value START_DOCUMENT.
472      *
473      * <p><strong>NOTE:</strong> If an input encoding string is passed,
474      * it MUST be used. Otherwise,
475      * if inputEncoding is null, the parser SHOULD try to determine
476      * input encoding following XML 1.0 specification (see below).
477      * If encoding detection is supported then following feature
478      * <a HREF="http://xmlpull.org/v1/doc/features.html#detect-encoding">http://xmlpull.org/v1/doc/features.html#detect-encoding</a>
479      * MUST be true and otherwise it must be false.
480      *
481      * @param inputStream contains a raw byte input stream of possibly
482      * unknown encoding (when inputEncoding is null).
483      *
484      * @param inputEncoding if not null it MUST be used as encoding for inputStream
485      */

486     public void setInput(InputStream JavaDoc inputStream, String JavaDoc inputEncoding)
487         throws XmlPullParserException;
488
489     /**
490      * Returns the input encoding if known, null otherwise.
491      * If setInput(InputStream, inputEncoding) was called with an inputEncoding
492      * value other than null, this value must be returned
493      * from this method. Otherwise, if inputEncoding is null and
494      * the parser supports the encoding detection feature
495      * (http://xmlpull.org/v1/doc/features.html#detect-encoding),
496      * it must return the detected encoding.
497      * If setInput(Reader) was called, null is returned.
498      */

499     public String JavaDoc getInputEncoding();
500
501     /**
502      * Set new value for entity replacement text as defined in
503      * <a HREF="http://www.w3.org/TR/REC-xml#intern-replacement">XML 1.0 Section 4.5
504      * Construction of Internal Entity Replacement Text</a>.
505      * If FEATURE_PROCESS_DOCDECL or FEATURE_VALIDATION are set, calling this
506      * function will result in an exception -- when processing of DOCDECL is
507      * enabled, there is no need to define the entity replacement text manually.
508      *
509      * <p>The motivation for this function is to allow very small
510      * implementations of XMLPULL that will work in J2ME environments.
511      * Though these implementations may not be able to process the document type
512      * declaration, they still can work with known DTDs by using this function.
513      *
514      * <p><b>Please note:</b> The given value is used literally as replacement text
515      * and it corresponds to declaring entity in DTD that has all special characters
516      * escaped: left angle bracket is replaced with &amp;lt;, ampersand with &amp;amp;
517      * and so on.
518      *
519      * <p><b>Note:</b> The given value is the literal replacement text and must not
520      * contain any other entity reference (if it contains any entity reference
521      * there will be no further replacement).
522      *
523      * <p><b>Note:</b> The list of pre-defined entity names will
524      * always contain standard XML entities such as
525      * amp (&amp;amp;), lt (&amp;lt;), gt (&amp;gt;), quot (&amp;quot;), and apos (&amp;apos;).
526      * Those cannot be redefined by this method!
527      *
528      * @see #setInput
529      * @see #FEATURE_PROCESS_DOCDECL
530      * @see #FEATURE_VALIDATION
531      */

532     public void defineEntityReplacementText( String JavaDoc entityName,
533                                             String JavaDoc replacementText ) throws XmlPullParserException;
534
535     /**
536      * Returns the number of elements in the namespace stack for the given
537      * depth.
538      * If namespaces are not enabled, 0 is returned.
539      *
540      * <p><b>NOTE:</b> when parser is on END_TAG then it is allowed to call
541      * this function with getDepth()+1 argument to retrieve position of namespace
542      * prefixes and URIs that were declared on corresponding START_TAG.
543      *
544      * @see #getNamespacePrefix
545      * @see #getNamespaceUri
546      * @see #getNamespace()
547      * @see #getNamespace(String)
548      */

549     public int getNamespaceCount(int depth) throws XmlPullParserException;
550
551     /**
552      * Returns the namespace prefix for the given position
553      * in the namespace stack.
554      * If the given index is out of range, an exception is thrown.
555      * <p><b>Please note:</b> when the parser is on an END_TAG,
556      * namespace prefixes that were declared
557      * in the corresponding START_TAG are still accessible
558      * although they are no longer in scope.
559      */

560     public String JavaDoc getNamespacePrefix(int pos) throws XmlPullParserException;
561
562     /**
563      * Returns the namespace URI for the given position in the
564      * namespace stack.
565      * If the position is out of range, an exception is thrown.
566      * <p><b>NOTE:</b> when parser is on END_TAG then namespace prefixes that were declared
567      * in corresponding START_TAG are still accessible even though they are not in scope.
568      */

569     public String JavaDoc getNamespaceUri(int pos) throws XmlPullParserException;
570
571     /**
572      * Returns the URI corresponding to the given prefix,
573      * depending on current state of the parser.
574      *
575      * <p>If the prefix was not declared in the current scope,
576      * null is returned. The default namespace is included
577      * in the namespace table and is available via
578      * getNamespace (null).
579      *
580      * <p>This method is a convenience method for
581      *
582      * <pre>
583      * for (int i = getNamespaceCount (getDepth ())-1; i >= 0; i--) {
584      * if (getNamespacePrefix (i).equals (prefix)) {
585      * return getNamespaceUri (i);
586      * }
587      * }
588      * return null;
589      * </pre>
590      *
591      * <p><strong>Please note:</strong> parser implementations
592      * may provide more efficient lookup, e.g. using a Hashtable.
593      * The 'xml' prefix is bound to "http://www.w3.org/XML/1998/namespace", as
594      * defined in the
595      * <a HREF="http://www.w3.org/TR/REC-xml-names/#ns-using">Namespaces in XML</a>
596      * specification. Analogously, the 'xmlns' prefix is resolved to
597      * <a HREF="http://www.w3.org/2000/xmlns/">http://www.w3.org/2000/xmlns/</a>.
598      *
599      * @see #getNamespaceCount
600      * @see #getNamespacePrefix
601      * @see #getNamespaceUri
602      */

603     public String JavaDoc getNamespace (String JavaDoc prefix);
604
605
606     // --------------------------------------------------------------------------
607
// miscellaneous reporting methods
608

609     /**
610      * Returns the current depth of the element.
611      * Outside the root element, the depth is 0. The
612      * depth is incremented by 1 when a start tag is reached.
613      * The depth is decremented AFTER the end tag
614      * event was observed.
615      *
616      * <pre>
617      * &lt;!-- outside --&gt; 0
618      * &lt;root> 1
619      * sometext 1
620      * &lt;foobar&gt; 2
621      * &lt;/foobar&gt; 2
622      * &lt;/root&gt; 1
623      * &lt;!-- outside --&gt; 0
624      * </pre>
625      */

626     public int getDepth();
627
628     /**
629      * Returns a short text describing the current parser state, including
630      * the position, a
631      * description of the current event and the data source if known.
632      * This method is especially useful to provide meaningful
633      * error messages and for debugging purposes.
634      */

635
636     public String JavaDoc getPositionDescription ();
637
638
639     /**
640      * Returns the current line number, starting from 1.
641      * When the parser does not know the current line number
642      * or can not determine it, -1 is returned (e.g. for WBXML).
643      *
644      * @return current line number or -1 if unknown.
645      */

646     public int getLineNumber();
647
648     /**
649      * Returns the current column number, starting from 0.
650      * When the parser does not know the current column number
651      * or can not determine it, -1 is returned (e.g. for WBXML).
652      *
653      * @return current column number or -1 if unknown.
654      */

655     public int getColumnNumber();
656
657
658     // --------------------------------------------------------------------------
659
// TEXT related methods
660

661     /**
662      * Checks whether the current TEXT event contains only whitespace
663      * characters.
664      * For IGNORABLE_WHITESPACE, this is always true.
665      * For TEXT and CDSECT, false is returned when the current event text
666      * contains at least one non-white space character. For any other
667      * event type an exception is thrown.
668      *
669      * <p><b>Please note:</b> non-validating parsers are not
670      * able to distinguish whitespace and ignorable whitespace,
671      * except from whitespace outside the root element. Ignorable
672      * whitespace is reported as separate event, which is exposed
673      * via nextToken only.
674      *
675      */

676
677     public boolean isWhitespace() throws XmlPullParserException;
678
679     /**
680      * Returns the text content of the current event as String.
681      * The value returned depends on current event type,
682      * for example for TEXT event it is element content
683      * (this is typical case when next() is used).
684      *
685      * See description of nextToken() for detailed description of
686      * possible returned values for different types of events.
687      *
688      * <p><strong>NOTE:</strong> in case of ENTITY_REF, this method returns
689      * the entity replacement text (or null if not available). This is
690      * the only case where
691      * getText() and getTextCharacters() return different values.
692      *
693      * @see #getEventType
694      * @see #next
695      * @see #nextToken
696      */

697     public String JavaDoc getText ();
698
699
700     /**
701      * Returns the buffer that contains the text of the current event,
702      * as well as the start offset and length relevant for the current
703      * event. See getText(), next() and nextToken() for description of possible returned values.
704      *
705      * <p><strong>Please note:</strong> this buffer must not
706      * be modified and its content MAY change after a call to
707      * next() or nextToken(). This method will always return the
708      * same value as getText(), except for ENTITY_REF. In the case
709      * of ENTITY_REF, getText() returns the replacement text and
710      * this method returns the actual input buffer containing the
711      * entity name.
712      * If getText() returns null, this method returns null as well and
713      * the values returned in the holder array MUST be -1 (both start
714      * and length).
715      *
716      * @see #getText
717      * @see #next
718      * @see #nextToken
719      *
720      * @param holderForStartAndLength Must hold a 2-element int array
721      * into which the start offset and length values will be written.
722      * @return char buffer that contains the text of the current event
723      * (null if the current event has no text associated).
724      */

725     public char[] getTextCharacters(int [] holderForStartAndLength);
726
727     // --------------------------------------------------------------------------
728
// START_TAG / END_TAG shared methods
729

730     /**
731      * Returns the namespace URI of the current element.
732      * The default namespace is represented
733      * as an empty string ("").
734      * If namespaces are not enabled, an empty String ("") is always returned.
735      * The current event must be START_TAG or END_TAG; otherwise,
736      * null is returned.
737      */

738     public String JavaDoc getNamespace ();
739
740     /**
741      * For START_TAG or END_TAG events, the (local) name of the current
742      * element is returned when namespaces are enabled. When namespace
743      * processing is disabled, the raw name is returned.
744      * For ENTITY_REF events, the entity name is returned.
745      * If the current event is not START_TAG, END_TAG, or ENTITY_REF,
746      * null is returned.
747      * <p><b>Please note:</b> To reconstruct the raw element name
748      * when namespaces are enabled and the prefix is not null,
749      * you will need to add the prefix and a colon to the local name.
750      *
751      */

752     public String JavaDoc getName();
753
754     /**
755      * Returns the namespace prefix of the current element.
756      * If the element is in the default namespace (has no prefix),
757      * null is returned.
758      * If namespaces are not enabled, or the current event
759      * is not START_TAG or END_TAG, null is returned as well.
760      */

761
762     public String JavaDoc getPrefix();
763
764     /**
765      * Returns true if the current event is START_TAG and the tag
766      * is an empty-element tag
767      * (e.g. &lt;foobar/&gt;).
768      * <p><b>NOTE:</b> if the parser is not on START_TAG, an XmlPullParserException
769      * will be thrown.
770      */

771     public boolean isEmptyElementTag() throws XmlPullParserException;
772
773     // --------------------------------------------------------------------------
774
// START_TAG Attributes retrieval methods
775

776     /**
777      * Returns the number of attributes of the current start tag, or
778      * -1 if the current event type is not START_TAG.
779      *
780      * @see #getAttributeNamespace
781      * @see #getAttributeName
782      * @see #getAttributePrefix
783      * @see #getAttributeValue
784      */

785     public int getAttributeCount();
786
787     /**
788      * Returns the namespace URI of the attribute
789      * with the given index (starts from 0).
790      * Returns an empty string ("") if namespaces are not enabled
791      * or the attribute has no namespace.
792      * Throws an IndexOutOfBoundsException if the index is out of range
793      * or the current event type is not START_TAG.
794      *
795      * <p><strong>NOTE:</strong> if FEATURE_REPORT_NAMESPACE_ATTRIBUTES is set
796      * then namespace attributes (xmlns:ns='...') must be reported
797      * with namespace
798      * <a HREF="http://www.w3.org/2000/xmlns/">http://www.w3.org/2000/xmlns/</a>
799      * (visit this URL for description!).
800      * The default namespace attribute (xmlns="...") will be reported with empty namespace.
801      * <p><strong>NOTE:</strong> The xml prefix is bound as defined in
802      * <a HREF="http://www.w3.org/TR/REC-xml-names/#ns-using">Namespaces in XML</a>
803      * specification to "http://www.w3.org/XML/1998/namespace".
804      *
805      * @param index zero based index of attribute
806      * @return attribute namespace,
807      * empty string ("") is returned if namespaces processing is not enabled or
808      * namespaces processing is enabled but attribute has no namespace (it has no prefix).
809      */

810     public String JavaDoc getAttributeNamespace (int index);
811
812     /**
813      * Returns the local name of the specified attribute
814      * if namespaces are enabled or just attribute name if namespaces are disabled.
815      * Throws an IndexOutOfBoundsException if the index is out of range
816      * or current event type is not START_TAG.
817      *
818      * @param index zero based index of attribute
819      * @return attribute name (null is never returned)
820      */

821     public String JavaDoc getAttributeName (int index);
822
823     /**
824      * Returns the prefix of the specified attribute.
825      * Returns null if the attribute has no prefix.
826      * If namespaces are disabled it will always return null.
827      * Throws an IndexOutOfBoundsException if the index is out of range
828      * or current event type is not START_TAG.
829      *
830      * @param index zero based index of attribute
831      * @return attribute prefix or null if namespaces processing is not enabled.
832      */

833     public String JavaDoc getAttributePrefix(int index);
834
835     /**
836      * Returns the type of the specified attribute.
837      * If parser is non-validating it MUST return CDATA.
838      *
839      * @param index zero based index of attribute
840      * @return attribute type (null is never returned)
841      */

842     public String JavaDoc getAttributeType(int index);
843
844     /**
845      * Returns whether the specified attribute was not in input but was declared in XML.
846      * If parser is non-validating it MUST always return false.
847      * This information is part of XML infoset.
848      *
849      * @param index zero based index of attribute
850      * @return false if attribute was in input
851      */

852     public boolean isAttributeDefault(int index);
853
854     /**
855      * Returns the given attribute's value.
856      * Throws an IndexOutOfBoundsException if the index is out of range
857      * or current event type is not START_TAG.
858      *
859      * <p><strong>NOTE:</strong> attribute value must be normalized
860      * (including entity replacement text if PROCESS_DOCDECL is false) as described in
861      * <a HREF="http://www.w3.org/TR/REC-xml#AVNormalize">XML 1.0 section
862      * 3.3.3 Attribute-Value Normalization</a>
863      *
864      * @see #defineEntityReplacementText
865      *
866      * @param index zero based index of attribute
867      * @return value of attribute (null is never returned)
868      */

869     public String JavaDoc getAttributeValue(int index);
870
871     /**
872      * Returns the attribute's value identified by namespace URI and local name.
873      * If namespaces are disabled namespace must be null.
874      * If current event type is not START_TAG then IndexOutOfBoundsException will be thrown.
875      *
876      * <p><strong>NOTE:</strong> attribute value must be normalized
877      * (including entity replacement text if PROCESS_DOCDECL is false) as described in
878      * <a HREF="http://www.w3.org/TR/REC-xml#AVNormalize">XML 1.0 section
879      * 3.3.3 Attribute-Value Normalization</a>
880      *
881      * @see #defineEntityReplacementText
882      *
883      * @param namespace Namespace of the attribute if namespaces are enabled otherwise must be null
884      * @param name If namespaces enabled local name of attribute otherwise just attribute name
885      * @return value of attribute or null if attribute with given name does not exist
886      */

887     public String JavaDoc getAttributeValue(String JavaDoc namespace,
888                                     String JavaDoc name);
889
890     // --------------------------------------------------------------------------
891
// actual parsing methods
892

893     /**
894      * Returns the type of the current event (START_TAG, END_TAG, TEXT, etc.).
895      *
896      * @see #next()
897      * @see #nextToken()
898      */

899     public int getEventType()
900         throws XmlPullParserException;
901
902     /**
903      * Get next parsing event - element content will be coalesced and only one
904      * TEXT event must be returned for whole element content
905      * (comments and processing instructions will be ignored and entity references
906      * must be expanded or exception must be thrown if entity reference cannot be expanded).
907      * If element content is empty (content is "") then no TEXT event will be reported.
908      *
909      * <p><b>NOTE:</b> empty element (such as &lt;tag/>) will be reported
910      * with two separate events: START_TAG, END_TAG - it must be so to preserve
911      * parsing equivalency of empty element to &lt;tag>&lt;/tag>.
912      * (see isEmptyElementTag ())
913      *
914      * @see #isEmptyElementTag
915      * @see #START_TAG
916      * @see #TEXT
917      * @see #END_TAG
918      * @see #END_DOCUMENT
919      */

920
921     public int next()
922         throws XmlPullParserException, IOException JavaDoc;
923
924
925     /**
926      * This method works similarly to next() but will expose
927      * additional event types (COMMENT, CDSECT, DOCDECL, ENTITY_REF, PROCESSING_INSTRUCTION, or
928      * IGNORABLE_WHITESPACE) if they are available in input.
929      *
930      * <p>If special feature
931      * <a HREF="http://xmlpull.org/v1/doc/features.html#xml-roundtrip">FEATURE_XML_ROUNDTRIP</a>
932      * (identified by URI: http://xmlpull.org/v1/doc/features.html#xml-roundtrip)
933      * is enabled it is possible to do XML document round trip i.e. reproduce
934      * exactly on output the XML input using getText():
935      * returned content is always unnormalized (exactly as in input).
936      * Otherwise returned content is end-of-line normalized as described in
937      * <a HREF="http://www.w3.org/TR/REC-xml#sec-line-ends">XML 1.0 End-of-Line Handling</a>.
938      * Also when this feature is enabled exact content of START_TAG, END_TAG,
939      * DOCDECL and PROCESSING_INSTRUCTION is available.
940      *
941      * <p>Here is the list of tokens that can be returned from nextToken()
942      * and what getText() and getTextCharacters() return:<dl>
943      * <dt>START_DOCUMENT<dd>null
944      * <dt>END_DOCUMENT<dd>null
945      * <dt>START_TAG<dd>null unless FEATURE_XML_ROUNDTRIP
946      * enabled and then returns XML tag, ex: &lt;tag attr='val'>
947      * <dt>END_TAG<dd>null unless FEATURE_XML_ROUNDTRIP
948      * is enabled and then returns XML tag, ex: &lt;/tag>
949      * <dt>TEXT<dd>return element content.
950      * <br>Note: that element content may be delivered in multiple consecutive TEXT events.
951      * <dt>IGNORABLE_WHITESPACE<dd>return characters that are determined to be ignorable white
952      * space. If the FEATURE_XML_ROUNDTRIP is enabled all whitespace content outside root
953      * element will always be reported as IGNORABLE_WHITESPACE otherwise reporting is optional.
954      * <br>Note: that element content may be delivered in multiple consecutive IGNORABLE_WHITESPACE events.
955      * <dt>CDSECT<dd>
956      * return text <em>inside</em> CDATA
957      * (ex. 'fo&lt;o' from &lt;![CDATA[fo&lt;o]]>)
958      * <dt>PROCESSING_INSTRUCTION<dd>
959      * if FEATURE_XML_ROUNDTRIP is true
960      * return exact PI content ex: 'pi foo' from &lt;?pi foo?>
961      * otherwise it may be exact PI content or concatenation of PI target,
962      * space and data so for example for
963      * &lt;?target data?> string &quot;target data&quot; may
964      * be returned if FEATURE_XML_ROUNDTRIP is false.
965      * <dt>COMMENT<dd>return comment content ex. 'foo bar' from &lt;!--foo bar-->
966      * <dt>ENTITY_REF<dd>getText() MUST return entity replacement text if PROCESS_DOCDECL is false
967      * otherwise getText() MAY return null,
968      * additionally getTextCharacters() MUST return entity name
969      * (for example 'entity_name' for &amp;entity_name;).
970      * <br><b>NOTE:</b> this is the only place where value returned from getText() and
971      * getTextCharacters() <b>are different</b>
972      * <br><b>NOTE:</b> it is user responsibility to resolve entity reference
973      * if PROCESS_DOCDECL is false and there is no entity replacement text set in
974      * defineEntityReplacementText() method (getText() will be null)
975      * <br><b>NOTE:</b> character entities (ex. &amp;#32;) and standard entities such as
976      * &amp;amp; &amp;lt; &amp;gt; &amp;quot; &amp;apos; are reported as well
977      * and are <b>not</b> reported as TEXT tokens but as ENTITY_REF tokens!
978      * This requirement is added to allow to do roundtrip of XML documents!
979      * <dt>DOCDECL<dd>
980      * if FEATURE_XML_ROUNDTRIP is true or PROCESS_DOCDECL is false
981      * return inside part of DOCDECL ex. returns:<pre>
982      * &quot; titlepage SYSTEM "http://www.foo.bar/dtds/typo.dtd"
983      * [&lt;!ENTITY % active.links "INCLUDE">]&quot;</pre>
984      * <p>for input document that contained:<pre>
985      * &lt;!DOCTYPE titlepage SYSTEM "http://www.foo.bar/dtds/typo.dtd"
986      * [&lt;!ENTITY % active.links "INCLUDE">]></pre>
987      * otherwise if FEATURE_XML_ROUNDTRIP is false and PROCESS_DOCDECL is true
988      * then what is returned is undefined (it may be even null)
989      * </dd>
990      * </dl>
991      *
992      * <p><strong>NOTE:</strong> there is no guarantee that there will be only one TEXT or
993      * IGNORABLE_WHITESPACE event from nextToken() as parser may choose to deliver element content in
994      * multiple tokens (dividing element content into chunks)
995      *
996      * <p><strong>NOTE:</strong> whether returned text of token is end-of-line normalized
997      * depends on FEATURE_XML_ROUNDTRIP.
998      *
999      * <p><strong>NOTE:</strong> XMLDecl (&lt;?xml ...?&gt;) is not reported but its content
1000     * is available through optional properties (see class description above).
1001     *
1002     * @see #next
1003     * @see #START_TAG
1004     * @see #TEXT
1005     * @see #END_TAG
1006     * @see #END_DOCUMENT
1007     * @see #COMMENT
1008     * @see #DOCDECL
1009     * @see #PROCESSING_INSTRUCTION
1010     * @see #ENTITY_REF
1011     * @see #IGNORABLE_WHITESPACE
1012     */

1013
1014    public int nextToken()
1015        throws XmlPullParserException, IOException JavaDoc;
1016
1017    //-----------------------------------------------------------------------------
1018
// utility methods to make XML parsing easier ...
1019

1020    /**
1021     * Test if the current event is of the given type and if the
1022     * namespace and name match. null will match any namespace
1023     * and any name. If the test is not passed, an exception is
1024     * thrown. The exception text indicates the parser position,
1025     * the expected event and the current event that is not meeting the
1026     * requirement.
1027     *
1028     * <p>Essentially it does this:
1029     * <pre>
1030     * if (type != getEventType()
1031     * || (namespace != null && !namespace.equals( getNamespace () ) )
1032     * || (name != null && !name.equals( getName() ) ) )
1033     * throw new XmlPullParserException( "expected "+ TYPES[ type ]+getPositionDescription());
1034     * </pre>
1035     */

1036    public void require(int type, String JavaDoc namespace, String JavaDoc name)
1037        throws XmlPullParserException, IOException JavaDoc;
1038
1039    /**
1040     * If current event is START_TAG then if next event is TEXT then element content is returned
1041     * or if next event is END_TAG then empty string is returned, otherwise exception is thrown.
1042     * After calling this function successfully parser will be positioned on END_TAG.
1043     *
1044     * <p>The motivation for this function is to allow to parse consistently both
1045     * empty elements and elements that have non empty content, for example for input: <ol>
1046     * <li>&lt;tag>foo&lt;/tag>
1047     * <li>&lt;tag>&lt;/tag> (which is equivalent to &lt;tag/>) </ol>
1048     * both inputs can be parsed with the same code:
1049     * <pre>
1050     * p.nextTag();
1051     * p.require(p.START_TAG, "", "tag");
1052     * String content = p.nextText();
1053     * p.require(p.END_TAG, "", "tag");
1054     * </pre>
1055     * This function together with nextTag make it very easy to parse XML that has
1056     * no mixed content.
1057     *
1058     *
1059     * <p>Essentially it does this:
1060     * <pre>
1061     * if(getEventType() != START_TAG) {
1062     * throw new XmlPullParserException(
1063     * "parser must be on START_TAG to read next text", this, null);
1064     * }
1065     * int eventType = next();
1066     * if(eventType == TEXT) {
1067     * String result = getText();
1068     * eventType = next();
1069     * if(eventType != END_TAG) {
1070     * throw new XmlPullParserException(
1071     * "event TEXT it must be immediately followed by END_TAG", this, null);
1072     * }
1073     * return result;
1074     * } else if(eventType == END_TAG) {
1075     * return "";
1076     * } else {
1077     * throw new XmlPullParserException(
1078     * "parser must be on START_TAG or TEXT to read text", this, null);
1079     * }
1080     * </pre>
1081     */

1082    public String JavaDoc nextText() throws XmlPullParserException, IOException JavaDoc;
1083
1084    /**
1085     * Call next() and return event if it is START_TAG or END_TAG
1086     * otherwise throw an exception.
1087     * It will skip a whitespace TEXT event before the actual tag, if any.
1088     *
1089     * <p>Essentially it does this:
1090     * <pre>
1091     * int eventType = next();
1092     * if(eventType == TEXT && isWhitespace()) { // skip whitespace
1093     * eventType = next();
1094     * }
1095     * if (eventType != START_TAG && eventType != END_TAG) {
1096     * throw new XmlPullParserException("expected start or end tag", this, null);
1097     * }
1098     * return eventType;
1099     * </pre>
1100     */

1101    public int nextTag() throws XmlPullParserException, IOException JavaDoc;
1102
1103}
1104
1105
Popular Tags