KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > apache > xerces > readers > XMLDeclRecognizer


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 1999 The Apache Software Foundation. All rights
6  * reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

57
58 package org.enhydra.apache.xerces.readers;
59
60 import java.util.Stack JavaDoc;
61
62 import org.enhydra.apache.xerces.framework.XMLErrorReporter;
63 import org.enhydra.apache.xerces.utils.ChunkyByteArray;
64 import org.enhydra.apache.xerces.utils.StringPool;
65
66 /**
67  * Abstract base class for encoding recognizers.
68  *
69  * When we encounter an external entity, including the document entity,
70  * and do not know what the encoding of the underlying byte stream is,
71  * we need to look at the contents of the stream to find out. We do this
72  * by asking a set of "recognizers" to look at the stream data and if
73  * the recognizer can understand the encoding it will try to read an
74  * XML or text declaration, if present, and construct the appropriate
75  * reader for that encoding. The recognizer subclasses will typically
76  * use the prescanXMLDeclOrTextDecl() method if the stream looks like
77  * it does begin with such a declaration using a temporary reader that
78  * can support the calls needed to scan through the encoding declaration.
79  */

80 public abstract class XMLDeclRecognizer {
81
82     /**
83      * Register the standard recognizers.
84      *
85      * @param recognizerStack The stack of recognizers used by the parser.
86      */

87     public static void registerDefaultRecognizers(Stack JavaDoc recognizerStack) {
88         recognizerStack.push(new EBCDICRecognizer());
89         recognizerStack.push(new UCSRecognizer());
90         recognizerStack.push(new UTF8Recognizer());
91     }
92
93     /**
94      * Subclasses override this method to support recognizing their encodings.
95      *
96      * @param readerFactory the factory object to use when constructing the entity reader.
97      * @param entityHandler the entity handler to get entity readers from
98      * @param errorReporter where to report errors
99      * @param sendCharDataAsCharArray true if the reader should use char arrays, not string handles.
100      * @param stringPool the <code>StringPool</code> to put strings in
101      * @param data initial bytes to perform recognition on
102      * @param xmlDecl true if attempting to recognize fron an XMLDecl, false if trying to recognize from a TextDecl.
103      * @param allowJavaEncodingName true if Java's encoding names are allowed, false if they are not.
104      * @return The reader that will be used to process the contents of the data stream.
105      * @exception java.lang.Exception
106      */

107     public abstract XMLEntityHandler.EntityReader recognize(XMLEntityReaderFactory readerFactory,
108                                                             XMLEntityHandler entityHandler,
109                                                             XMLErrorReporter errorReporter,
110                                                             boolean sendCharDataAsCharArray,
111                                                             StringPool stringPool,
112                                                             ChunkyByteArray data,
113                                                             boolean xmlDecl,
114                                                             boolean allowJavaEncodingName) throws Exception JavaDoc;
115
116     //
117
// From the standard:
118
//
119
// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
120
// [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
121
// [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
122
// [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
123
// [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
124
//
125
/**
126      * Support for getting the value of an EncodingDecl using an XMLReader.
127      *
128      * This is the minimal logic from the scanner to recognize an XMLDecl or TextDecl using
129      * the XMLReader interface.
130      *
131      * @param entityReader data source for prescan
132      * @param xmlDecl true if attempting to recognize from an XMLDecl, false if trying to recognize from a TextDecl.
133      * @return <code>StringPool</code> handle to the name of the encoding recognized
134      * @exception java.lang.Exception
135      */

136     protected int prescanXMLDeclOrTextDecl(XMLEntityHandler.EntityReader entityReader, boolean xmlDecl) throws Exception JavaDoc
137     {
138         if (!entityReader.lookingAtChar('<', true)) {
139             return -1;
140         }
141         if (!entityReader.lookingAtChar('?', true)) {
142             return -1;
143         }
144         if (!entityReader.skippedString(xml_string)) {
145             return -1;
146         }
147         entityReader.skipPastSpaces();
148         boolean single;
149         char qchar;
150         if (entityReader.skippedString(version_string)) {
151             entityReader.skipPastSpaces();
152             if (!entityReader.lookingAtChar('=', true)) {
153                 return -1;
154             }
155             entityReader.skipPastSpaces();
156             int versionIndex = entityReader.scanStringLiteral();
157             if (versionIndex < 0) {
158                 return -1;
159             }
160             if (!entityReader.lookingAtSpace(true)) {
161                 return -1;
162             }
163             entityReader.skipPastSpaces();
164         }
165         else if (xmlDecl) {
166             return -1;
167         }
168         if (!entityReader.skippedString(encoding_string)) {
169             return -1;
170         }
171         entityReader.skipPastSpaces();
172         if (!entityReader.lookingAtChar('=', true)) {
173             return -1;
174         }
175         entityReader.skipPastSpaces();
176         int encodingIndex = entityReader.scanStringLiteral();
177         return encodingIndex;
178     }
179     //
180
// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
181
// [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
182
//
183
private static final char[] xml_string = { 'x','m','l' };
184     //
185
// [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
186
//
187
private static final char[] version_string = { 'v','e','r','s','i','o','n' };
188     //
189
// [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
190
//
191
private static final char[] encoding_string = { 'e','n','c','o','d','i','n','g' };
192 }
193
Popular Tags