KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > sf > saxon > query > QueryReader


1 package net.sf.saxon.query;
2
3 import net.sf.saxon.Err;
4 import net.sf.saxon.expr.StaticProperty;
5 import net.sf.saxon.om.FastStringBuffer;
6 import net.sf.saxon.om.NamePool;
7 import net.sf.saxon.om.NamespaceConstant;
8 import net.sf.saxon.om.XMLChar;
9 import net.sf.saxon.pattern.CombinedNodeTest;
10 import net.sf.saxon.pattern.ContentTypeTest;
11 import net.sf.saxon.pattern.NodeTest;
12 import net.sf.saxon.trans.StaticError;
13 import net.sf.saxon.type.AnyItemType;
14 import net.sf.saxon.type.AtomicType;
15 import net.sf.saxon.type.ItemType;
16 import net.sf.saxon.type.SchemaType;
17 import net.sf.saxon.value.SequenceType;
18
19 import javax.xml.transform.stream.StreamSource JavaDoc;
20 import java.io.*;
21 import java.util.Iterator JavaDoc;
22
/**
 * This class contains static methods used to read a query as a byte stream, infer the encoding if
 * necessary, and return the text of the query as a string; also methods to import functions and variables
 * from one module into another, and check their consistency.
 */

28 public class QueryReader {
29
30     /**
31      * The class is never instantiated
32      */

33     private QueryReader() {}
34
35     /**
36      * Read a query module given a StreamSource
37      * @param ss the supplied StreamSource. This must contain a non-null systemID which defines the base
38      * URI of the query module, and either an InputStream or a Reader containing the query text. In the
39      * case of an InputStream the method attempts to infer the encoding; in the case of a Reader, this has
40      * already been done, and the encoding specified within the query itself is ignored.
41      */

42
43     public static String JavaDoc readSourceQuery(StreamSource JavaDoc ss) throws StaticError {
44         CharSequence JavaDoc queryText;
45         if (ss.getInputStream() != null) {
46             InputStream is = ss.getInputStream();
47             if (!is.markSupported()) {
48                 is = new BufferedInputStream(is);
49             }
50             String JavaDoc encoding = readEncoding(is);
51             queryText = readInputStream(is, encoding);
52         } else if (ss.getReader() != null) {
53             queryText = readQueryFromReader(ss.getReader());
54         } else {
55             throw new StaticError("Module URI Resolver must supply either an InputSource or a Reader");
56         }
57         return queryText.toString();
58     }
59
60     /**
61      * Read an input stream non-destructively to determine the encoding from the Query Prolog
62      * @param is the input stream: this must satisfy the precondition is.markSupported() = true.
63      * @return the encoding to be used: defaults to UTF-8 if no encoding was specified explicitly
64      * in the query prolog
65      * @throws StaticError if the input stream cannot be read
66      */

67
68     public static String JavaDoc readEncoding(InputStream is) throws StaticError {
69         try {
70             if (!is.markSupported()) {
71                 throw new IllegalArgumentException JavaDoc("InputStream must have markSupported() = true");
72             }
73             is.mark(100);
74             byte[] start = new byte[100];
75             int read = is.read(start, 0, 100);
76             is.reset();
77             return inferEncoding(start, read);
78         } catch (IOException e) {
79             StaticError se = new StaticError("Failed to read query source file", e);
80             throw se;
81         }
82     }
83
84     /**
85      * Read a query from an InputStream. The method checks that all characters are valid XML
86      * characters, and also performs normalization of line endings.
87      * @param is the input stream
88      * @param encoding the encoding, or null if the encoding is unknown
89      * @return the content of the InputStream as a string
90      */

91
92     public static String JavaDoc readInputStream(InputStream is, String JavaDoc encoding) throws StaticError {
93         if (encoding == null) {
94             if (!is.markSupported()) {
95                 is = new BufferedInputStream(is);
96             }
97             encoding = readEncoding(is);
98         }
99         try {
100             Reader reader = new BufferedReader(new InputStreamReader(is, encoding));
101             return readQueryFromReader(reader);
102         } catch (UnsupportedEncodingException encErr) {
103             StaticError e = new StaticError("Unknown encoding " + Err.wrap(encoding), encErr);
104             throw e;
105         } catch (IOException ioErr) {
106             StaticError e = new StaticError("Failed to read input file", ioErr);
107             throw e;
108         }
109     }
110
111     /**
112      * Read a query from a Reader. The method checks that all characters are valid XML
113      * characters.
114      * @param reader The Reader supplying the input
115      * @return the text of the query module, as a string
116      * @throws StaticError if the file cannot be read or contains illegal characters
117      */

118
119     private static String JavaDoc readQueryFromReader(Reader reader) throws StaticError {
120         try {
121             FastStringBuffer sb = new FastStringBuffer(2048);
122             char[] buffer = new char[2048];
123             boolean first = true;
124             int actual;
125             int line = 1; // track line/column position for reporting bad characters
126
int column = 1;
127             while (true) {
128                 actual = reader.read(buffer, 0, 2048);
129                 if (actual < 0) {
130                     break;
131                 }
132                 for (int c=0; c<actual;) {
133                     int ch32 = buffer[c++];
134                     if (ch32 == '\n') {
135                         line++;
136                         column = 0;
137                     }
138                     column++;
139                     if (XMLChar.isHighSurrogate(ch32)) {
140                         char low = buffer[c++];
141                         ch32 = XMLChar.supplemental((char)ch32, low);
142                     }
143                     if (!XMLChar.isValid(ch32)) {
144                         StaticError err = new StaticError(
145                                 "The query file contains a character illegal in XML (line=" +
146                         line + " column=" + column + " value=hex " + Integer.toHexString(ch32) + ')');
147                         err.setErrorCode("XPST0003");
148                         throw err;
149                     }
150                 }
151                 if (first) {
152                     first = false;
153                     if (buffer[0]=='\ufeff') {
154                         sb.append(buffer, 1, actual-1);
155                     } else {
156                         sb.append(buffer, 0, actual);
157                     }
158                 } else {
159                     sb.append(buffer, 0, actual);
160                 }
161             }
162             return sb.condense().toString();
163         } catch (IOException ioErr) {
164             StaticError e = new StaticError("Failed to read input file", ioErr);
165             throw e;
166         }
167     }
168
169     /**
170      * Attempt to infer the encoding of a file by reading its byte order mark and if necessary
171      * the encoding declaration in the query prolog
172      * @param start the bytes appearing at the start of the file
173      * @param read the number of bytes supplied
174      * @return the inferred encoding
175      * @throws StaticError
176      */

177
178     private static String JavaDoc inferEncoding(byte[] start, int read) throws StaticError {
179         // Debugging code
180
// StringBuffer sb = new StringBuffer(read*5);
181
// for (int i=0; i<read; i++) sb.append(Integer.toHexString(start[i]&255) + ", ");
182
// System.err.println(sb);
183
// End of debugging code
184

185         if (read >= 2) {
186             if (ch(start[0]) == 0xFE && ch(start[1]) == 0xFF) {
187                 return "UTF-16";
188             } else if (ch(start[0]) == 0xFF && ch(start[1]) == 0xFE) {
189                 return "UTF-16LE";
190             }
191         }
192         if (read >= 3) {
193             if (ch(start[0]) == 0xEF && ch(start[1]) == 0xBB && ch(start[2]) == 0xBF) {
194                 return "UTF-8";
195             }
196         }
197
198         // Try to handle a UTF-16 file with no BOM
199
if (read >= 8 && start[0] == 0 && start[2] == 0 && start[4] == 0 && start[6] == 0) {
200             return "UTF-16";
201         }
202         if (read >= 8 && start[1] == 0 && start[3] == 0 && start[5] == 0 && start[7] == 0) {
203             return "UTF-16LE";
204         }
205
206         // In all other cases, we assume an encoding that has ISO646 as a subset
207

208         // Note, we don't care about syntax errors here: they'll be reported later. We just need to
209
// establish the encoding.
210
int i=0;
211         String JavaDoc tok = readToken(start, i, read);
212         if (tok.trim().equals("xquery")) {
213             i += tok.length();
214         } else {
215             return "UTF-8";
216         }
217         tok = readToken(start, i, read);
218         if (tok.trim().equals("version")) {
219             i += tok.length();
220         } else {
221             return "UTF-8";
222         }
223         tok = readToken(start, i, read);
224         if (tok == null) {
225             return "UTF-8";
226         }
227         i += tok.length();
228         tok = readToken(start, i, read);
229         if (tok.trim().equals("encoding")) {
230             i += tok.length();
231         } else {
232             return "UTF-8";
233         }
234         tok = readToken(start, i, read).trim();
235         if (tok.startsWith("\"") && tok.endsWith("\"") && tok.length()>2) {
236             return tok.substring(1, tok.length()-1);
237         } else if (tok.startsWith("'") && tok.endsWith("'") && tok.length()>2) {
238             return tok.substring(1, tok.length()-1);
239         } else {
240             throw new StaticError("Unrecognized encoding " + Err.wrap(tok) + " in query prolog");
241         }
242
243     }
244
245     /**
246      * Simple tokenizer for use when reading the encoding declaration in the query prolog. A token
247      * is a sequence of characters delimited either by whitespace, or by single or double quotes; the
248      * quotes if present are returned as part of the token.
249      * @param in
250      * @param i
251      * @param len
252      * @return
253      */

254
255     private static String JavaDoc readToken(byte[] in, int i, int len) {
256         int p = i;
257         while (p<len && " \n\r\t".indexOf(ch(in[p])) >= 0) {
258             p++;
259         }
260         if (ch(in[p])=='"') {
261             p++;
262             while (p<len && ch(in[p]) != '"') {
263                 p++;
264             }
265         } else if (ch(in[p])=='\'') {
266             p++;
267             while (p<len && ch(in[p]) != '\'') {
268                 p++;
269             }
270         } else {
271             while (p<len && " \n\r\t".indexOf(ch(in[p])) < 0) {
272                 p++;
273             }
274         }
275         FastStringBuffer sb = new FastStringBuffer(p-i+1);
276         for (int c=i; c<=p; c++) {
277             sb.append((char)ch(in[c]));
278         }
279         return sb.toString();
280     }
281
282     /**
283      * Convert a byte containing an ASCII character to that character
284      * @param b the input byte
285      * @return the ASCII character
286      */

287
288     private static int ch(byte b) {
289         return ((int)b) & 0xff;
290     }
291
292     /**
293      * Import the functions and variables from an imported module into the importing module
294      * @param importedModule the imported module
295      * @param thisModule thus module
296      * @throws net.sf.saxon.trans.StaticError
297      */

298
299     public static void importModuleContents(StaticQueryContext importedModule, StaticQueryContext thisModule) throws StaticError {
300         short ns = importedModule.getModuleNamespaceCode();
301         NamePool pool = thisModule.getNamePool();
302         Iterator JavaDoc it = importedModule.getFunctionDefinitions();
303         while (it.hasNext()) {
304             XQueryFunction def = (XQueryFunction) it.next();
305             // don't import functions transitively
306
if (pool.getURICode(def.getFunctionFingerprint()) == ns &&
307                     def.getSystemId() == importedModule.getLocationURI()) {
308                 thisModule.declareFunction(def);
309                 checkImportedType(thisModule, def.getResultType(), def);
310                 for (int i=0; i<def.getNumberOfArguments(); i++) {
311                     SequenceType argType = def.getArgumentTypes()[i];
312                     checkImportedType(thisModule, argType, def);
313                 }
314             }
315         }
316         it = importedModule.getVariableDeclarations();
317         while (it.hasNext()) {
318             GlobalVariableDefinition def = (GlobalVariableDefinition) it.next();
319             // don't import variables transitively
320
if (!(def instanceof UndeclaredVariable) &&
321                     pool.getURICode(def.getNameCode()) == ns &&
322                     !def.getSystemId().equals(thisModule.getLocationURI())) {
323                 thisModule.declareVariable(def);
324                 checkImportedType(thisModule, def.getRequiredType(), def);
325             }
326         }
327     }
328
329     /**
330      * Check that a SequenceType used in the definition of an imported variable or function
331      * is available in the importing module
332      */

333
334     private static void checkImportedType(
335             StaticQueryContext env, SequenceType importedType, Declaration declaration)
336             throws StaticError {
337         ItemType type = importedType.getPrimaryType();
338         if (type instanceof AnyItemType) {
339             return;
340         }
341         if (type instanceof AtomicType) {
342             int f = ((AtomicType)type).getFingerprint();
343             checkSchemaNamespaceImported(env, f, declaration);
344         } else if (type instanceof ContentTypeTest) {
345             SchemaType annotation = ((ContentTypeTest)type).getSchemaType();
346             int f = annotation.getFingerprint();
347             checkSchemaNamespaceImported(env, f, declaration);
348         } else if (type instanceof CombinedNodeTest) {
349             NodeTest[] tests = ((CombinedNodeTest)type).getComponentNodeTests();
350             for (int i=0; i<tests.length; i++) {
351                 SequenceType st = SequenceType.makeSequenceType(tests[1], StaticProperty.EXACTLY_ONE);
352                 checkImportedType(env, st, declaration);
353             }
354         }
355     }
356
357     /**
358      * Check that the namespace of a given name is the namespace of an imported schema
359      */

360
361     static void checkSchemaNamespaceImported(StaticQueryContext env, int fingerprint, Declaration declaration)
362             throws StaticError {
363         String JavaDoc uri = env.getNamePool().getURI(fingerprint);
364         if (uri.equals(NamespaceConstant.SCHEMA)) {
365             return;
366         } else if (NamespaceConstant.isXDTNamespace(uri)) {
367             return;
368         } if (env.isImportedSchema(uri)) {
369             return; // schema namespace is imported in this module
370
} else {
371             String JavaDoc msg = "Schema component " + env.getNamePool().getDisplayName(fingerprint) + " used in ";
372             if (declaration instanceof GlobalVariableDefinition) {
373                 msg += "declaration of imported variable " +
374                         env.getNamePool().getDisplayName(((GlobalVariableDefinition)declaration).getNameCode());
375             } else {
376                 msg += "signature of imported function " +
377                         env.getNamePool().getDisplayName(((XQueryFunction)declaration).getNameCode());
378             }
379             msg += " is not declared in any schema imported by ";
380             String JavaDoc module = env.getModuleNamespace();
381             if (module == null) {
382                 msg += "the main query module";
383             } else {
384                 msg += "query module " + module;
385             }
386             StaticError err = new StaticError(msg);
387             err.setErrorCode("XQST0036");
388             err.setLocator(declaration);
389             throw err;
390         }
391     }
392 }
393
//
// The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
// you may not use this file except in compliance with the License. You may obtain a copy of the
// License at http://www.mozilla.org/MPL/
//
// Software distributed under the License is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the License for the specific language governing rights and limitations under the License.
//
// The Original Code is: all this file.
//
// The Initial Developer of the Original Code is Michael H. Kay.
//
// Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
//
// Contributor(s): none.
//
411
Popular Tags