KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > sf > saxon > functions > UnparsedText


1 package net.sf.saxon.functions;
2
3 import net.sf.saxon.Err;
4 import net.sf.saxon.expr.Expression;
5 import net.sf.saxon.expr.StaticContext;
6 import net.sf.saxon.expr.XPathContext;
7 import net.sf.saxon.om.Item;
8 import net.sf.saxon.om.FastStringBuffer;
9 import net.sf.saxon.om.XMLChar;
10 import net.sf.saxon.trans.DynamicError;
11 import net.sf.saxon.trans.XPathException;
12 import net.sf.saxon.value.StringValue;
13 import net.sf.saxon.value.BooleanValue;
14
15 import java.io.*;
16 import java.net.MalformedURLException JavaDoc;
17 import java.net.URL JavaDoc;
18 import java.net.URLConnection JavaDoc;
19
20
21 public class UnparsedText extends SystemFunction implements XSLTFunction {
22
23     String JavaDoc expressionBaseURI = null;
24
25     public static final int UNPARSED_TEXT = 0;
26     public static final int UNPARSED_TEXT_AVAILABLE = 1;
27
28     public void checkArguments(StaticContext env) throws XPathException {
29         if (expressionBaseURI == null) {
30             super.checkArguments(env);
31             expressionBaseURI = env.getBaseURI();
32         }
33     }
34
35
36     /**
37      * preEvaluate: this method suppresses compile-time evaluation by doing nothing
38      */

39
40     public Expression preEvaluate(StaticContext env) {
41         return this;
42         // in principle we could pre-evaluate any call of unparsed-text() with
43
// constant arguments. But we don't, because the file contents might
44
// change before the stylesheet executes.
45
}
46
47
48     /**
49      * evaluateItem() handles evaluation of the function:
50      * it returns a String
51      */

52
53     public Item evaluateItem(XPathContext context) throws XPathException {
54         StringValue result;
55         try {
56             StringValue hrefVal = (StringValue)argument[0].evaluateItem(context);
57             if (hrefVal == null) {
58                 return null;
59             }
60             String JavaDoc href = hrefVal.getStringValue();
61
62             String JavaDoc encoding = null;
63             if (getNumberOfArguments() == 2) {
64                 encoding = argument[1].evaluateItem(context).getStringValue();
65             }
66
67             result = new StringValue(readFile(href, expressionBaseURI, encoding));
68         } catch (XPathException err) {
69             if (operation == UNPARSED_TEXT_AVAILABLE) {
70                 return BooleanValue.FALSE;
71             } else {
72                 throw err;
73             }
74         }
75         if (operation == UNPARSED_TEXT_AVAILABLE) {
76             return BooleanValue.TRUE;
77         } else {
78             return result;
79         }
80     }
81
82     /**
83      * Supporting routine to load one external file given a URI (href) and a baseURI
84      */

85
86     private CharSequence JavaDoc readFile(String JavaDoc href, String JavaDoc baseURI, String JavaDoc encoding)
87             throws XPathException {
88
89         // Resolve relative URI
90

91         URL JavaDoc absoluteURL;
92         if (baseURI == null) { // no base URI available
93
try {
94                 // the href might be an absolute URL
95
absoluteURL = new URL JavaDoc(href);
96             } catch (MalformedURLException JavaDoc err) {
97                 // it isn't
98
DynamicError e = new DynamicError("Cannot resolve absolute URI", err);
99                 e.setErrorCode("XTDE1170");
100                 throw e;
101             }
102         } else {
103             try {
104                 absoluteURL = new URL JavaDoc(new URL JavaDoc(baseURI), href);
105             } catch (MalformedURLException JavaDoc err) {
106                 DynamicError e = new DynamicError("Cannot resolve relative URI", err);
107                 e.setErrorCode("XTDE1170");
108                 throw e;
109             }
110         }
111         try {
112             InputStream is;
113             if (encoding != null) {
114                 is = absoluteURL.openStream();
115             } else {
116                 URLConnection JavaDoc connection = absoluteURL.openConnection();
117                 connection.connect();
118                 is = connection.getInputStream();
119
120                 try {
121
122                     if (!is.markSupported()) {
123                         is = new BufferedInputStream(is);
124                     }
125
126                     // Get any external (HTTP) encoding label.
127
String JavaDoc contentType;
128
129                     // The file:// URL scheme gives no useful information...
130
if (!"file".equals(connection.getURL().getProtocol())) {
131
132                         // Use the contentType from the HTTP header if available
133
contentType = connection.getContentType();
134
135                         if (contentType != null) {
136                             int pos = contentType.indexOf("charset");
137                             if (pos>=0) {
138                                 pos = contentType.indexOf('=', pos + 7);
139                                 if (pos>=0) {
140                                     contentType = contentType.substring(pos + 1);
141                                 }
142                                 if ((pos = contentType.indexOf(';')) > 0) {
143                                     contentType = contentType.substring(0, pos);
144                                 }
145
146                                 // attributes can have comment fields (RFC 822)
147
if ((pos = contentType.indexOf('(')) > 0) {
148                                     contentType = contentType.substring(0, pos);
149                                 }
150                                 // ... and values may be quoted
151
if ((pos = contentType.indexOf('"')) > 0) {
152                                     contentType = contentType.substring(pos + 1,
153                                             contentType.indexOf('"', pos + 2));
154                                 }
155                                 encoding = contentType.trim();
156                             }
157                         }
158                     }
159
160                     if (encoding == null) {
161                         // Try to detect the encoding from the start of the content
162
is.mark(100);
163                         byte[] start = new byte[100];
164                         int read = is.read(start, 0, 100);
165                         is.reset();
166                         encoding = inferEncoding(start, read);
167                     }
168
169                 } catch (IOException e) {
170                     encoding = "UTF-8";
171                 }
172
173             }
174             Reader reader = new BufferedReader(new InputStreamReader(is, encoding));
175             FastStringBuffer sb = new FastStringBuffer(2048);
176             char[] buffer = new char[2048];
177             boolean first = true;
178             int actual;
179             int line = 1;
180             int column = 1;
181             while (true) {
182                 actual = reader.read(buffer, 0, 2048);
183                 if (actual < 0) {
184                     break;
185                 }
186                 for (int c=0; c<actual;) {
187                     int ch32 = buffer[c++];
188                     if (ch32 == '\n') {
189                         line++;
190                         column = 0;
191                     }
192                     column++;
193                     if (XMLChar.isHighSurrogate(ch32)) {
194                         char low = buffer[c++];
195                         ch32 = XMLChar.supplemental((char)ch32, low);
196                     }
197                     if (!XMLChar.isValid(ch32)) {
198                         DynamicError err = new DynamicError(
199                                 "The unparsed-text file contains a character illegal in XML (line=" +
200                         line + " column=" + column + " value=hex " + Integer.toHexString(ch32) + ')');
201                         err.setErrorCode("XTDE1180");
202                         throw err;
203                     }
204                 }
205                 if (first) {
206                     first = false;
207                     if (buffer[0]=='\ufeff') {
208                         // don't include the BOM in the result
209
sb.append(buffer, 1, actual-1);
210                     } else {
211                         sb.append(buffer, 0, actual);
212                     }
213                 } else {
214                     sb.append(buffer, 0, actual);
215                 }
216             }
217             reader.close();
218             return sb.condense();
219         } catch (java.io.UnsupportedEncodingException JavaDoc encErr) {
220             DynamicError e = new DynamicError("Unknown encoding " + Err.wrap(encoding), encErr);
221             e.setErrorCode("XTDE1190");
222             throw e;
223         } catch (java.io.IOException JavaDoc ioErr) {
224             DynamicError e = new DynamicError("Failed to read input file", ioErr);
225             e.setErrorCode("XTDE1170");
226             e.setLocator(this);
227             throw e;
228         }
229     }
230
231     private String JavaDoc inferEncoding(byte[] start, int read) {
232         if (read >= 2) {
233             if (ch(start[0]) == 0xFE && ch(start[1]) == 0xFF) {
234                 return "UTF-16";
235             } else if (ch(start[0]) == 0xFF && ch(start[1]) == 0xFE) {
236                 return "UTF-16LE";
237             }
238         }
239         if (read >= 3) {
240             if (ch(start[0]) == 0xEF && ch(start[1]) == 0xBB && ch(start[2]) == 0xBF) {
241                 return "UTF-8";
242             }
243         }
244         if (read >= 4) {
245             if (ch(start[0]) == '<' && ch(start[1]) == '?' &&
246                     ch(start[2]) == 'x' && ch(start[3]) == 'm' && ch(start[4]) == 'l') {
247                 FastStringBuffer sb = new FastStringBuffer(read);
248                 for (int b = 0; b < read; b++) {
249                     sb.append((char)start[b]);
250                 }
251                 String JavaDoc p = sb.toString();
252                 int v = p.indexOf("encoding");
253                 if (v >= 0) {
254                     v += 8;
255                     while (v < p.length() && " \n\r\t=\"'".indexOf(p.charAt(v)) >= 0) {
256                         v++;
257                     }
258                     sb.setLength(0);
259                     while (v < p.length() && p.charAt(v) != '"' && p.charAt(v) != '\'') {
260                         sb.append(p.charAt(v++));
261                     }
262                     return sb.toString();
263                 }
264             }
265         } else if (read > 0 && start[0] == 0 && start[2] == 0 && start[4] == 0 && start[6] == 0) {
266             return "UTF-16";
267         } else if (read > 1 && start[1] == 0 && start[3] == 0 && start[5] == 0 && start[7] == 0) {
268             return "UTF-16LE";
269         }
270         // If all else fails, assume UTF-8
271
return "UTF-8";
272     }
273
274     private int ch(byte b) {
275         return ((int)b) & 0xff;
276     }
277
278 }
279
280
281 //
282
// The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
283
// you may not use this file except in compliance with the License. You may obtain a copy of the
284
// License at http://www.mozilla.org/MPL/
285
//
286
// Software distributed under the License is distributed on an "AS IS" basis,
287
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
288
// See the License for the specific language governing rights and limitations under the License.
289
//
290
// The Original Code is: all this file.
291
//
292
// The Initial Developer of the Original Code is Michael H. Kay. The inferEncoding() method includes
293
// code fragments taken from the AElfred XML Parser developed by David Megginson.
294
//
295
// Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
296
//
297
// Contributor(s): none.
298
//
299
Popular Tags