KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > hp > hpl > jena > rdf > model > impl > NTripleReader


/*
 * (c) Copyright 2001, 2003, 2004, 2005 Hewlett-Packard Development Company, LP
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 * derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $Id: NTripleReader.java,v 1.14 2005/02/21 12:14:46 andy_seaborne Exp $
 */

29
30 package com.hp.hpl.jena.rdf.model.impl;
31
32 import org.apache.commons.logging.Log;
33 import org.apache.commons.logging.LogFactory;
34
35 import com.hp.hpl.jena.graph.GraphEvents;
36 import com.hp.hpl.jena.rdf.model.*;
37 import com.hp.hpl.jena.util.FileUtils;
38 import com.hp.hpl.jena.shared.*;
39
40 import java.net.URL JavaDoc;
41 import java.io.*;
42 import java.util.*;
43
44 /** N-Triple Reader
45  *
46  * @author Brian McBride, Jeremy Carroll, Dave Banks
47  * @version Release=$Name: $ Date=$Date: 2005/02/21 12:14:46 $
48  */

49 public class NTripleReader extends Object JavaDoc implements RDFReader {
50     static final Log log = LogFactory.getLog(NTripleReader.class);
51
52     private Model model = null;
53     private Hashtable anons = new Hashtable();
54
55     private IStream in = null;
56     private boolean inErr = false;
57     private int errCount = 0;
58     private static final int sbLength = 200;
59
60     private RDFErrorHandler errorHandler = new RDFDefaultErrorHandler();
61
62     /**
63      * Already with ": " at end for error messages.
64      */

65     private String JavaDoc base;
66
67     NTripleReader() {
68     }
69     public void read(Model model, InputStream in, String JavaDoc base)
70          {
71         // N-Triples must be in ASCII, we permit UTF-8.
72
read(model, FileUtils.asUTF8(in), base);
73     }
74     public void read(Model model, Reader reader, String JavaDoc base)
75          {
76
77         if (!(reader instanceof BufferedReader)) {
78             reader = new BufferedReader(reader);
79         }
80
81         this.model = model;
82         this.base = base == null ? "" : (base + ": ");
83         in = new IStream(reader);
84         readRDF();
85         if (errCount != 0) {
86             throw new SyntaxError( "unknown" );
87         }
88     }
89
90     public void read(Model model, String JavaDoc url) {
91         try {
92             read(
93                 model,
94                 new InputStreamReader(((new URL JavaDoc(url))).openStream()),
95                 url);
96         } catch (Exception JavaDoc e) {
97             throw new JenaException(e);
98         } finally {
99             if (errCount != 0) {
100                 throw new SyntaxError( "unknown" );
101             }
102         }
103     }
104
105     public Object JavaDoc setProperty(String JavaDoc propName, Object JavaDoc propValue)
106          {
107         errorHandler.error(new UnknownPropertyException( propName ));
108         return null;
109     }
110
111     public RDFErrorHandler setErrorHandler(RDFErrorHandler errHandler) {
112         RDFErrorHandler old = this.errorHandler;
113         this.errorHandler = errHandler;
114         return old;
115     }
116
117     protected void readRDF() {
118         try {
119             model.notifyEvent( GraphEvents.startRead );
120             unwrappedReadRDF();
121         } finally {
122             model.notifyEvent( GraphEvents.finishRead );
123         }
124     }
125     
126     protected final void unwrappedReadRDF() {
127         Resource subject;
128         Property predicate = null;
129         RDFNode object;
130
131         while (!in.eof()) {
132             while (!in.eof()) {
133                 inErr = false;
134
135                 skipWhiteSpace();
136                 if (in.eof()) {
137                     return;
138                 }
139
140                 subject = readResource();
141                 if (inErr)
142                     break;
143
144                 skipWhiteSpace();
145                 try {
146                     predicate = model.createProperty(readResource().getURI());
147                 } catch (Exception JavaDoc e1) {
148                     errorHandler.fatalError(e1);
149                 }
150                 if (inErr)
151                     break;
152
153                 skipWhiteSpace();
154                 object = readNode();
155                 if (inErr)
156                     break;
157
158                 skipWhiteSpace();
159                 if (badEOF())
160                     break;
161
162                 if (!expect("."))
163                     break;
164
165                 try {
166                     model.add(subject, predicate, object);
167                 } catch (Exception JavaDoc e2) {
168                     errorHandler.fatalError(e2);
169                 }
170             }
171             if (inErr) {
172                 errCount++;
173                 while (!in.eof() && in.readChar() != '\n') {
174                 }
175             }
176         }
177     }
178
179     public Resource readResource() {
180         char inChar = in.readChar();
181         if (badEOF())
182             return null;
183
184         if (inChar == '_') { // anon resource
185
if (!expect(":"))
186                 return null;
187             String JavaDoc name = readName();
188             if (name == null) {
189                 syntaxError("expected bNode label");
190                 return null;
191             }
192             return lookupResource(name);
193         } else if (inChar == '<') { // uri
194
String JavaDoc uri = readURI();
195             if (uri == null) {
196                 inErr = true;
197                 return null;
198             }
199             inChar = in.readChar();
200             if (inChar != '>') {
201                 syntaxError("expected '>'");
202                 return null;
203             }
204             return model.createResource(uri);
205         } else {
206             syntaxError("unexpected input");
207             return null;
208         }
209     }
210
211     public RDFNode readNode() {
212         skipWhiteSpace();
213         switch (in.nextChar()) {
214             case '"' :
215                 return readLiteral(false);
216             case 'x' :
217                 return readLiteral(true);
218             case '<' :
219             case '_' :
220                 return readResource();
221             default :
222                 syntaxError("unexpected input");
223                 return null;
224         }
225     }
226
227     protected Literal readLiteral(boolean wellFormed) {
228
229         StringBuffer JavaDoc lit = new StringBuffer JavaDoc(sbLength);
230
231         if (wellFormed) {
232             deprecated("Use ^^rdf:XMLLiteral not xml\"literals\", .");
233
234             if (!expect("xml"))
235                 return null;
236         }
237
238         if (!expect("\""))
239             return null;
240
241         while (true) {
242             char inChar = in.readChar();
243             if (badEOF())
244                 return null;
245             if (inChar == '\\') {
246                 char c = in.readChar();
247                 if (in.eof()) {
248                     inErr = true;
249                     return null;
250                 }
251                 if (c == 'n') {
252                     inChar = '\n';
253                 } else if (c == 'r') {
254                     inChar = '\r';
255                 } else if (c == 't') {
256                     inChar = '\t';
257                 } else if (c == '\\' || c == '"') {
258                     inChar = c;
259                 } else if (c == 'u') {
260                     inChar = readUnicode4Escape();
261                     if (inErr)
262                         return null;
263                 } else {
264                     syntaxError("illegal escape sequence '" + c + "'");
265                     return null;
266                 }
267             } else if (inChar == '"') {
268                 String JavaDoc lang;
269                 if ('@' == in.nextChar()) {
270                     expect("@");
271                    lang = readLang();
272                 } else if ('-' == in.nextChar()) {
273                     expect("-");
274                     deprecated("Language tags should be introduced with @ not -.");
275                     lang = readLang();
276                 } else {
277                     lang = "";
278                 }
279                 if (wellFormed) {
280                     return model.createLiteral(
281                         lit.toString(),
282 // "",
283
wellFormed);
284                 } else if ('^' == in.nextChar()) {
285                     String JavaDoc datatypeURI = null;
286                     if (!expect("^^<")) {
287                         syntaxError("ill-formed datatype");
288                         return null;
289                     }
290                     datatypeURI = readURI();
291                     if (datatypeURI == null || !expect(">"))
292                         return null;
293                     if ( lang.length() > 0 )
294                        deprecated("Language tags are not permitted on typed literals.");
295                     
296                     return model.createTypedLiteral(
297                         lit.toString(),
298                         datatypeURI);
299                 } else {
300                     return model.createLiteral(lit.toString(), lang);
301                 }
302             }
303             lit = lit.append(inChar);
304         }
305     }
306
307     private char readUnicode4Escape() {
308         char buf[] =
309             new char[] {
310                 in.readChar(),
311                 in.readChar(),
312                 in.readChar(),
313                 in.readChar()};
314         if (badEOF()) {
315             return 0;
316         }
317         try {
318             return (char) Integer.parseInt(new String JavaDoc(buf), 16);
319         } catch (NumberFormatException JavaDoc e) {
320             syntaxError("bad unicode escape sequence");
321             return 0;
322         }
323     }
324     private void deprecated(String JavaDoc s) {
325         errorHandler.warning(
326             new SyntaxError(
327                 syntaxErrorMessage(
328                     "Deprecation warning",
329                     s,
330                     in.getLinepos(),
331                     in.getCharpos())));
332     }
333
334     private void syntaxError(String JavaDoc s) {
335         errorHandler.error(
336             new SyntaxError(
337                 syntaxErrorMessage(
338                     "Syntax error",
339                     s,
340                     in.getLinepos(),
341                     in.getCharpos())));
342         inErr = true;
343     }
344     private String JavaDoc readLang() {
345         StringBuffer JavaDoc lang = new StringBuffer JavaDoc(15);
346
347
348         while (true) {
349             char inChar = in.nextChar();
350             if (Character.isWhitespace(inChar) || inChar == '.' || inChar == '^')
351                 return lang.toString();
352             lang = lang.append(in.readChar());
353         }
354     }
355     private boolean badEOF() {
356         if (in.eof()) {
357             syntaxError("premature end of file");
358         }
359         return inErr;
360     }
361     protected String JavaDoc readURI() {
362         StringBuffer JavaDoc uri = new StringBuffer JavaDoc(sbLength);
363
364         while (in.nextChar() != '>') {
365             char inChar = in.readChar();
366
367             if (inChar == '\\') {
368                 expect("u");
369                 inChar = readUnicode4Escape();
370             }
371             if (badEOF()) {
372                 return null;
373             }
374             uri = uri.append(inChar);
375         }
376         return uri.toString();
377     }
378
379     protected String JavaDoc readName() {
380         StringBuffer JavaDoc name = new StringBuffer JavaDoc(sbLength);
381
382         while (!Character.isWhitespace(in.nextChar())) {
383             name = name.append(in.readChar());
384             if (badEOF())
385                 return null;
386         }
387         return name.toString();
388     }
389     private boolean expect(String JavaDoc str) {
390         for (int i = 0; i < str.length(); i++) {
391             char want = str.charAt(i);
392
393             if (badEOF())
394                 return false;
395
396             char inChar = in.readChar();
397
398             if (inChar != want) {
399                 //System.err.println("N-triple reader error");
400
syntaxError("expected \"" + str + "\"");
401                 return false;
402             }
403         }
404         return true;
405     }
406     protected void skipWhiteSpace() {
407         while (Character.isWhitespace(in.nextChar()) || in.nextChar() == '#') {
408             char inChar = in.readChar();
409             if (in.eof()) {
410                 return;
411             }
412             if (inChar == '#') {
413                 while (inChar != '\n') {
414                     inChar = in.readChar();
415                     if (in.eof()) {
416                         return;
417                     }
418                 }
419             }
420         }
421     }
422
423     protected Resource lookupResource(String JavaDoc name) {
424         Resource r;
425         r = (Resource) anons.get(name);
426         if (r == null) {
427             r = model.createResource();
428             anons.put(name, r);
429         }
430         return r;
431     }
432
433     protected String JavaDoc syntaxErrorMessage(
434         String JavaDoc sort,
435         String JavaDoc msg,
436         int linepos,
437         int charpos) {
438         return base
439             + sort
440             + " at line "
441             + linepos
442             + " position "
443             + charpos
444             + ": "
445             + msg;
446     }
447     
448 }
449
450 class IStream {
451
452     // simple input stream handler
453

454     Reader in;
455     char[] thisChar = new char[1];
456     boolean eof;
457     int charpos = 1;
458     int linepos = 1;
459
460     protected IStream(Reader in) {
461         try {
462             this.in = in;
463             eof = (in.read(thisChar, 0, 1) == -1);
464         } catch (IOException e) {
465             throw new JenaException(e);
466         }
467     }
468
469     protected char readChar() {
470         try {
471             if (eof)
472                 return '\000';
473             char rv = thisChar[0];
474             eof = (in.read(thisChar, 0, 1) == -1);
475             if (rv == '\n') {
476                 linepos++;
477                 charpos = 0;
478             } else {
479                 charpos++;
480             }
481             return rv;
482         } catch (java.io.IOException JavaDoc e) {
483             throw new JenaException(e);
484         }
485     }
486
487     protected char nextChar() {
488         return eof ? '\000' : thisChar[0];
489     }
490
491     protected boolean eof() {
492         return eof;
493     }
494
495     protected int getLinepos() {
496         return linepos;
497     }
498
499     protected int getCharpos() {
500         return charpos;
501     }
502     
503 }
Popular Tags