1 29 30 package com.hp.hpl.jena.rdf.model.impl; 31 32 import org.apache.commons.logging.Log; 33 import org.apache.commons.logging.LogFactory; 34 35 import com.hp.hpl.jena.graph.GraphEvents; 36 import com.hp.hpl.jena.rdf.model.*; 37 import com.hp.hpl.jena.util.FileUtils; 38 import com.hp.hpl.jena.shared.*; 39 40 import java.net.URL ; 41 import java.io.*; 42 import java.util.*; 43 44 49 public class NTripleReader extends Object implements RDFReader { 50 static final Log log = LogFactory.getLog(NTripleReader.class); 51 52 private Model model = null; 53 private Hashtable anons = new Hashtable(); 54 55 private IStream in = null; 56 private boolean inErr = false; 57 private int errCount = 0; 58 private static final int sbLength = 200; 59 60 private RDFErrorHandler errorHandler = new RDFDefaultErrorHandler(); 61 62 65 private String base; 66 67 NTripleReader() { 68 } 69 public void read(Model model, InputStream in, String base) 70 { 71 read(model, FileUtils.asUTF8(in), base); 73 } 74 public void read(Model model, Reader reader, String base) 75 { 76 77 if (!(reader instanceof BufferedReader)) { 78 reader = new BufferedReader(reader); 79 } 80 81 this.model = model; 82 this.base = base == null ? "" : (base + ": "); 83 in = new IStream(reader); 84 readRDF(); 85 if (errCount != 0) { 86 throw new SyntaxError( "unknown" ); 87 } 88 } 89 90 public void read(Model model, String url) { 91 try { 92 read( 93 model, 94 new InputStreamReader(((new URL (url))).openStream()), 95 url); 96 } catch (Exception e) { 97 throw new JenaException(e); 98 } finally { 99 if (errCount != 0) { 100 throw new SyntaxError( "unknown" ); 101 } 102 } 103 } 104 105 public Object setProperty(String propName, Object propValue) 106 { 107 errorHandler.error(new UnknownPropertyException( propName )); 108 return null; 109 } 110 111 public RDFErrorHandler setErrorHandler(RDFErrorHandler errHandler) { 112 RDFErrorHandler old = this.errorHandler; 113 this.errorHandler = errHandler; 114 return old; 115 } 116 117 protected void readRDF() { 118 try { 119 model.notifyEvent( GraphEvents.startRead ); 120 unwrappedReadRDF(); 121 } finally { 122 model.notifyEvent( GraphEvents.finishRead ); 123 } 124 } 125 126 protected final void unwrappedReadRDF() { 127 Resource subject; 128 Property predicate = null; 129 RDFNode object; 130 131 while (!in.eof()) { 132 while (!in.eof()) { 133 inErr = false; 134 135 skipWhiteSpace(); 136 if (in.eof()) { 137 return; 138 } 139 140 subject = readResource(); 141 if (inErr) 142 break; 143 144 skipWhiteSpace(); 145 try { 146 predicate = model.createProperty(readResource().getURI()); 147 } catch (Exception e1) { 148 errorHandler.fatalError(e1); 149 } 150 if (inErr) 151 break; 152 153 skipWhiteSpace(); 154 object = readNode(); 155 if (inErr) 156 break; 157 158 skipWhiteSpace(); 159 if (badEOF()) 160 break; 161 162 if (!expect(".")) 163 break; 164 165 try { 166 model.add(subject, predicate, object); 167 } catch (Exception e2) { 168 errorHandler.fatalError(e2); 169 } 170 } 171 if (inErr) { 172 errCount++; 173 while (!in.eof() && in.readChar() != '\n') { 174 } 175 } 176 } 177 } 178 179 public Resource readResource() { 180 char inChar = in.readChar(); 181 if (badEOF()) 182 return null; 183 184 if (inChar == '_') { if (!expect(":")) 186 return null; 187 String name = readName(); 188 if (name == null) { 189 syntaxError("expected bNode label"); 190 return null; 191 } 192 return lookupResource(name); 193 } else if (inChar == '<') { String uri = readURI(); 195 if (uri == null) { 196 inErr = true; 197 return null; 198 } 199 inChar = in.readChar(); 200 if (inChar != '>') { 201 syntaxError("expected '>'"); 202 return null; 203 } 204 return model.createResource(uri); 205 } else { 206 syntaxError("unexpected input"); 207 return null; 208 } 209 } 210 211 public RDFNode readNode() { 212 skipWhiteSpace(); 213 switch (in.nextChar()) { 214 case '"' : 215 return readLiteral(false); 216 case 'x' : 217 return readLiteral(true); 218 case '<' : 219 case '_' : 220 return readResource(); 221 default : 222 syntaxError("unexpected input"); 223 return null; 224 } 225 } 226 227 protected Literal readLiteral(boolean wellFormed) { 228 229 StringBuffer lit = new StringBuffer (sbLength); 230 231 if (wellFormed) { 232 deprecated("Use ^^rdf:XMLLiteral not xml\"literals\", ."); 233 234 if (!expect("xml")) 235 return null; 236 } 237 238 if (!expect("\"")) 239 return null; 240 241 while (true) { 242 char inChar = in.readChar(); 243 if (badEOF()) 244 return null; 245 if (inChar == '\\') { 246 char c = in.readChar(); 247 if (in.eof()) { 248 inErr = true; 249 return null; 250 } 251 if (c == 'n') { 252 inChar = '\n'; 253 } else if (c == 'r') { 254 inChar = '\r'; 255 } else if (c == 't') { 256 inChar = '\t'; 257 } else if (c == '\\' || c == '"') { 258 inChar = c; 259 } else if (c == 'u') { 260 inChar = readUnicode4Escape(); 261 if (inErr) 262 return null; 263 } else { 264 syntaxError("illegal escape sequence '" + c + "'"); 265 return null; 266 } 267 } else if (inChar == '"') { 268 String lang; 269 if ('@' == in.nextChar()) { 270 expect("@"); 271 lang = readLang(); 272 } else if ('-' == in.nextChar()) { 273 expect("-"); 274 deprecated("Language tags should be introduced with @ not -."); 275 lang = readLang(); 276 } else { 277 lang = ""; 278 } 279 if (wellFormed) { 280 return model.createLiteral( 281 lit.toString(), 282 wellFormed); 284 } else if ('^' == in.nextChar()) { 285 String datatypeURI = null; 286 if (!expect("^^<")) { 287 syntaxError("ill-formed datatype"); 288 return null; 289 } 290 datatypeURI = readURI(); 291 if (datatypeURI == null || !expect(">")) 292 return null; 293 if ( lang.length() > 0 ) 294 deprecated("Language tags are not permitted on typed literals."); 295 296 return model.createTypedLiteral( 297 lit.toString(), 298 datatypeURI); 299 } else { 300 return model.createLiteral(lit.toString(), lang); 301 } 302 } 303 lit = lit.append(inChar); 304 } 305 } 306 307 private char readUnicode4Escape() { 308 char buf[] = 309 new char[] { 310 in.readChar(), 311 in.readChar(), 312 in.readChar(), 313 in.readChar()}; 314 if (badEOF()) { 315 return 0; 316 } 317 try { 318 return (char) Integer.parseInt(new String (buf), 16); 319 } catch (NumberFormatException e) { 320 syntaxError("bad unicode escape sequence"); 321 return 0; 322 } 323 } 324 private void deprecated(String s) { 325 errorHandler.warning( 326 new SyntaxError( 327 syntaxErrorMessage( 328 "Deprecation warning", 329 s, 330 in.getLinepos(), 331 in.getCharpos()))); 332 } 333 334 private void syntaxError(String s) { 335 errorHandler.error( 336 new SyntaxError( 337 syntaxErrorMessage( 338 "Syntax error", 339 s, 340 in.getLinepos(), 341 in.getCharpos()))); 342 inErr = true; 343 } 344 private String readLang() { 345 StringBuffer lang = new StringBuffer (15); 346 347 348 while (true) { 349 char inChar = in.nextChar(); 350 if (Character.isWhitespace(inChar) || inChar == '.' || inChar == '^') 351 return lang.toString(); 352 lang = lang.append(in.readChar()); 353 } 354 } 355 private boolean badEOF() { 356 if (in.eof()) { 357 syntaxError("premature end of file"); 358 } 359 return inErr; 360 } 361 protected String readURI() { 362 StringBuffer uri = new StringBuffer (sbLength); 363 364 while (in.nextChar() != '>') { 365 char inChar = in.readChar(); 366 367 if (inChar == '\\') { 368 expect("u"); 369 inChar = readUnicode4Escape(); 370 } 371 if (badEOF()) { 372 return null; 373 } 374 uri = uri.append(inChar); 375 } 376 return uri.toString(); 377 } 378 379 protected String readName() { 380 StringBuffer name = new StringBuffer (sbLength); 381 382 while (!Character.isWhitespace(in.nextChar())) { 383 name = name.append(in.readChar()); 384 if (badEOF()) 385 return null; 386 } 387 return name.toString(); 388 } 389 private boolean expect(String str) { 390 for (int i = 0; i < str.length(); i++) { 391 char want = str.charAt(i); 392 393 if (badEOF()) 394 return false; 395 396 char inChar = in.readChar(); 397 398 if (inChar != want) { 399 syntaxError("expected \"" + str + "\""); 401 return false; 402 } 403 } 404 return true; 405 } 406 protected void skipWhiteSpace() { 407 while (Character.isWhitespace(in.nextChar()) || in.nextChar() == '#') { 408 char inChar = in.readChar(); 409 if (in.eof()) { 410 return; 411 } 412 if (inChar == '#') { 413 while (inChar != '\n') { 414 inChar = in.readChar(); 415 if (in.eof()) { 416 return; 417 } 418 } 419 } 420 } 421 } 422 423 protected Resource lookupResource(String name) { 424 Resource r; 425 r = (Resource) anons.get(name); 426 if (r == null) { 427 r = model.createResource(); 428 anons.put(name, r); 429 } 430 return r; 431 } 432 433 protected String syntaxErrorMessage( 434 String sort, 435 String msg, 436 int linepos, 437 int charpos) { 438 return base 439 + sort 440 + " at line " 441 + linepos 442 + " position " 443 + charpos 444 + ": " 445 + msg; 446 } 447 448 } 449 450 class IStream { 451 452 454 Reader in; 455 char[] thisChar = new char[1]; 456 boolean eof; 457 int charpos = 1; 458 int linepos = 1; 459 460 protected IStream(Reader in) { 461 try { 462 this.in = in; 463 eof = (in.read(thisChar, 0, 1) == -1); 464 } catch (IOException e) { 465 throw new JenaException(e); 466 } 467 } 468 469 protected char readChar() { 470 try { 471 if (eof) 472 return '\000'; 473 char rv = thisChar[0]; 474 eof = (in.read(thisChar, 0, 1) == -1); 475 if (rv == '\n') { 476 linepos++; 477 charpos = 0; 478 } else { 479 charpos++; 480 } 481 return rv; 482 } catch (java.io.IOException e) { 483 throw new JenaException(e); 484 } 485 } 486 487 protected char nextChar() { 488 return eof ? '\000' : thisChar[0]; 489 } 490 491 protected boolean eof() { 492 return eof; 493 } 494 495 protected int getLinepos() { 496 return linepos; 497 } 498 499 protected int getCharpos() { 500 return charpos; 501 } 502 503 } | Popular Tags |