1 31 36 37 package com.hp.hpl.jena.rdf.arp; 38 39 import java.util.*; 40 41 import com.hp.hpl.jena.rdf.arp.lang.LanguageTag; 42 import com.hp.hpl.jena.rdf.arp.lang.LanguageTagSyntaxException; 43 import com.hp.hpl.jena.rdf.arp.lang.LanguageTagCodes; 44 45 import org.apache.xerces.util.XMLChar; 46 47 52 class ParserSupport 53 implements ARPErrorNumbers, RDFParserConstants, LanguageTagCodes { 54 Map idsUsed = new HashMap(); 55 ParserSupport(XMLHandler arp) { 56 this.arp = arp; 57 } 58 XMLHandler arp; 59 void checkWhite(StrToken st, boolean maybeMissingParseType) 60 throws ParseException { 61 String s = st.value; 62 int lgth = s.length(); 63 int from = 0; 64 while (from < lgth) { 68 switch (s.charAt(from++)) { 69 case '\n' : 70 case '\r' : 71 case '\t' : 72 case ' ' : 73 continue; 74 default : 75 throw new ParseException( 76 ERR_NOT_WHITESPACE, 77 st.location, 78 "Expected whitespace found: '" 79 + s 80 + "'" 81 + (maybeMissingParseType 82 ? ". Maybe a missing rdf:parseType='Literal', or a striping problem." 83 : ".")); 84 } 85 } 86 } 87 90 void checkIdSymbol(XMLContext ctxt, StrToken s, String str) 91 throws ParseException { 92 if (!arp.ignoring(WARN_REDEFINITION_OF_ID)) { 93 Map idsUsedForBase = (Map) idsUsed.get(ctxt.getBase()); 94 if (idsUsedForBase == null) { 95 idsUsedForBase = new HashMap(); 96 idsUsed.put(ctxt.getBase(), idsUsedForBase); 97 } 98 Location prev = (Location) idsUsedForBase.get(s.value); 99 if (prev != null) { 100 arp.parseWarning( 101 WARN_REDEFINITION_OF_ID, 102 s.location, 103 "Redefinition of ID: " + s.value); 104 arp.parseWarning( 105 WARN_REDEFINITION_OF_ID, 106 prev, 107 "Previous definition of '" + s.value + "'."); 108 } else { 109 idsUsedForBase.put(s.value, s.location); 119 } 120 } 121 if (!ctxt.isSameAsDocument()) 122 arp.parseWarning( 123 IGN_XMLBASE_SIGNIFICANT, 124 s.location, 125 "The use of xml:base changes the meaning of ID '" 126 + s.value 127 + "'."); 128 129 checkXMLName(s, s.value); 130 checkEncoding(s); 131 } 132 private void checkXMLName(StrToken s, String str) throws ParseException { 133 if (!XMLChar.isValidNCName(str)) { 134 arp.parseWarning( 136 WARN_BAD_NAME, 137 s.location, 138 "Not an XML Name: '" + str + "'"); 139 } 140 141 } 144 String checkNodeID(Token s) throws ParseException { 145 String str = ((StrToken) s).value; 146 if (!XMLChar.isValidNCName(str)) { 147 arp.parseWarning( 149 WARN_BAD_NAME, 150 s.location, 151 "Not an XML Name: '" + str + "'"); 152 } 153 return str; 154 } 155 void checkString(Token t) throws ParseException { 156 if (!CharacterModel.isNormalFormC(((StrToken) t).value)) 157 arp.parseWarning( 158 WARN_STRING_NOT_NORMAL_FORM_C, 159 t.location, 160 "String not in Unicode Normal Form C: " + ((StrToken) t).value); 161 checkEncoding((StrToken) t); 162 checkComposingChar(t); 163 } 164 void checkComposingChar(Token t) throws ParseException { 165 if (CharacterModel.startsWithComposingCharacter(((StrToken) t).value)) 166 arp.parseWarning( 167 WARN_STRING_COMPOSING_CHAR, 168 t.location, 169 "String is not legal in XML 1.1; starts with composing char: " 170 + ((StrToken) t).value 171 + " (" + (int)((StrToken)t).value.charAt(0)+ ")"); 172 } 173 void checkNormalFormC(Token t, ARPString str) throws ParseException { 174 if (!CharacterModel.isNormalFormC(str.toString())) 175 arp.parseWarning( 176 WARN_STRING_NOT_NORMAL_FORM_C, 177 t.location, 178 "String not in Unicode Normal Form C: " + str.toString()); 179 } 180 181 void processingInstruction(Token t, boolean maybeMissingPT) 182 throws ParseException { 183 arp.parseWarning( 184 WARN_PROCESSING_INSTRUCTION_IN_RDF, 185 t.location, 186 "A processing instruction is in RDF content. No processing was done." 187 + (maybeMissingPT 188 ? " Maybe a missing rdf:parseType='Literal'" 189 : "")); 190 } 191 void saxException(Token t) throws ParseException { 192 ExceptionToken sax = (ExceptionToken) t; 193 arp.parseWarning(sax.errorCode, t.location, sax.toString()); 194 } 195 CollectionAction collectionAction(AResourceInternal rslt[]) { 196 return new RDFCollection(this, rslt); 197 } 198 CollectionAction damlCollectionAction(AResourceInternal rslt[]) { 199 return new DAMLCollection(this, rslt); 200 } 201 void checkXMLLang(StrToken s) throws ParseException { 202 String lang = s.value; 203 if (lang.equals("")) 204 return; 205 try { 206 LanguageTag tag = new LanguageTag(lang); 207 int tagType = tag.tagType(); 208 if (tagType == LT_ILLEGAL) { 209 arp.parseWarning( 210 WARN_BAD_XMLLANG, 211 s.location, 212 tag.errorMessage()); 213 } 214 if ((tagType & LT_UNDETERMINED) == LT_UNDETERMINED) { 215 arp.parseWarning( 216 WARN_BAD_XMLLANG, 217 s.location, 218 "Unnecessary use of language tag \"und\" prohibited by RFC3066"); 219 } 220 if ((tagType & LT_IANA_DEPRECATED) == LT_IANA_DEPRECATED) { 221 arp.parseWarning( 222 WARN_DEPRECATED_XMLLANG, 223 s.location, 224 "Use of deprecated language tag \"" + lang + "\"."); 225 } 226 if ((tagType & LT_PRIVATE_USE) == LT_PRIVATE_USE) { 227 arp.parseWarning( 228 IGN_PRIVATE_XMLLANG, 229 s.location, 230 "Use of (IANA) private language tag \"" + lang + "\"."); 231 } else if ((tagType & LT_LOCAL_USE) == LT_LOCAL_USE) { 232 arp.parseWarning( 233 IGN_PRIVATE_XMLLANG, 234 s.location, 235 "Use of (ISO639-2) local use language tag \"" 236 + lang 237 + "\"."); 238 } else if ((tagType & LT_EXTRA) == LT_EXTRA) { 239 arp.parseWarning( 240 IGN_PRIVATE_XMLLANG, 241 s.location, 242 "Use of additional private subtags on language \"" 243 + lang 244 + "\"."); 245 } 246 } catch (LanguageTagSyntaxException e) { 247 arp.parseWarning( 248 WARN_MALFORMED_XMLLANG, 249 s.location, 250 e.getMessage()); 251 } 252 } 253 254 private String truncateXMLBase(StrToken s) { 255 return truncateXMLBase(s.value); 256 } 257 static String truncateXMLBase(String rslt) { 258 int hash = rslt.indexOf('#'); 259 if (hash != -1) { 260 return rslt.substring(0, hash); 261 } 262 return rslt; 263 } 264 XMLContext changeXMLBase(XMLContext ctxt, Token t) throws ParseException { 265 arp.parseWarning( 266 IGN_XMLBASE_USED, 267 t.location, 268 "Use of attribute xml:base is not envisaged in RDF Model&Syntax."); 269 StrToken base = ((StrToken) t); 270 String bb = truncateXMLBase(base); 272 try { 273 ctxt = ctxt.withBase(bb); 274 } catch (MalformedURIException mal) { 275 arp.parseWarning( 276 WARN_MALFORMED_URI, 277 t.location, 278 "Bad URI <" + ((StrToken) t).value + ">: " + mal.getMessage()); 279 ctxt = ctxt.revertToDocument(); 280 } 281 return ctxt; 282 } 283 URIReference makeURIReference(XMLContext ctxt, Token t) 284 throws ParseException { 285 StrToken s = (StrToken) t; 286 String val = s.value; 288 289 checkEncoding(s); 290 try { 291 URIReference rslt = new URIReference(t.location, ctxt, val); 292 if (val.indexOf(':') == -1) { 293 if ((!arp.ignoring(IGN_XMLBASE_SIGNIFICANT)) 294 && !ctxt.isSameAsDocument()) { 295 boolean bad = false; 296 try { 297 URIReference other = 298 new URIReference( 299 t.location, 300 ctxt.getDocument(), 301 val); 302 bad = !other.equals(rslt); 303 } catch (Exception e) { 304 } 306 if (bad) { 307 arp.parseWarning( 308 IGN_XMLBASE_SIGNIFICANT, 309 t.location, 310 "Use of attribute xml:base changes interpretation of relative URI: \"" 311 + val 312 + "\"."); 313 } 314 } 315 } 316 return rslt; 317 } catch (MalformedURIException mal) { 318 arp.parseWarning( 319 WARN_MALFORMED_URI, 320 t.location, 321 "Bad URI <" + s.value + ">: " + mal.getMessage()); 322 return new BadURIReference(val); 323 } 324 } 325 326 void createTriple(ARPResource r, Token p, Object v, String reify) 327 throws ParseException { 328 switch (p.kind) { 329 case E_OTHER : 330 case E_RDF_N : 331 r.setPredicateObject( 332 ((ARPQname) p).asURIReference(arp), 333 v, 334 reify); 335 break; 336 case E_LI : 337 r.setLiObject(v, reify); 338 break; 339 default : 340 throw new RuntimeException ("Assertion failure in ParserSupport.createTriple"); 341 } 342 } 343 344 ARPDatatypeLiteral createDatatypeLiteral( 345 URIReference dtURI, 346 ARPString dtLex) { 347 return new ARPDatatypeLiteral(dtLex, dtURI); 348 } 349 void checkEncoding(String s, Location w) throws ParseException { 350 if (arp.encodingProblems) { 351 for (int i = s.length() - 1; i >= 0; i--) { 352 if (s.charAt(i) > 127) 353 arp.parseWarning( 354 ERR_ENCODING_MISMATCH, 355 w, 356 "Encoding error with non-ascii characters."); 357 } 358 359 } 360 } 361 void checkEncoding(StrToken t) throws ParseException { 362 if (arp.encodingProblems) { 363 checkEncoding(t.value, t.location); 364 } 365 } 366 388 private void useNameSpace(Map ns, ARPQname qn) { 389 useNameSpace(ns, qn.prefix(), qn.nameSpace); 390 } 391 private void useNameSpace(Map ns, String prefix, String uri) { 392 ns.put(prefix, uri); 393 } 394 void startLitElement(StringBuffer b, Token t, Map ns) { 395 ARPQname qn = (ARPQname) t; 396 b.append("<" + qn.qName); 397 useNameSpace(ns, qn); 398 return; 399 } 400 void checkNamespaceURI(Token t) throws ParseException { 401 checkEncoding((StrToken) t); 402 checkNamespaceURI(((StrToken) t).value, t); 403 } 404 private void checkNamespaceURI(String uri, Token t) throws ParseException { 405 if (uri.length() != 0) 406 try { 407 URI u = new URI(uri); 408 } catch (MalformedURIException m) { 409 arp.parseWarning( 410 WARN_BAD_NAMESPACE_URI, 411 t.location, 412 "Illegal URI in xmlns declaration: " + uri); 413 } 414 } 415 private void checkNamespace(Map allNs, String prefix, String uri, Token t) 416 throws ParseException { 417 checkNamespaceURI(uri, t); 418 String ns = (String ) allNs.get(prefix); 419 if (ns == null || !ns.equals(uri)) { 420 arp.parseWarning( 431 ERR_INTERNAL_ERROR, 432 t.location, 433 "Internal namespaces error, please report to jjc@hpl.hp.com."); 434 435 } 436 } 437 445 Map litAttributes( 446 StringBuffer buf, 447 SortedMap attrs, 448 SortedMap visiblyUsed, 449 Map ns, 450 Map allNs, 451 Token t) 452 throws ParseException { 453 boolean nsIsNew = false; 454 Iterator it = visiblyUsed.entrySet().iterator(); 455 while (it.hasNext()) { 456 Map.Entry entry = (Map.Entry) it.next(); 457 String prefix = (String ) entry.getKey(); 458 String uri = (String ) entry.getValue(); 459 checkNamespace(allNs, prefix, uri, t); 460 if (uri.equals(ns.get(prefix))) 461 continue; 462 if (!nsIsNew) { 463 ns = new HashMap(ns); 464 nsIsNew = true; 465 } 466 ns.put(prefix, uri); 467 String attr = prefix.equals("") ? "xmlns" : "xmlns:" + prefix; 468 buf.append(" " + attr + "=\"" + encodeAttributeText(uri) + "\""); 469 } 470 it = attrs.values().iterator(); 471 while (it.hasNext()) { 472 buf.append((String ) it.next()); 473 } 474 return ns; 475 } 476 Map litNamespace(Token prefix, Token uri, Map ns, Map used) { 477 String urins = ((StrToken) uri).value; 478 String prefixS = ((StrToken) prefix).value; 479 Map rslt = new HashMap(ns); 481 rslt.put(prefixS, urins); 482 return rslt; 483 } 484 String litAttrName(Token attr, Map visiblyUsed) { 485 ARPQname qn = (ARPQname) attr; 486 if (!qn.prefix().equals("")) { 487 useNameSpace(visiblyUsed, qn); 488 } 489 return qn.qName; 490 } 491 String litAttribute(Token attr, Token val) { 492 ARPQname qn = (ARPQname) attr; 493 return " " 494 + qn.qName 495 + "=\"" 496 + encodeAttributeText(((StrToken) val).value) 497 + "\""; 498 } 499 void litComment(StringBuffer b, Token comment) { 500 b.append("<!--" + ((StrToken) comment).value + "-->"); 501 } 502 void litProcessingInstruction(StringBuffer b, Token pi) { 503 b.append("<?" + ((StrToken) pi).value + "?>"); 504 } 505 void endLitElement(StringBuffer b, Token t) { 506 String q = ((ARPQname) t).qName; 507 b.append("</" + q + ">"); 508 } 509 520 void litText(StringBuffer b, Token t) { 521 b.append(encodeTextNode(((StrToken) t).value)); 522 } 523 static Map xmlNameSpace() { 524 Map rslt = new HashMap(); 525 rslt.put("xml", XMLHandler.xmlns); 526 rslt.put("", ""); 527 return rslt; 528 } 529 551 552 561 562 static private String encodeAttributeText(String s) { 563 StringBuffer rslt = null; 564 String replace; 565 char ch; 566 for (int i = 0; i < s.length(); i++) { 567 ch = s.charAt(i); 568 switch (ch) { 569 case '&' : 570 replace = "&"; 571 break; 572 case '<' : 573 replace = "<"; 574 break; 575 case '"' : 576 replace = """; 577 break; 578 case 9 : 579 replace = "	"; 580 break; 581 case 0xA : 582 replace = "
"; 583 break; 584 case 0xD : 585 replace = "
"; 586 break; 587 default : 588 replace = null; 589 } 590 if (replace != null) { 591 if (rslt == null) { 592 rslt = new StringBuffer (); 593 rslt.append(s.substring(0, i)); 594 } 595 rslt.append(replace); 596 } else if (rslt != null) { 597 rslt.append(ch); 598 } 599 } 600 return rslt == null ? s : rslt.toString(); 601 } 602 608 609 static private String encodeTextNode(String s) { 610 StringBuffer rslt = null; 611 String replace; 612 char ch; 613 for (int i = 0; i < s.length(); i++) { 614 ch = s.charAt(i); 615 switch (ch) { 616 case '&' : 617 replace = "&"; 618 break; 619 case '<' : 620 replace = "<"; 621 break; 622 case '>' : 623 replace = ">"; 624 break; 625 case 0xD : 626 replace = "
"; 627 break; 628 default : 629 replace = null; 630 } 631 if (replace != null) { 632 if (rslt == null) { 633 rslt = new StringBuffer (); 634 rslt.append(s.substring(0, i)); 635 } 636 rslt.append(replace); 637 } else if (rslt != null) { 638 rslt.append(ch); 639 } 640 } 641 return rslt == null ? s : rslt.toString(); 642 } 643 644 } 645 | Popular Tags |