1 7 8 package org.cyberneko.html.filters; 9 10 import org.cyberneko.html.HTMLAugmentations; 11 import org.cyberneko.html.HTMLEventInfo; 12 13 import java.lang.reflect.Method ; 14 import java.lang.reflect.InvocationTargetException ; 15 16 import org.apache.xerces.util.XMLChar; 17 import org.apache.xerces.util.XMLStringBuffer; 18 import org.apache.xerces.xni.Augmentations; 19 import org.apache.xerces.xni.NamespaceContext; 20 import org.apache.xerces.xni.QName; 21 import org.apache.xerces.xni.XMLAttributes; 22 import org.apache.xerces.xni.XMLLocator; 23 import org.apache.xerces.xni.XMLString; 24 import org.apache.xerces.xni.XNIException; 25 import org.apache.xerces.xni.parser.XMLComponentManager; 26 import org.apache.xerces.xni.parser.XMLConfigurationException; 27 28 67 public class Purifier 68 extends DefaultFilter { 69 70 74 75 public static final String SYNTHESIZED_NAMESPACE_PREFX = 76 "http://cyberneko.org/html/ns/synthesized/"; 77 78 79 protected static final String NAMESPACES = "http://xml.org/sax/features/namespaces"; 80 81 82 protected static final String AUGMENTATIONS = "http://cyberneko.org/html/features/augmentations"; 83 84 85 private static final String [] RECOGNIZED_FEATURES = { 86 NAMESPACES, 87 AUGMENTATIONS, 88 }; 89 90 91 private static final Boolean [] RECOGNIZED_FEATURES_DEFAULTS = { 92 null, 93 null, 94 }; 95 96 98 99 protected static final HTMLEventInfo SYNTHESIZED_ITEM = 100 new HTMLEventInfo.SynthesizedItem(); 101 102 106 108 109 protected boolean fNamespaces; 110 111 112 protected boolean fAugmentations; 113 114 116 117 protected boolean fSeenDoctype; 118 119 120 protected boolean fSeenRootElement; 121 122 123 protected boolean fInCDATASection; 124 125 127 128 protected String fPublicId; 129 130 131 protected String fSystemId; 132 133 135 136 protected NamespaceContext fNamespaceContext; 137 138 139 protected int fSynthesizedNamespaceCount; 140 141 143 144 private QName fQName = new QName(); 145 146 147 private final HTMLAugmentations fInfosetAugs = new HTMLAugmentations(); 148 149 150 private final XMLStringBuffer fStringBuffer = new XMLStringBuffer(); 151 152 156 public void reset(XMLComponentManager manager) 157 throws XMLConfigurationException { 158 159 fInCDATASection = false; 161 162 fNamespaces = manager.getFeature(NAMESPACES); 164 fAugmentations = manager.getFeature(AUGMENTATIONS); 165 166 } 168 172 173 public void startDocument(XMLLocator locator, String encoding, 174 Augmentations augs) throws XNIException { 175 fNamespaceContext = fNamespaces 176 ? new NamespaceBinder.NamespaceSupport() : null; 177 fSynthesizedNamespaceCount = 0; 178 handleStartDocument(); 179 super.startDocument(locator, encoding, augs); 180 } 182 183 public void startDocument(XMLLocator locator, String encoding, 184 NamespaceContext nscontext, Augmentations augs) 185 throws XNIException { 186 fNamespaceContext = nscontext; 187 fSynthesizedNamespaceCount = 0; 188 handleStartDocument(); 189 super.startDocument(locator, encoding, nscontext, augs); 190 } 192 193 public void xmlDecl(String version, String encoding, String standalone, 194 Augmentations augs) throws XNIException { 195 if (version == null || !version.equals("1.0")) { 196 version = "1.0"; 197 } 198 if (encoding != null && encoding.length() == 0) { 199 encoding = null; 200 } 201 if (standalone != null) { 202 if (!standalone.equalsIgnoreCase("true") && 203 !standalone.equalsIgnoreCase("false")) { 204 standalone = null; 205 } 206 else { 207 standalone = standalone.toLowerCase(); 208 } 209 } 210 super.xmlDecl(version,encoding,standalone,augs); 211 } 213 214 public void comment(XMLString text, Augmentations augs) 215 throws XNIException { 216 StringBuffer str = new StringBuffer (purifyText(text).toString()); 217 int length = str.length(); 218 for (int i = length-1; i >= 0; i--) { 219 char c = str.charAt(i); 220 if (c == '-') { 221 str.insert(i + 1, ' '); 222 } 223 } 224 fStringBuffer.length = 0; 225 fStringBuffer.append(str.toString()); 226 text = fStringBuffer; 227 super.comment(text, augs); 228 } 230 231 public void processingInstruction(String target, XMLString data, 232 Augmentations augs) 233 throws XNIException { 234 target = purifyName(target, true); 235 data = purifyText(data); 236 super.processingInstruction(target, data, augs); 237 } 239 240 public void doctypeDecl(String root, String pubid, String sysid, 241 Augmentations augs) throws XNIException { 242 fSeenDoctype = true; 243 fPublicId = pubid; 246 fSystemId = sysid; 247 if (fPublicId != null && fSystemId == null) { 250 fSystemId = ""; 251 } 252 } 256 257 public void startElement(QName element, XMLAttributes attrs, 258 Augmentations augs) throws XNIException { 259 handleStartElement(element, attrs); 260 super.startElement(element, attrs, augs); 261 } 263 264 public void emptyElement(QName element, XMLAttributes attrs, 265 Augmentations augs) throws XNIException { 266 handleStartElement(element, attrs); 267 super.emptyElement(element, attrs, augs); 268 } 270 271 public void startCDATA(Augmentations augs) throws XNIException { 272 fInCDATASection = true; 273 super.startCDATA(augs); 274 } 276 277 public void endCDATA(Augmentations augs) throws XNIException { 278 fInCDATASection = false; 279 super.endCDATA(augs); 280 } 282 283 public void characters(XMLString text, Augmentations augs) 284 throws XNIException { 285 text = purifyText(text); 286 if (fInCDATASection) { 287 StringBuffer str = new StringBuffer (text.toString()); 288 int length = str.length(); 289 for (int i = length-1; i >= 0; i--) { 290 char c = str.charAt(i); 291 if (c == ']') { 292 str.insert(i + 1, ' '); 293 } 294 } 295 fStringBuffer.length = 0; 296 fStringBuffer.append(str.toString()); 297 text = fStringBuffer; 298 } 299 super.characters(text,augs); 300 } 302 303 public void endElement(QName element, Augmentations augs) 304 throws XNIException { 305 element = purifyQName(element); 306 if (fNamespaces) { 307 if (element.prefix != null && element.uri == null) { 308 element.uri = fNamespaceContext.getURI(element.prefix); 309 } 310 } 311 super.endElement(element, augs); 312 } 314 318 319 protected void handleStartDocument() { 320 fSeenDoctype = false; 321 fSeenRootElement = false; 322 } 324 325 protected void handleStartElement(QName element, XMLAttributes attrs) { 326 327 element = purifyQName(element); 329 int attrCount = attrs != null ? attrs.getLength() : 0; 330 for (int i = attrCount-1; i >= 0; i--) { 331 attrs.getName(i, fQName); 333 attrs.setName(i, purifyQName(fQName)); 334 335 if (fNamespaces) { 337 if (!fQName.rawname.equals("xmlns") && 338 !fQName.rawname.startsWith("xmlns:")) { 339 attrs.getName(i, fQName); 343 if (fQName.prefix != null && fQName.uri == null) { 344 synthesizeBinding(attrs, fQName.prefix); 345 } 346 } 347 } 348 } 349 350 if (fNamespaces) { 352 if (element.prefix != null && element.uri == null) { 353 synthesizeBinding(attrs, element.prefix); 354 } 355 } 356 357 if (!fSeenRootElement && fSeenDoctype) { 359 Augmentations augs = synthesizedAugs(); 360 super.doctypeDecl(element.rawname, fPublicId, fSystemId, augs); 361 } 362 363 fSeenRootElement = true; 365 366 } 368 369 protected void synthesizeBinding(XMLAttributes attrs, String ns) { 370 String prefix = "xmlns"; 371 String localpart = ns; 372 String qname = prefix+':'+localpart; 373 String uri = NamespaceBinder.NAMESPACES_URI; 374 String atype = "CDATA"; 375 String avalue = SYNTHESIZED_NAMESPACE_PREFX+fSynthesizedNamespaceCount++; 376 377 fQName.setValues(prefix, localpart, qname, uri); 379 attrs.addAttribute(fQName, atype, avalue); 380 381 fNamespaceContext.declarePrefix(ns, avalue); 383 384 } 386 387 protected final Augmentations synthesizedAugs() { 388 HTMLAugmentations augs = null; 389 if (fAugmentations) { 390 augs = fInfosetAugs; 391 augs.removeAllItems(); 392 augs.putItem(AUGMENTATIONS, SYNTHESIZED_ITEM); 393 } 394 return augs; 395 } 397 401 402 protected QName purifyQName(QName qname) { 403 qname.prefix = purifyName(qname.prefix, true); 404 qname.localpart = purifyName(qname.localpart, true); 405 qname.rawname = purifyName(qname.rawname, false); 406 return qname; 407 } 409 410 protected String purifyName(String name, boolean localpart) { 411 if (name == null) { 412 return name; 413 } 414 StringBuffer str = new StringBuffer (); 415 int length = name.length(); 416 boolean seenColon = localpart; 417 for (int i = 0; i < length; i++) { 418 char c = name.charAt(i); 419 if (i == 0) { 420 if (!XMLChar.isNameStart(c)) { 421 str.append("_u"+toHexString(c,4)+"_"); 422 } 423 else { 424 str.append(c); 425 } 426 } 427 else { 428 if ((fNamespaces && c == ':' && seenColon) || !XMLChar.isName(c)) { 429 str.append("_u"+toHexString(c,4)+"_"); 430 } 431 else { 432 str.append(c); 433 } 434 seenColon = seenColon || c == ':'; 435 } 436 } 437 return str.toString(); 438 } 440 441 protected XMLString purifyText(XMLString text) { 442 fStringBuffer.length = 0; 443 for (int i = 0; i < text.length; i++) { 444 char c = text.ch[text.offset+i]; 445 if (XMLChar.isInvalid(c)) { 446 fStringBuffer.append("\\u"+toHexString(c,4)); 447 } 448 else { 449 fStringBuffer.append(c); 450 } 451 } 452 return fStringBuffer; 453 } 455 459 460 protected static String toHexString(int c, int padlen) { 461 StringBuffer str = new StringBuffer (padlen); 462 str.append(Integer.toHexString(c)); 463 int len = padlen - str.length(); 464 for (int i = 0; i < len; i++) { 465 str.insert(0, '0'); 466 } 467 return str.toString().toUpperCase(); 468 } 470 } | Popular Tags |