1 7 8 package org.cyberneko.html; 9 10 import java.io.BufferedReader ; 11 import java.io.File ; 12 import java.io.FileInputStream ; 13 import java.io.InputStream ; 14 import java.io.InputStreamReader ; 15 import java.io.IOException ; 16 import java.lang.reflect.Method ; 17 import java.lang.reflect.InvocationTargetException ; 18 import java.text.MessageFormat ; 19 import java.util.Locale ; 20 import java.util.MissingResourceException ; 21 import java.util.Properties ; 22 import java.util.ResourceBundle ; 23 import java.util.Vector ; 24 25 import org.cyberneko.html.filters.NamespaceBinder; 26 27 import org.apache.xerces.util.DefaultErrorHandler; 28 import org.apache.xerces.util.ParserConfigurationSettings; 29 import org.apache.xerces.xni.XMLDocumentHandler; 30 import org.apache.xerces.xni.XMLDTDHandler; 31 import org.apache.xerces.xni.XMLDTDContentModelHandler; 32 import org.apache.xerces.xni.XNIException; 33 import org.apache.xerces.xni.parser.XMLConfigurationException; 34 import org.apache.xerces.xni.parser.XMLDocumentFilter; 35 import org.apache.xerces.xni.parser.XMLDocumentSource; 36 import org.apache.xerces.xni.parser.XMLEntityResolver; 37 import org.apache.xerces.xni.parser.XMLErrorHandler; 38 import org.apache.xerces.xni.parser.XMLInputSource; 39 import org.apache.xerces.xni.parser.XMLParseException; 40 import org.apache.xerces.xni.parser.XMLPullParserConfiguration; 41 42 78 public class HTMLConfiguration 79 extends ParserConfigurationSettings 80 implements XMLPullParserConfiguration { 81 82 86 88 89 protected static final String NAMESPACES = "http://xml.org/sax/features/namespaces"; 90 91 92 protected static final String AUGMENTATIONS = "http://cyberneko.org/html/features/augmentations"; 93 94 95 protected static final String REPORT_ERRORS = "http://cyberneko.org/html/features/report-errors"; 96 97 98 protected static final String SIMPLE_ERROR_FORMAT = "http://cyberneko.org/html/features/report-errors/simple"; 99 100 101 protected static final String BALANCE_TAGS = "http://cyberneko.org/html/features/balance-tags"; 102 103 105 106 protected static final String NAMES_ELEMS = "http://cyberneko.org/html/properties/names/elems"; 107 108 109 protected static final String NAMES_ATTRS = "http://cyberneko.org/html/properties/names/attrs"; 110 111 112 protected static final String FILTERS = "http://cyberneko.org/html/properties/filters"; 113 114 115 protected static final String ERROR_REPORTER = "http://cyberneko.org/html/properties/error-reporter"; 116 117 119 120 protected static final String ERROR_DOMAIN = "http://cyberneko.org/html"; 121 122 124 125 private static final Class [] DOCSOURCE = { XMLDocumentSource.class }; 126 127 131 133 134 protected XMLDocumentHandler fDocumentHandler; 135 136 137 protected XMLDTDHandler fDTDHandler; 138 139 140 protected XMLDTDContentModelHandler fDTDContentModelHandler; 141 142 143 protected XMLErrorHandler fErrorHandler = new DefaultErrorHandler(); 144 145 147 148 protected XMLEntityResolver fEntityResolver; 149 150 151 protected Locale fLocale = Locale.getDefault(); 152 153 155 159 protected boolean fCloseStream; 160 161 163 164 protected Vector fHTMLComponents = new Vector (2); 165 166 168 169 protected HTMLScanner fDocumentScanner = new HTMLScanner(); 170 171 172 protected HTMLTagBalancer fTagBalancer = new HTMLTagBalancer(); 173 174 175 protected NamespaceBinder fNamespaceBinder = new NamespaceBinder(); 176 177 179 180 protected HTMLErrorReporter fErrorReporter = new ErrorReporter(); 181 182 184 185 protected static boolean XERCES_2_0_0 = false; 186 187 188 protected static boolean XERCES_2_0_1 = false; 189 190 191 protected static boolean XML4J_4_0_x = false; 192 193 197 static { 198 try { 199 String VERSION = "org.apache.xerces.impl.Version"; 200 Object version = ObjectFactory.createObject(VERSION, VERSION); 201 java.lang.reflect.Field field = version.getClass().getField("fVersion"); 202 String versionStr = String.valueOf(field.get(version)); 203 XERCES_2_0_0 = versionStr.equals("Xerces-J 2.0.0"); 204 XERCES_2_0_1 = versionStr.equals("Xerces-J 2.0.1"); 205 XML4J_4_0_x = versionStr.startsWith("XML4J 4.0."); 206 } 207 catch (Throwable e) { 208 } 210 } 212 216 217 public HTMLConfiguration() { 218 219 addComponent(fDocumentScanner); 221 addComponent(fTagBalancer); 222 addComponent(fNamespaceBinder); 223 224 228 String VALIDATION = "http://xml.org/sax/features/validation"; 230 String [] recognizedFeatures = { 231 AUGMENTATIONS, 232 NAMESPACES, 233 VALIDATION, 234 REPORT_ERRORS, 235 SIMPLE_ERROR_FORMAT, 236 BALANCE_TAGS, 237 }; 238 addRecognizedFeatures(recognizedFeatures); 239 setFeature(AUGMENTATIONS, false); 240 setFeature(NAMESPACES, true); 241 setFeature(VALIDATION, false); 242 setFeature(REPORT_ERRORS, false); 243 setFeature(SIMPLE_ERROR_FORMAT, false); 244 setFeature(BALANCE_TAGS, true); 245 246 if (XERCES_2_0_0) { 248 recognizedFeatures = new String [] { 252 "http://apache.org/xml/features/scanner/notify-builtin-refs", 253 }; 254 addRecognizedFeatures(recognizedFeatures); 255 } 256 257 if (XERCES_2_0_0 || XERCES_2_0_1 || XML4J_4_0_x) { 259 recognizedFeatures = new String [] { 263 "http://apache.org/xml/features/validation/schema/normalized-value", 264 "http://apache.org/xml/features/scanner/notify-char-refs", 265 }; 266 addRecognizedFeatures(recognizedFeatures); 267 } 268 269 273 String [] recognizedProperties = { 275 NAMES_ELEMS, 276 NAMES_ATTRS, 277 FILTERS, 278 ERROR_REPORTER, 279 }; 280 addRecognizedProperties(recognizedProperties); 281 setProperty(NAMES_ELEMS, "upper"); 282 setProperty(NAMES_ATTRS, "lower"); 283 setProperty(ERROR_REPORTER, fErrorReporter); 284 285 if (XERCES_2_0_0) { 287 String SYMBOL_TABLE = "http://apache.org/xml/properties/internal/symbol-table"; 293 recognizedProperties = new String [] { 294 SYMBOL_TABLE, 295 }; 296 addRecognizedProperties(recognizedProperties); 297 Object symbolTable = ObjectFactory.createObject("org.apache.xerces.util.SymbolTable", 298 "org.apache.xerces.util.SymbolTable"); 299 setProperty(SYMBOL_TABLE, symbolTable); 300 } 301 302 } 304 308 323 public void pushInputSource(XMLInputSource inputSource) { 324 fDocumentScanner.pushInputSource(inputSource); 325 } 327 330 331 public void setFeature(String featureId, boolean state) 332 throws XMLConfigurationException { 333 super.setFeature(featureId, state); 334 int size = fHTMLComponents.size(); 335 for (int i = 0; i < size; i++) { 336 HTMLComponent component = (HTMLComponent)fHTMLComponents.elementAt(i); 337 component.setFeature(featureId, state); 338 } 339 } 341 342 public void setProperty(String propertyId, Object value) 343 throws XMLConfigurationException { 344 super.setProperty(propertyId, value); 345 346 if (propertyId.equals(FILTERS)) { 347 XMLDocumentFilter[] filters = (XMLDocumentFilter[])getProperty(FILTERS); 348 if (filters != null) { 349 for (int i = 0; i < filters.length; i++) { 350 XMLDocumentFilter filter = filters[i]; 351 if (filter instanceof HTMLComponent) { 352 addComponent((HTMLComponent)filter); 353 } 354 } 355 } 356 } 357 358 int size = fHTMLComponents.size(); 359 for (int i = 0; i < size; i++) { 360 HTMLComponent component = (HTMLComponent)fHTMLComponents.elementAt(i); 361 component.setProperty(propertyId, value); 362 } 363 } 365 366 public void setDocumentHandler(XMLDocumentHandler handler) { 367 fDocumentHandler = handler; 368 } 370 371 public XMLDocumentHandler getDocumentHandler() { 372 return fDocumentHandler; 373 } 375 376 public void setDTDHandler(XMLDTDHandler handler) { 377 fDTDHandler = handler; 378 } 380 381 public XMLDTDHandler getDTDHandler() { 382 return fDTDHandler; 383 } 385 386 public void setDTDContentModelHandler(XMLDTDContentModelHandler handler) { 387 fDTDContentModelHandler = handler; 388 } 390 391 public XMLDTDContentModelHandler getDTDContentModelHandler() { 392 return fDTDContentModelHandler; 393 } 395 396 public void setErrorHandler(XMLErrorHandler handler) { 397 fErrorHandler = handler; 398 } 400 401 public XMLErrorHandler getErrorHandler() { 402 return fErrorHandler; 403 } 405 406 public void setEntityResolver(XMLEntityResolver resolver) { 407 fEntityResolver = resolver; 408 } 410 411 public XMLEntityResolver getEntityResolver() { 412 return fEntityResolver; 413 } 415 416 public void setLocale(Locale locale) { 417 if (locale == null) { 418 locale = Locale.getDefault(); 419 } 420 fLocale = locale; 421 } 423 424 public Locale getLocale() { 425 return fLocale; 426 } 428 429 public void parse(XMLInputSource source) throws XNIException, IOException { 430 setInputSource(source); 431 parse(true); 432 } 434 438 440 452 public void setInputSource(XMLInputSource inputSource) 453 throws XMLConfigurationException, IOException { 454 reset(); 455 fCloseStream = inputSource.getByteStream() == null && 456 inputSource.getCharacterStream() == null; 457 fDocumentScanner.setInputSource(inputSource); 458 } 460 476 public boolean parse(boolean complete) throws XNIException, IOException { 477 try { 478 boolean more = fDocumentScanner.scanDocument(complete); 479 if (!more) { 480 cleanup(); 481 } 482 return more; 483 } 484 catch (XNIException e) { 485 cleanup(); 486 throw e; 487 } 488 catch (IOException e) { 489 cleanup(); 490 throw e; 491 } 492 } 494 499 public void cleanup() { 500 fDocumentScanner.cleanup(fCloseStream); 501 } 503 507 508 protected void addComponent(HTMLComponent component) { 509 510 fHTMLComponents.addElement(component); 512 513 String [] features = component.getRecognizedFeatures(); 515 addRecognizedFeatures(features); 516 int featureCount = features != null ? features.length : 0; 517 for (int i = 0; i < featureCount; i++) { 518 Boolean state = component.getFeatureDefault(features[i]); 519 if (state != null) { 520 setFeature(features[i], state.booleanValue()); 521 } 522 } 523 524 String [] properties = component.getRecognizedProperties(); 526 addRecognizedProperties(properties); 527 int propertyCount = properties != null ? properties.length : 0; 528 for (int i = 0; i < propertyCount; i++) { 529 Object value = component.getPropertyDefault(properties[i]); 530 if (value != null) { 531 setProperty(properties[i], value); 532 } 533 } 534 535 } 537 538 protected void reset() throws XMLConfigurationException { 539 540 int size = fHTMLComponents.size(); 542 for (int i = 0; i < size; i++) { 543 HTMLComponent component = (HTMLComponent)fHTMLComponents.elementAt(i); 544 component.reset(this); 545 } 546 547 XMLDocumentSource lastSource = fDocumentScanner; 549 if (getFeature(BALANCE_TAGS)) { 550 lastSource.setDocumentHandler(fTagBalancer); 551 fTagBalancer.setDocumentSource(fDocumentScanner); 552 lastSource = fTagBalancer; 553 } 554 if (getFeature(NAMESPACES)) { 555 lastSource.setDocumentHandler(fNamespaceBinder); 556 fNamespaceBinder.setDocumentSource(fTagBalancer); 557 lastSource = fNamespaceBinder; 558 } 559 XMLDocumentFilter[] filters = (XMLDocumentFilter[])getProperty(FILTERS); 560 if (filters != null) { 561 for (int i = 0; i < filters.length; i++) { 562 XMLDocumentFilter filter = filters[i]; 563 Class filterClass = filter.getClass(); 564 try { 565 Method filterMethod = filterClass.getMethod("setDocumentSource", DOCSOURCE); 566 if (filterMethod != null) { 567 filterMethod.invoke(filter, new Object [] { lastSource }); 568 } 569 } 570 catch (IllegalAccessException e) { 571 } 573 catch (InvocationTargetException e) { 574 } 576 catch (NoSuchMethodException e) { 577 } 579 lastSource.setDocumentHandler(filter); 580 lastSource = filter; 581 } 582 } 583 lastSource.setDocumentHandler(fDocumentHandler); 584 585 } 587 591 609 protected class ErrorReporter 610 implements HTMLErrorReporter { 611 612 616 617 protected Locale fLastLocale; 618 619 620 protected ResourceBundle fErrorMessages; 621 622 626 627 public String formatMessage(String key, Object [] args) { 628 if (!getFeature(SIMPLE_ERROR_FORMAT)) { 629 if (!fLocale.equals(fLastLocale)) { 630 fErrorMessages = null; 631 fLastLocale = fLocale; 632 } 633 if (fErrorMessages == null) { 634 fErrorMessages = 635 ResourceBundle.getBundle("org/cyberneko/html/res/ErrorMessages", 636 fLocale); 637 } 638 try { 639 String value = fErrorMessages.getString(key); 640 String message = MessageFormat.format(value, args); 641 return message; 642 } 643 catch (MissingResourceException e) { 644 } 646 } 647 return formatSimpleMessage(key, args); 648 } 650 651 public void reportWarning(String key, Object [] args) 652 throws XMLParseException { 653 if (fErrorHandler != null) { 654 fErrorHandler.warning(ERROR_DOMAIN, key, createException(key, args)); 655 } 656 } 658 659 public void reportError(String key, Object [] args) 660 throws XMLParseException { 661 if (fErrorHandler != null) { 662 fErrorHandler.error(ERROR_DOMAIN, key, createException(key, args)); 663 } 664 } 666 670 671 protected XMLParseException createException(String key, Object [] args) { 672 String message = formatMessage(key, args); 673 return new XMLParseException(fDocumentScanner, message); 674 } 676 677 protected String formatSimpleMessage(String key, Object [] args) { 678 StringBuffer str = new StringBuffer (); 679 str.append(ERROR_DOMAIN); 680 str.append('#'); 681 str.append(key); 682 if (args != null && args.length > 0) { 683 str.append('\t'); 684 for (int i = 0; i < args.length; i++) { 685 if (i > 0) { 686 str.append('\t'); 687 } 688 str.append(String.valueOf(args[i])); 689 } 690 } 691 return str.toString(); 692 } 694 } 696 } | Popular Tags |