package org.archive.crawler.settings;

import java.lang.reflect.InvocationTargetException;
import java.text.ParseException;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.management.Attribute;
import javax.management.AttributeNotFoundException;
import javax.management.InvalidAttributeValueException;

import org.archive.crawler.settings.Constraint.FailedCheck;
import org.archive.crawler.settings.refinements.PortnumberCriteria;
import org.archive.crawler.settings.refinements.Refinement;
import org.archive.crawler.settings.refinements.RegularExpressionCriteria;
import org.archive.crawler.settings.refinements.TimespanCriteria;
import org.archive.util.ArchiveUtils;
import org.xml.sax.Attributes;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX handler that reads a settings XML document and applies its content to
 * a {@link CrawlerSettings} object.
 *
 * <p>Each known element name is mapped to an {@link ElementHandler}. While
 * parsing, three parallel stacks are maintained:
 * <ul>
 * <li>{@code handlerStack} - the handlers of the currently open, known
 * elements;</li>
 * <li>{@code skip} - one flag per open, known element telling whether the
 * element (and everything below it) is being ignored;</li>
 * <li>{@code stack} - the objects (modules, maps, lists, refinements,
 * attribute names) the open elements are working on.</li>
 * </ul>
 *
 * <p>Elements without a registered handler are reported with a warning and
 * are otherwise ignored; they never touch any of the stacks.
 */
public class CrawlSettingsSAXHandler extends DefaultHandler implements
        ValueErrorHandler {

    private static final Logger logger = Logger
            .getLogger("org.archive.crawler.settings.XMLSettingsHandler");

    /** Position information supplied by the parser; may be null. */
    private Locator locator;

    /** The settings object being populated. */
    private CrawlerSettings settings;

    /** The settings handler that owns {@link #settings}. */
    private SettingsHandler settingsHandler;

    /** Element name to handler lookup table. */
    private Map<String, ElementHandler> handlers
            = new HashMap<String, ElementHandler>();

    /** Handlers of the currently open, known elements. */
    private Stack<ElementHandler> handlerStack = new Stack<ElementHandler>();

    /**
     * Objects under construction. Kept as a {@link Stack} because entries
     * may be null (a module lookup can come back empty).
     */
    private Stack<Object> stack = new Stack<Object>();

    /** One flag per open, known element: true while it is being skipped. */
    private Stack<Boolean> skip = new Stack<Boolean>();

    /** Collects character data until the next end tag. */
    private StringBuilder buffer = new StringBuilder();

    /** Trimmed character data seen before the most recent end tag. */
    private String value;

    /**
     * Creates a handler that fills the given settings object.
     *
     * @param settings the settings object to populate; its settings handler
     *            is used for module lookups and value-error registration.
     */
    public CrawlSettingsSAXHandler(CrawlerSettings settings) {
        super();
        this.settings = settings;
        this.settingsHandler = settings.getSettingsHandler();

        // Document roots.
        String[] rootElements = {
            XMLSettingsHandler.XML_ROOT_ORDER,
            XMLSettingsHandler.XML_ROOT_HOST_SETTINGS,
            XMLSettingsHandler.XML_ROOT_REFINEMENT
        };
        for (String rootElement : rootElements) {
            handlers.put(rootElement, new RootHandler());
        }

        // Modules.
        handlers.put(XMLSettingsHandler.XML_ELEMENT_CONTROLLER,
                new ModuleHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_OBJECT,
                new ModuleHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_NEW_OBJECT,
                new NewModuleHandler());

        // Meta data.
        handlers.put(XMLSettingsHandler.XML_ELEMENT_META, new MetaHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_NAME, new NameHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_DESCRIPTION,
                new DescriptionHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_OPERATOR,
                new OperatorHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_ORGANIZATION,
                new OrganizationHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_AUDIENCE,
                new AudienceHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_DATE, new DateHandler());

        // Containers.
        handlers.put(SettingsHandler.MAP, new MapHandler());
        String[] listElements = {
            SettingsHandler.INTEGER_LIST,
            SettingsHandler.STRING_LIST,
            SettingsHandler.DOUBLE_LIST,
            SettingsHandler.FLOAT_LIST,
            SettingsHandler.LONG_LIST
        };
        for (String listElement : listElements) {
            handlers.put(listElement, new ListHandler());
        }

        // Simple values.
        String[] simpleElements = {
            SettingsHandler.STRING,
            SettingsHandler.TEXT,
            SettingsHandler.INTEGER,
            SettingsHandler.FLOAT,
            SettingsHandler.LONG,
            SettingsHandler.BOOLEAN,
            SettingsHandler.DOUBLE
        };
        for (String simpleElement : simpleElements) {
            handlers.put(simpleElement, new SimpleElementHandler());
        }

        // Refinements.
        handlers.put(XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST,
                new RefinementListHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_REFINEMENT,
                new RefinementHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_REFERENCE,
                new ReferenceHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_LIMITS,
                new LimitsHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_TIMESPAN,
                new TimespanHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_PORTNUMBER,
                new PortnumberHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_URIMATCHES,
                new URIMatcherHandler());
    }

    /**
     * Remembers the parser's locator so messages can report a position.
     *
     * @param locator the locator supplied by the parser.
     */
    @Override
    public void setDocumentLocator(Locator locator) {
        super.setDocumentLocator(locator);
        this.locator = locator;
    }

    /**
     * Registers this object as value error handler and resets the parse
     * state, so leftovers from an earlier, aborted parse cannot leak into
     * this one.
     */
    @Override
    public void startDocument() throws SAXException {
        settingsHandler.registerValueErrorHandler(this);
        handlerStack.clear();
        stack.clear();
        skip.clear();
        buffer.setLength(0);
        // Bottom entry: nothing is being skipped at document level.
        skip.push(Boolean.FALSE);
        super.startDocument();
    }

    /**
     * Unregisters this object as value error handler.
     */
    @Override
    public void endDocument() throws SAXException {
        settingsHandler.unregisterValueErrorHandler(this);
        super.endDocument();
    }

    /**
     * Accumulates character data; it is consumed by the next end tag.
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        super.characters(ch, start, length);
        buffer.append(ch, start, length);
    }

    /**
     * Dispatches a start tag to the handler registered for {@code qName}.
     *
     * <p>If an enclosing element is being skipped, this element is skipped
     * too. If the handler fails with a {@link SAXException} wrapping an
     * {@link InvocationTargetException} or an
     * {@link AttributeNotFoundException}, the element is marked as skipped
     * and parsing continues; any other {@link SAXException} is rethrown.
     * Elements without a handler are logged and ignored.
     *
     * @param uri namespace URI (unused).
     * @param localName local name (unused).
     * @param qName qualified element name, used for the handler lookup.
     * @param attributes the element's attributes.
     * @throws SAXException if the handler reports a non-recoverable error.
     */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        ElementHandler handler = handlers.get(qName);
        if (handler == null) {
            // Nothing is pushed for unknown elements; endElement() mirrors
            // this by not popping for them.
            logger.warning("Unknown element '" + qName + "' in "
                    + describeLocation());
            return;
        }

        handlerStack.push(handler);

        if (skip.peek().booleanValue()) {
            skip.push(Boolean.TRUE);
            String moduleName = attributes
                    .getValue(XMLSettingsHandler.XML_ATTRIBUTE_NAME);
            logger.fine("Skipping: " + qName + " " + moduleName);
            return;
        }

        try {
            handler.startElement(qName, attributes);
            skip.push(Boolean.FALSE);
        } catch (SAXException e) {
            Exception cause = e.getException();
            boolean recoverable = cause instanceof InvocationTargetException
                    || cause instanceof AttributeNotFoundException;
            // Keep the skip stack balanced even when rethrowing.
            skip.push(Boolean.valueOf(recoverable));
            if (!recoverable) {
                throw e;
            }
        }
    }

    /**
     * Dispatches an end tag to the handler that was pushed for the matching
     * start tag, unless the element was skipped. The character data
     * collected so far is trimmed, stored in {@code value} and the buffer
     * is cleared.
     *
     * @param uri namespace URI (unused).
     * @param localName local name (unused).
     * @param qName qualified element name.
     * @throws SAXException if the handler reports an error.
     */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        value = buffer.toString().trim();
        buffer.setLength(0);

        if (!handlers.containsKey(qName)) {
            // Unknown element: startElement() pushed nothing, so popping
            // here would discard the parent's handler and skip flag.
            return;
        }

        ElementHandler handler = handlerStack.pop();
        boolean skipped = skip.pop().booleanValue();
        if (!skipped) {
            handler.endElement(qName);
        }
    }

    /**
     * Reports that an element occurred in a place where it is not allowed.
     *
     * @param name the offending element name.
     * @throws SAXParseException always.
     */
    public void illegalElementError(String name) throws SAXParseException {
        throw new SAXParseException("Element '" + name + "' not allowed here",
                locator);
    }

    /**
     * Builds the {@code 'systemId', line: L, column: C} fragment used in log
     * messages. Parsers are not obliged to supply a locator, so a missing
     * one is reported as an unknown position instead of failing.
     */
    private String describeLocation() {
        if (locator == null) {
            return "'<unknown>', line: -1, column: -1";
        }
        return "'" + locator.getSystemId() + "', line: "
                + locator.getLineNumber() + ", column: "
                + locator.getColumnNumber();
    }

    /**
     * Returns the handler of the element enclosing the one whose start tag
     * is currently being processed, or null if there is none. Only
     * meaningful inside {@code ElementHandler.startElement}, where the
     * current handler is already on top of the handler stack.
     */
    private ElementHandler parentHandler() {
        int parentIndex = handlerStack.size() - 2;
        return parentIndex >= 0 ? handlerStack.get(parentIndex) : null;
    }

    /**
     * Base class for element handlers; both hooks do nothing by default.
     *
     * <p>When {@code startElement} runs, the handler itself is already on
     * top of {@code handlerStack}. When {@code endElement} runs, it has
     * been popped, so {@code handlerStack.peek()} is the parent's handler.
     */
    private class ElementHandler {

        /**
         * Called for the start tag.
         *
         * @param name the element name.
         * @param atts the element's attributes.
         * @throws SAXException on handler-specific errors.
         */
        public void startElement(String name, Attributes atts)
                throws SAXException {
        }

        /**
         * Called for the end tag.
         *
         * @param name the element name.
         * @throws SAXException on handler-specific errors.
         */
        public void endElement(String name) throws SAXException {
        }
    }

    /**
     * Handles the document root and rejects a root element that does not
     * match the kind of settings object being read.
     */
    private class RootHandler extends ElementHandler {

        @Override
        public void startElement(String name, Attributes atts)
                throws SAXException {
            boolean orderWithScope =
                    name.equals(XMLSettingsHandler.XML_ROOT_ORDER)
                    && settings.getScope() != null;
            boolean hostSettingsWithoutScope =
                    name.equals(XMLSettingsHandler.XML_ROOT_HOST_SETTINGS)
                    && settings.getScope() == null;
            boolean refinementForNonRefinement =
                    name.equals(XMLSettingsHandler.XML_ROOT_REFINEMENT)
                    && !settings.isRefinement();

            if (orderWithScope || hostSettingsWithoutScope
                    || refinementForNonRefinement) {
                throw new SAXParseException("Wrong document type '" + name
                        + "'", locator);
            }
        }
    }

    /** Marker handler for the meta element; its children do the work. */
    private class MetaHandler extends ElementHandler {
    }

    /** Stores the name; only allowed inside the meta element. */
    private class NameHandler extends ElementHandler {

        @Override
        public void endElement(String name) throws SAXException {
            if (handlerStack.peek() instanceof MetaHandler) {
                settings.setName(value);
            } else {
                illegalElementError(name);
            }
        }
    }

    /** Stores the description on the settings or the open refinement. */
    private class DescriptionHandler extends ElementHandler {

        @Override
        public void endElement(String name) throws SAXException {
            ElementHandler parent = handlerStack.peek();
            if (parent instanceof MetaHandler) {
                settings.setDescription(value);
            } else if (parent instanceof RefinementHandler) {
                ((Refinement) stack.peek()).setDescription(value);
            } else {
                illegalElementError(name);
            }
        }
    }

    /** Stores the organization on the settings or the open refinement. */
    private class OrganizationHandler extends ElementHandler {

        @Override
        public void endElement(String name) throws SAXException {
            ElementHandler parent = handlerStack.peek();
            if (parent instanceof MetaHandler) {
                settings.setOrganization(value);
            } else if (parent instanceof RefinementHandler) {
                ((Refinement) stack.peek()).setOrganization(value);
            } else {
                illegalElementError(name);
            }
        }
    }

    /** Stores the operator on the settings or the open refinement. */
    private class OperatorHandler extends ElementHandler {

        @Override
        public void endElement(String name) throws SAXException {
            ElementHandler parent = handlerStack.peek();
            if (parent instanceof MetaHandler) {
                settings.setOperator(value);
            } else if (parent instanceof RefinementHandler) {
                ((Refinement) stack.peek()).setOperator(value);
            } else {
                illegalElementError(name);
            }
        }
    }

    /** Stores the audience on the settings or the open refinement. */
    private class AudienceHandler extends ElementHandler {

        @Override
        public void endElement(String name) throws SAXException {
            ElementHandler parent = handlerStack.peek();
            if (parent instanceof MetaHandler) {
                settings.setAudience(value);
            } else if (parent instanceof RefinementHandler) {
                ((Refinement) stack.peek()).setAudience(value);
            } else {
                illegalElementError(name);
            }
        }
    }

    /**
     * Parses the element text with
     * {@code ArchiveUtils.parse14DigitDate} and stores it as the last-saved
     * time; only allowed inside the meta element.
     */
    private class DateHandler extends ElementHandler {

        @Override
        public void endElement(String name) throws SAXException {
            if (!(handlerStack.peek() instanceof MetaHandler)) {
                illegalElementError(name);
                return;
            }
            try {
                settings.setLastSavedTime(ArchiveUtils
                        .parse14DigitDate(value));
            } catch (ParseException e) {
                throw new SAXException(e);
            }
        }
    }

    /**
     * Handles the refinement list, which must be a direct child of the
     * document root.
     */
    private class RefinementListHandler extends ElementHandler {

        @Override
        public void startElement(String name, Attributes atts)
                throws SAXException {
            // This handler is already on top of handlerStack here, so the
            // enclosing element's handler is the one below it.
            if (!(parentHandler() instanceof RootHandler)) {
                illegalElementError(name);
            }
        }
    }

    /**
     * Creates a {@link Refinement} for the settings object and makes it the
     * current object for the child elements.
     */
    private class RefinementHandler extends ElementHandler {

        @Override
        public void startElement(String name, Attributes atts)
                throws SAXException {
            // NOTE(review): the refinement is never popped at the end tag;
            // confirm whether that is intentional before changing it.
            stack.push(new Refinement(settings, atts
                    .getValue(XMLSettingsHandler.XML_ELEMENT_REFERENCE)));
        }
    }

    /** Sets the reference of the open refinement. */
    private class ReferenceHandler extends ElementHandler {

        @Override
        public void endElement(String name) throws SAXException {
            if (handlerStack.peek() instanceof RefinementHandler) {
                ((Refinement) stack.peek()).setReference(value);
            } else {
                illegalElementError(name);
            }
        }
    }

    /** Marker handler for the limits element; its children do the work. */
    private class LimitsHandler extends ElementHandler {
    }

    /**
     * Adds a {@link TimespanCriteria}, built from the from/to attributes,
     * to the open refinement.
     */
    private class TimespanHandler extends ElementHandler {

        @Override
        public void startElement(String name, Attributes atts)
                throws SAXException {
            if (!(stack.peek() instanceof Refinement)) {
                illegalElementError(name);
                return;
            }
            String from = atts.getValue(XMLSettingsHandler.XML_ATTRIBUTE_FROM);
            String to = atts.getValue(XMLSettingsHandler.XML_ATTRIBUTE_TO);
            try {
                TimespanCriteria timespan = new TimespanCriteria(from, to);
                ((Refinement) stack.peek()).addCriteria(timespan);
            } catch (ParseException e) {
                throw new SAXException(e);
            }
        }
    }

    /**
     * Adds a {@link PortnumberCriteria} built from the element text to the
     * open refinement; only allowed inside the limits element.
     */
    private class PortnumberHandler extends ElementHandler {

        @Override
        public void endElement(String name) throws SAXException {
            if (handlerStack.peek() instanceof LimitsHandler) {
                ((Refinement) stack.peek())
                        .addCriteria(new PortnumberCriteria(value));
            } else {
                illegalElementError(name);
            }
        }
    }

    /**
     * Adds a {@link RegularExpressionCriteria} built from the element text
     * to the open refinement; only allowed inside the limits element.
     */
    private class URIMatcherHandler extends ElementHandler {

        @Override
        public void endElement(String name) throws SAXException {
            if (handlerStack.peek() instanceof LimitsHandler) {
                ((Refinement) stack.peek())
                        .addCriteria(new RegularExpressionCriteria(value));
            } else {
                illegalElementError(name);
            }
        }
    }

    /**
     * Makes an already existing module the current object: the order for
     * the controller element, otherwise the module with the given name
     * looked up through the settings handler.
     */
    private class ModuleHandler extends ElementHandler {

        @Override
        public void startElement(String name, Attributes atts)
                throws SAXException {
            ModuleType module;
            if (name.equals(XMLSettingsHandler.XML_ELEMENT_CONTROLLER)) {
                module = settingsHandler.getOrder();
            } else {
                module = settingsHandler.getSettingsObject(null).getModule(
                        atts.getValue(XMLSettingsHandler.XML_ATTRIBUTE_NAME));
            }
            // May be null if the lookup found nothing; child handlers cope
            // with a null container.
            stack.push(module);
        }

        @Override
        public void endElement(String name) throws SAXException {
            stack.pop();
        }
    }

    /**
     * Instantiates a new module from its class name, attaches it to the
     * current parent module and makes it the current object.
     */
    private class NewModuleHandler extends ElementHandler {

        @Override
        public void startElement(String name, Attributes atts)
                throws SAXException {
            ComplexType parentModule = (ComplexType) stack.peek();
            String moduleName = atts
                    .getValue(XMLSettingsHandler.XML_ATTRIBUTE_NAME);
            String moduleClass = atts
                    .getValue(XMLSettingsHandler.XML_ATTRIBUTE_CLASS);
            try {
                ModuleType module = SettingsHandler
                        .instantiateModuleTypeFromClassName(moduleName,
                                moduleClass);
                try {
                    parentModule.setAttribute(settings, module);
                } catch (AttributeNotFoundException e) {
                    // The parent has no such attribute; try adding the
                    // module as a new element instead.
                    try {
                        parentModule.addElement(settings, module);
                    } catch (IllegalStateException ise) {
                        logger.log(Level.WARNING, "Module '" + moduleName
                                + "' in " + describeLocation()
                                + " is not defined in '"
                                + parentModule.getName() + "'.", ise);
                        // Wrapped as AttributeNotFoundException so that the
                        // dispatcher skips this element instead of aborting.
                        AttributeNotFoundException notFound =
                                new AttributeNotFoundException(
                                        ise.getMessage());
                        notFound.initCause(ise);
                        throw new SAXException(notFound);
                    }
                }
                stack.push(module);
            } catch (InvocationTargetException e) {
                logger.log(Level.WARNING, "Couldn't instantiate " + moduleName
                        + ", from class: " + moduleClass + "' in "
                        + describeLocation(), e);
                throw new SAXException(e);
            } catch (InvalidAttributeValueException e) {
                throw new SAXException(e);
            }
        }

        @Override
        public void endElement(String name) throws SAXException {
            stack.pop();
        }
    }

    /**
     * Makes the named map attribute of the current parent module the
     * current object.
     */
    private class MapHandler extends ElementHandler {

        @Override
        public void startElement(String name, Attributes atts)
                throws SAXException {
            String mapName = atts
                    .getValue(XMLSettingsHandler.XML_ATTRIBUTE_NAME);
            ComplexType parentModule = (ComplexType) stack.peek();
            try {
                stack.push(parentModule.getAttribute(settings, mapName));
            } catch (AttributeNotFoundException e) {
                throw new SAXException(e);
            }
        }

        @Override
        public void endElement(String name) throws SAXException {
            stack.pop();
        }
    }

    /**
     * Handles simple value elements. The element text becomes either an
     * attribute of the enclosing complex type or an entry of the enclosing
     * list.
     */
    private class SimpleElementHandler extends ElementHandler {

        @Override
        public void startElement(String name, Attributes atts)
                throws SAXException {
            // Remember the attribute name until the end tag.
            stack.push(atts.getValue(XMLSettingsHandler.XML_ATTRIBUTE_NAME));
        }

        @Override
        public void endElement(String name) throws SAXException {
            String elementName = (String) stack.pop();
            Object container = stack.peek();
            if (container instanceof ComplexType) {
                storeInComplexType((ComplexType) container, elementName);
            } else if (container == null) {
                logger.severe("Empty container (Was a referenced parent"
                        + " filter removed?). Element details: elementName "
                        + elementName + ", name " + name);
            } else {
                ((ListType) container).add(value);
            }
        }

        /**
         * Sets the value as attribute {@code elementName} of {@code owner},
         * falling back to adding it as a new simple type if the attribute
         * is not defined. Unknown attributes and illegal values are logged
         * rather than aborting the parse.
         */
        private void storeInComplexType(ComplexType owner, String elementName)
                throws SAXException {
            try {
                try {
                    owner.setAttribute(settings,
                            new Attribute(elementName, value));
                } catch (AttributeNotFoundException e) {
                    // Not a predefined attribute; try adding it as a new
                    // element instead.
                    try {
                        owner.addElement(settings,
                                new SimpleType(elementName, "", value));
                    } catch (IllegalStateException ise) {
                        logger.warning("Unknown attribute '" + elementName
                                + "' in " + describeLocation());
                    }
                }
            } catch (InvalidAttributeValueException e) {
                try {
                    logger.warning("Illegal value '" + value
                            + "' for attribute '" + elementName + "' in "
                            + describeLocation()
                            + ", Value reset to default value: "
                            + owner.getAttribute(settings, elementName));
                } catch (AttributeNotFoundException e1) {
                    throw new SAXException(e1);
                }
            }
        }
    }

    /**
     * Makes the named list attribute of the current parent module the
     * current object, after emptying it so the document's entries replace
     * whatever it held.
     */
    private class ListHandler extends ElementHandler {

        @Override
        public void startElement(String name, Attributes atts)
                throws SAXException {
            String listName = atts
                    .getValue(XMLSettingsHandler.XML_ATTRIBUTE_NAME);
            ComplexType parentModule = (ComplexType) stack.peek();
            ListType list;
            try {
                list = (ListType) parentModule.getAttribute(settings, listName);
            } catch (AttributeNotFoundException e) {
                throw new SAXException(e);
            }
            list.clear();
            stack.push(list);
        }

        @Override
        public void endElement(String name) throws SAXException {
            stack.pop();
        }
    }

    /**
     * Logs a failed constraint check together with the attribute, the value
     * most recently read and the current document position.
     *
     * @param error the failed check to report.
     */
    public void handleValueError(FailedCheck error) {
        logger.warning(error.getMessage() + "\n Attribute: '"
                + error.getOwner().getName() + ":"
                + error.getDefinition().getName() + "'\n Value: '" + value
                + "'\n File: " + describeLocation());
    }
}