KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > settings > CrawlSettingsSAXSource


1 /* CrawlSettingsSAXSource
2  *
3  * $Id: CrawlSettingsSAXSource.java,v 1.8 2005/03/31 23:49:51 stack-sf Exp $
4  *
5  * Created on Dec 5, 2003
6  *
7  * Copyright (C) 2004 Internet Archive.
8  *
9  * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24  */

25 package org.archive.crawler.settings;
26
import java.io.IOException;
import java.text.ParseException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.management.AttributeNotFoundException;
import javax.management.MBeanInfo;
import javax.xml.transform.sax.SAXSource;

import org.archive.crawler.settings.refinements.PortnumberCriteria;
import org.archive.crawler.settings.refinements.Refinement;
import org.archive.crawler.settings.refinements.RegularExpressionCriteria;
import org.archive.crawler.settings.refinements.TimespanCriteria;
import org.archive.util.ArchiveUtils;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.DTDHandler;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.AttributesImpl;

51
52 /** Class that takes a CrawlerSettings object and create SAXEvents from it.
53  *
54  * This is a helper class for XMLSettingsHandler.
55  *
56  * @author John Erik Halse
57  */

58 public class CrawlSettingsSAXSource extends SAXSource JavaDoc implements XMLReader JavaDoc {
59     // for prettyprinting XML file
60
private static final int indentAmount = 2;
61
62     private CrawlerSettings settings;
63     private ContentHandler JavaDoc handler;
64     private boolean orderFile = false;
65
66     /** Constructs a new CrawlSettingsSAXSource.
67      *
68      * @param settings the settings object to create SAX events from.
69      */

70     public CrawlSettingsSAXSource(CrawlerSettings settings) {
71         super();
72         this.settings = settings;
73         if (settings.getParent() == null) {
74             orderFile = true;
75         }
76     }
77
78     /* (non-Javadoc)
79      * @see org.xml.sax.XMLReader#getFeature(java.lang.String)
80      */

81     public boolean getFeature(String JavaDoc name)
82         throws SAXNotRecognizedException JavaDoc, SAXNotSupportedException JavaDoc {
83         return false;
84     }
85
86     /* (non-Javadoc)
87      * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean)
88      */

89     public void setFeature(String JavaDoc name, boolean value)
90         throws SAXNotRecognizedException JavaDoc, SAXNotSupportedException JavaDoc {
91
92     }
93
94     /* (non-Javadoc)
95      * @see org.xml.sax.XMLReader#getProperty(java.lang.String)
96      */

97     public Object JavaDoc getProperty(String JavaDoc name)
98         throws SAXNotRecognizedException JavaDoc, SAXNotSupportedException JavaDoc {
99         return null;
100     }
101
102     /* (non-Javadoc)
103      * @see org.xml.sax.XMLReader#setProperty(java.lang.String, java.lang.Object)
104      */

105     public void setProperty(String JavaDoc name, Object JavaDoc value)
106         throws SAXNotRecognizedException JavaDoc, SAXNotSupportedException JavaDoc {
107
108     }
109
110     /* (non-Javadoc)
111      * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
112      */

113     public void setEntityResolver(EntityResolver JavaDoc resolver) {
114
115     }
116
117     /* (non-Javadoc)
118      * @see org.xml.sax.XMLReader#getEntityResolver()
119      */

120     public EntityResolver JavaDoc getEntityResolver() {
121         return null;
122     }
123
124     /* (non-Javadoc)
125      * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler)
126      */

127     public void setDTDHandler(DTDHandler JavaDoc handler) {
128     }
129
130     /* (non-Javadoc)
131      * @see org.xml.sax.XMLReader#getDTDHandler()
132      */

133     public DTDHandler JavaDoc getDTDHandler() {
134         return null;
135     }
136
137     /* (non-Javadoc)
138      * @see org.xml.sax.XMLReader#setContentHandler(org.xml.sax.ContentHandler)
139      */

140     public void setContentHandler(ContentHandler JavaDoc handler) {
141         this.handler = handler;
142     }
143
144     /* (non-Javadoc)
145      * @see org.xml.sax.XMLReader#getContentHandler()
146      */

147     public ContentHandler JavaDoc getContentHandler() {
148         return handler;
149     }
150
151     /* (non-Javadoc)
152      * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
153      */

154     public void setErrorHandler(ErrorHandler JavaDoc handler) {
155     }
156
157     /* (non-Javadoc)
158      * @see org.xml.sax.XMLReader#getErrorHandler()
159      */

160     public ErrorHandler JavaDoc getErrorHandler() {
161         return null;
162     }
163
164     // We're not doing namespaces
165
private static final String JavaDoc nsu = ""; // NamespaceURI
166
private static final char[] indentArray =
167         "\n ".toCharArray();
168
169     /* (non-Javadoc)
170      * @see org.xml.sax.XMLReader#parse(org.xml.sax.InputSource)
171      */

172     public void parse(InputSource JavaDoc input) throws IOException JavaDoc, SAXException JavaDoc {
173         if (handler == null) {
174             throw new SAXException JavaDoc("No content handler");
175         }
176         handler.startDocument();
177         AttributesImpl JavaDoc atts = new AttributesImpl JavaDoc();
178         atts.addAttribute(
179             "http://www.w3.org/2001/XMLSchema-instance",
180             "xsi",
181             "xmlns:xsi",
182             nsu,
183             "http://www.w3.org/2001/XMLSchema-instance");
184         atts.addAttribute(
185             "http://www.w3.org/2001/XMLSchema-instance",
186             "noNamespaceSchemaLocation",
187             "xsi:noNamespaceSchemaLocation",
188             nsu,
189             XMLSettingsHandler.XML_SCHEMA);
190         String JavaDoc rootElement;
191         if (settings.isRefinement()) {
192             rootElement = XMLSettingsHandler.XML_ROOT_REFINEMENT;
193         } else if (orderFile) {
194             rootElement = XMLSettingsHandler.XML_ROOT_ORDER;
195         } else {
196             rootElement = XMLSettingsHandler.XML_ROOT_HOST_SETTINGS;
197         }
198         handler.startElement(nsu, rootElement, rootElement, atts);
199
200         parseMetaData(1 + indentAmount);
201
202         if (settings.hasRefinements()) {
203             parseRefinements(1 + indentAmount);
204         }
205
206         // Write the modules
207
Iterator JavaDoc modules = settings.topLevelModules();
208         while (modules.hasNext()) {
209             ComplexType complexType = (ComplexType) modules.next();
210             parseComplexType(complexType, 1 + indentAmount);
211         }
212
213         handler.ignorableWhitespace(indentArray, 0, 1);
214         handler.endElement(nsu, rootElement, rootElement);
215         handler.ignorableWhitespace(indentArray, 0, 1);
216         handler.endDocument();
217     }
218
219     private void parseRefinements(int indent) throws SAXException JavaDoc {
220         Attributes JavaDoc nullAtts = new AttributesImpl JavaDoc();
221         handler.ignorableWhitespace(indentArray, 0, indent);
222         handler.startElement(nsu,
223                 XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST,
224                 XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST, nullAtts);
225
226         Iterator JavaDoc it = settings.refinementsIterator();
227         while (it.hasNext()) {
228             Refinement refinement = (Refinement) it.next();
229             handler.ignorableWhitespace(indentArray, 0, indent + indentAmount);
230             AttributesImpl JavaDoc reference = new AttributesImpl JavaDoc();
231             reference.addAttribute(nsu,
232                     XMLSettingsHandler.XML_ELEMENT_REFERENCE,
233                     XMLSettingsHandler.XML_ELEMENT_REFERENCE, nsu, refinement
234                             .getReference());
235             handler.startElement(nsu,
236                     XMLSettingsHandler.XML_ELEMENT_REFINEMENT,
237                     XMLSettingsHandler.XML_ELEMENT_REFINEMENT, reference);
238
239             writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_DESCRIPTION,
240                     refinement.getDescription(), nullAtts, indent + 2
241                             * indentAmount);
242
243             parseRefinementLimits(refinement, indent + 2 * indentAmount);
244
245             handler.ignorableWhitespace(indentArray, 0, indent + indentAmount);
246             handler.endElement(nsu, XMLSettingsHandler.XML_ELEMENT_REFINEMENT,
247                     XMLSettingsHandler.XML_ELEMENT_REFINEMENT);
248         }
249
250         handler.ignorableWhitespace(indentArray, 0, indent);
251         handler.endElement(nsu, XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST,
252                 XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST);
253     }
254
255     private void parseRefinementLimits(Refinement refinement, int indent)
256             throws SAXException JavaDoc {
257         Attributes JavaDoc nullAtts = new AttributesImpl JavaDoc();
258
259         handler.ignorableWhitespace(indentArray, 0, indent);
260         handler.startElement(nsu, XMLSettingsHandler.XML_ELEMENT_LIMITS,
261                 XMLSettingsHandler.XML_ELEMENT_LIMITS, nullAtts);
262
263         Iterator JavaDoc it = refinement.criteriaIterator();
264         while (it.hasNext()) {
265             Object JavaDoc limit = it.next();
266             if (limit instanceof TimespanCriteria) {
267                 AttributesImpl JavaDoc timeSpan = new AttributesImpl JavaDoc();
268                 timeSpan.addAttribute(nsu,
269                         XMLSettingsHandler.XML_ATTRIBUTE_FROM,
270                         XMLSettingsHandler.XML_ATTRIBUTE_FROM, nsu,
271                         ((TimespanCriteria) limit).getFrom());
272                 timeSpan.addAttribute(nsu, XMLSettingsHandler.XML_ATTRIBUTE_TO,
273                         XMLSettingsHandler.XML_ATTRIBUTE_TO, nsu,
274                         ((TimespanCriteria) limit).getTo());
275                 writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_TIMESPAN, "",
276                         timeSpan, indent + 2 * indentAmount);
277             } else if (limit instanceof PortnumberCriteria) {
278                 writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_PORTNUMBER,
279                         ((PortnumberCriteria) limit).getPortNumber(), nullAtts,
280                         indent + 2 * indentAmount);
281             } else if (limit instanceof RegularExpressionCriteria) {
282                 writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_URIMATCHES,
283                         ((RegularExpressionCriteria) limit).getRegexp(), nullAtts,
284                         indent + 2 * indentAmount);
285             }
286         }
287
288         handler.ignorableWhitespace(indentArray, 0, indent);
289         handler.endElement(nsu, XMLSettingsHandler.XML_ELEMENT_LIMITS,
290                 XMLSettingsHandler.XML_ELEMENT_LIMITS);
291
292     }
293
294     private void parseMetaData(int indent) throws SAXException JavaDoc {
295         // Write meta information
296
Attributes JavaDoc nullAtts = new AttributesImpl JavaDoc();
297         handler.ignorableWhitespace(indentArray, 0, indent);
298         handler.startElement(nsu, XMLSettingsHandler.XML_ELEMENT_META,
299                 XMLSettingsHandler.XML_ELEMENT_META, nullAtts);
300
301         // Write settings name
302
writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_NAME, settings
303                 .getName(), null, indent + indentAmount);
304
305         // Write settings description
306
writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_DESCRIPTION, settings
307                 .getDescription(), null, indent + indentAmount);
308
309         // Write settings operator
310
writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_OPERATOR, settings
311                 .getOperator(), null, indent + indentAmount);
312
313         // Write settings description
314
writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_ORGANIZATION, settings
315                 .getOrganization(), null, indent + indentAmount);
316
317         // Write settings description
318
writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_AUDIENCE, settings
319                 .getAudience(), null, indent + indentAmount);
320
321         // Write file date
322
String JavaDoc dateStamp = ArchiveUtils.get14DigitDate();
323         writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_DATE, dateStamp,
324                 null, indent + indentAmount);
325         try {
326             settings.setLastSavedTime(ArchiveUtils.parse14DigitDate(dateStamp));
327         } catch (ParseException JavaDoc e) {
328             // Should never happen since we just created it. If this exception
329
// is thrown, then there is a bug in ArchiveUtils.
330
e.printStackTrace();
331         }
332
333         handler.ignorableWhitespace(indentArray, 0, indent);
334         handler.endElement(nsu, XMLSettingsHandler.XML_ELEMENT_META,
335                 XMLSettingsHandler.XML_ELEMENT_META);
336     }
337
338     /**
339      * Create SAX events from a {@link ComplexType}.
340      *
341      * @param complexType the object to creat SAX events from.
342      * @param indent the indentation amount for prettyprinting XML.
343      * @throws SAXException is thrown if an error occurs.
344      */

345     private void parseComplexType(ComplexType complexType, int indent)
346             throws SAXException JavaDoc {
347         if (complexType.isTransient()) {
348             return;
349         }
350         MBeanInfo JavaDoc mbeanInfo = complexType.getMBeanInfo(settings);
351         String JavaDoc objectElement = resolveElementName(complexType);
352         AttributesImpl JavaDoc atts = new AttributesImpl JavaDoc();
353         atts.addAttribute(nsu, XMLSettingsHandler.XML_ATTRIBUTE_NAME,
354                 XMLSettingsHandler.XML_ATTRIBUTE_NAME, nsu, complexType
355                         .getName());
356         if (objectElement == XMLSettingsHandler.XML_ELEMENT_NEW_OBJECT) {
357             // Only 'newObject' elements have a class attribute
358
atts.addAttribute(nsu, XMLSettingsHandler.XML_ATTRIBUTE_CLASS,
359                     XMLSettingsHandler.XML_ATTRIBUTE_CLASS, nsu, mbeanInfo
360                             .getClassName());
361         }
362         if (complexType.getParent() == null) {
363             atts = new AttributesImpl JavaDoc();
364         }
365         handler.ignorableWhitespace(indentArray, 0, indent);
366         handler.startElement(nsu, objectElement, objectElement, atts);
367         for (Iterator JavaDoc it = complexType.getAttributeInfoIterator(settings); it
368                 .hasNext();) {
369             ModuleAttributeInfo attribute = (ModuleAttributeInfo) it.next();
370             if (!attribute.isTransient()) {
371                 parseAttribute(complexType, attribute, indent);
372             }
373         }
374         handler.ignorableWhitespace(indentArray, 0, indent);
375         handler.endElement(nsu, objectElement, objectElement);
376     }
377
378     private void parseAttribute(ComplexType complexType,
379             ModuleAttributeInfo attribute, int indent) throws SAXException JavaDoc {
380         Object JavaDoc value;
381         try {
382             value = complexType
383                     .getLocalAttribute(settings, attribute.getName());
384         } catch (AttributeNotFoundException JavaDoc e) {
385             throw new SAXException JavaDoc(e);
386         }
387         if (orderFile || value != null) {
388             // Write only overridden values unless this is the order file
389
if (attribute.isComplexType()) {
390                 // Call method recursively for complex types
391
parseComplexType((ComplexType) value, indent + indentAmount);
392             } else {
393                 // Write element
394
String JavaDoc elementName = SettingsHandler.getTypeName(attribute
395                         .getType());
396                 AttributesImpl JavaDoc atts = new AttributesImpl JavaDoc();
397                 atts.addAttribute(nsu, XMLSettingsHandler.XML_ATTRIBUTE_NAME,
398                         XMLSettingsHandler.XML_ATTRIBUTE_NAME, nsu, attribute
399                                 .getName());
400                 if (value == null) {
401                     try {
402                         value = complexType.getAttribute(attribute.getName());
403                     } catch (Exception JavaDoc e) {
404                         throw new SAXException JavaDoc(
405                                 "Internal error in settings subsystem", e);
406                     }
407                 }
408                 if (value != null) {
409                     handler.ignorableWhitespace(indentArray, 0, indent
410                             + indentAmount);
411                     handler.startElement(nsu, elementName, elementName, atts);
412                     if (value instanceof ListType) {
413                         parseListData(value, indent + indentAmount);
414                         handler.ignorableWhitespace(indentArray, 0, indent
415                                 + indentAmount);
416                     } else {
417                         char valueArray[] = value.toString().toCharArray();
418                         handler.characters(valueArray, 0, valueArray.length);
419                     }
420                     handler.endElement(nsu, elementName, elementName);
421                 }
422             }
423         }
424     }
425
426     /** Create SAX events for the content of a {@link ListType}.
427      *
428      * @param value the ListType whose content we create SAX events for.
429      * @param indent the indentation amount for prettyprinting XML.
430      * @throws SAXException is thrown if an error occurs.
431      */

432     private void parseListData(Object JavaDoc value, int indent) throws SAXException JavaDoc {
433         ListType list = (ListType) value;
434         Iterator JavaDoc it = list.iterator();
435         while (it.hasNext()) {
436             Object JavaDoc element = it.next();
437             String JavaDoc elementName =
438                 SettingsHandler.getTypeName(element.getClass().getName());
439             writeSimpleElement(
440                 elementName,
441                 element.toString(),
442                 null,
443                 indent + indentAmount);
444         }
445     }
446
447     /** Resolve the XML element name of a {@link ComplexType}.
448      *
449      * @param complexType the object to investigate.
450      * @return the name of the XML element.
451      */

452     private String JavaDoc resolveElementName(ComplexType complexType) {
453         String JavaDoc elementName;
454         if (complexType instanceof ModuleType) {
455             if (complexType.getParent() == null) {
456                 // Top level controller element
457
elementName = XMLSettingsHandler.XML_ELEMENT_CONTROLLER;
458             } else if (
459                 !orderFile
460                     && complexType.globalSettings().getModule(
461                         complexType.getName())
462                         != null) {
463                 // This is not the order file and we are referencing an object
464
elementName = XMLSettingsHandler.XML_ELEMENT_OBJECT;
465             } else {
466                 // The object is not referenced before
467
elementName = XMLSettingsHandler.XML_ELEMENT_NEW_OBJECT;
468             }
469         } else {
470             // It's a map
471
elementName =
472                 SettingsHandler.getTypeName(complexType.getClass().getName());
473         }
474         return elementName;
475     }
476
477     /** Create SAX events for a simple element.
478      *
479      * Creates all the SAX events needed for prettyprinting an XML element
480      * with a simple value and possible attributes.
481      *
482      * @param elementName the name of the XML element.
483      * @param value the value to pu inside the XML element.
484      * @param atts the attributes for the XML element.
485      * @param indent the indentation amount for prettyprinting XML.
486      * @throws SAXException is thrown if an error occurs.
487      */

488     private void writeSimpleElement(
489         String JavaDoc elementName,
490         String JavaDoc value,
491         Attributes JavaDoc atts,
492         int indent)
493         throws SAXException JavaDoc {
494         if (atts == null) {
495             atts = new AttributesImpl JavaDoc();
496         }
497         // make sure that the value is never null
498
value = value == null ? "" : value;
499         handler.ignorableWhitespace(indentArray, 0, indent);
500         handler.startElement(nsu, elementName, elementName, atts);
501         handler.characters(value.toCharArray(), 0, value.length());
502         handler.endElement(nsu, elementName, elementName);
503     }
504
505     /* (non-Javadoc)
506      * @see org.xml.sax.XMLReader#parse(java.lang.String)
507      */

508     public void parse(String JavaDoc systemId) throws IOException JavaDoc, SAXException JavaDoc {
509         // Do nothing. Just for conformance to the XMLReader API.
510
}
511
512     /* (non-Javadoc)
513      * @see javax.xml.transform.sax.SAXSource#getXMLReader()
514      */

515     public XMLReader JavaDoc getXMLReader() {
516         return this;
517     }
518
519     /* (non-Javadoc)
520      * @see javax.xml.transform.sax.SAXSource#getInputSource()
521      */

522     public InputSource JavaDoc getInputSource() {
523         return new InputSource JavaDoc();
524     }
525 }
526
Popular Tags