KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > settings > SettingsHandler


1 /* SettingsHandler
2  *
3  * $Id: SettingsHandler.java,v 1.11.4.1 2007/01/13 01:31:27 stack-sf Exp $
4  *
5  * Created on Dec 16, 2003
6  *
7  * Copyright (C) 2004 Internet Archive.
8  *
9  * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24  */

25 package org.archive.crawler.settings;
26
27 import java.io.File JavaDoc;
28 import java.lang.reflect.Constructor JavaDoc;
29 import java.lang.reflect.InvocationTargetException JavaDoc;
30 import java.text.ParseException JavaDoc;
31 import java.util.Collection JavaDoc;
32 import java.util.Collections JavaDoc;
33 import java.util.HashMap JavaDoc;
34 import java.util.HashSet JavaDoc;
35 import java.util.Iterator JavaDoc;
36 import java.util.List JavaDoc;
37 import java.util.Map JavaDoc;
38 import java.util.Set JavaDoc;
39 import java.util.logging.Level JavaDoc;
40
41 import javax.management.AttributeNotFoundException JavaDoc;
42 import javax.management.InvalidAttributeValueException JavaDoc;
43
44 import org.archive.crawler.datamodel.CrawlOrder;
45 import org.archive.crawler.settings.refinements.Refinement;
46 import org.archive.net.UURI;
47 import org.archive.util.ArchiveUtils;
48
49 /** An instance of this class holds a hierarchy of settings.
50  *
51  * More than one instance in memory is allowed so that a new CrawlJob could
52  * be configured while another job is running.
53  *
54  * This class should be subclassed to adapt to a persistent storage.
55  *
56  * @author John Erik Halse
57  */

58 public abstract class SettingsHandler {
59     /** Cached CrawlerSettings objects */
60     private SettingsCache settingsCache =
61         new SettingsCache(new CrawlerSettings(this, null));
62
63     /** Reference to the order module */
64     private CrawlOrder order;
65
66     private Set JavaDoc<ValueErrorHandler> valueErrorHandlers
67      = Collections.synchronizedSet(new HashSet JavaDoc<ValueErrorHandler>());
68     private int errorReportingLevel = Level.ALL.intValue();
69
70     /** Datatypes supported by the settings framwork */
71     final static String JavaDoc INTEGER = "integer";
72     final static String JavaDoc LONG = "long";
73     final static String JavaDoc FLOAT = "float";
74     final static String JavaDoc DOUBLE = "double";
75     final static String JavaDoc BOOLEAN = "boolean";
76     final static String JavaDoc STRING = "string";
77     final static String JavaDoc TEXT = "text";
78     final static String JavaDoc OBJECT = "object";
79     final static String JavaDoc TIMESTAMP = "timestamp";
80     final static String JavaDoc MAP = "map";
81     final static String JavaDoc INTEGER_LIST = "integerList";
82     final static String JavaDoc LONG_LIST = "longList";
83     final static String JavaDoc FLOAT_LIST = "floatList";
84     final static String JavaDoc DOUBLE_LIST = "doubleList";
85     final static String JavaDoc STRING_LIST = "stringList";
86     private final static String JavaDoc names[][] = new String JavaDoc[][] {
87             { INTEGER, "java.lang.Integer"},
88             { LONG, "java.lang.Long"},
89             { FLOAT, "java.lang.Float"},
90             { DOUBLE, "java.lang.Double"},
91             { BOOLEAN, "java.lang.Boolean"},
92             { STRING, "java.lang.String"},
93             { TEXT, "org.archive.crawler.settings.TextField"},
94             { OBJECT, "org.archive.crawler.settings.ModuleType"},
95             { TIMESTAMP, "java.util.Date"},
96             { MAP, "org.archive.crawler.settings.MapType"},
97             { INTEGER_LIST,
98                     "org.archive.crawler.settings.IntegerList"},
99             { LONG_LIST, "org.archive.crawler.settings.LongList"},
100             { FLOAT_LIST, "org.archive.crawler.settings.FloatList"},
101             { DOUBLE_LIST, "org.archive.crawler.settings.DoubleList"},
102             { STRING_LIST, "org.archive.crawler.settings.StringList"}};
103     private final static Map JavaDoc<String JavaDoc,String JavaDoc> name2class
104      = new HashMap JavaDoc<String JavaDoc,String JavaDoc>();
105     private final static Map JavaDoc<String JavaDoc,String JavaDoc> class2name
106      = new HashMap JavaDoc<String JavaDoc,String JavaDoc>();
107     static {
108         for (int i = 0; i < names.length; i++) {
109             name2class.put(names[i][0], names[i][1]);
110             class2name.put(names[i][1], names[i][0]);
111         }
112     }
113
/**
 * Create a new SettingsHandler object.
 *
 * <p>A fresh {@link CrawlOrder} is created, stored as this handler's
 * order and then told about this handler through {@code setAsOrder}.
 *
 * @throws InvalidAttributeValueException declared by this constructor;
 *         presumably raised while setting up the order (confirm against
 *         CrawlOrder).
 */
public SettingsHandler() throws InvalidAttributeValueException {
    CrawlOrder crawlOrder = new CrawlOrder();
    // Store the order before handing out "this", as the original did.
    this.order = crawlOrder;
    crawlOrder.setAsOrder(this);
}
122
123     /** Initialize the SettingsHandler.
124      *
125      * This method reads the default settings from the persistent storage.
126      */

127     public void initialize() {
128         readSettingsObject(settingsCache.getGlobalSettings());
129     }
130     
131     public void cleanup() {
132         this.settingsCache = null;
133         if (this.order != null) {
134             this.order.setController(null);
135         }
136         this.order = null;
137     }
138
139     /** Strip off the leftmost part of a domain name.
140      *
141      * @param scope the domain name.
142      * @return scope with everything before the first dot ripped off.
143      */

144     protected String JavaDoc getParentScope(String JavaDoc scope) {
145         int split = scope.indexOf('.');
146         return (split == -1)? null: scope.substring(split + 1);
147     }
148
149     /** Get a module by name.
150      *
151      * All modules in the order should have unique names. This method makes it
152      * possible to get the modules of the order by its name.
153      *
154      * @param name the modules name.
155      * @return the module the name references.
156      */

157     public ModuleType getModule(String JavaDoc name) {
158         return settingsCache.getGlobalSettings().getModule(name);
159     }
160
161     /** Get a complex type by its absolute name.
162      *
163      * The absolute name is the complex types name and the path leading to
164      * it.
165      *
166      * @param settings the settings object to query.
167      * @param absoluteName the absolute name of the complex type to get.
168      * @return the complex type referenced by the absolute name or null if
169      * the complex type could not be found in this settings object.
170      * @throws AttributeNotFoundException is thrown if no ComplexType by this
171      * name exist.
172      */

173     public ComplexType getComplexTypeByAbsoluteName(
174             CrawlerSettings settings, String JavaDoc absoluteName)
175             throws AttributeNotFoundException JavaDoc {
176
177         settings = settings == null ? settingsCache.getGlobalSettings() : settings;
178
179         DataContainer data = settings.getData(absoluteName);
180         if (data == null) {
181             CrawlerSettings parentSettings = settings.getParent();
182             if (parentSettings == null) {
183                 throw new AttributeNotFoundException JavaDoc(absoluteName);
184             }
185             return getComplexTypeByAbsoluteName(parentSettings, absoluteName);
186         }
187         return data.getComplexType();
188     }
189
190     protected static String JavaDoc getTypeName(String JavaDoc className) {
191         return (String JavaDoc) class2name.get(className);
192     }
193
194     protected static String JavaDoc getClassName(String JavaDoc typeName) {
195         return (String JavaDoc) name2class.get(typeName);
196     }
197
198     /** Convert a String object to an object of <code>typeName</code>.
199      *
200      * @param stringValue string to convert.
201      * @param typeName type to convert to. typeName should be one of the
202      * supported types represented by constants in this class.
203      * @return the new value object.
204      * @throws ClassCastException is thrown if string could not be converted.
205      */

206     protected static Object JavaDoc StringToType(String JavaDoc stringValue, String JavaDoc typeName) {
207         Object JavaDoc value;
208         if (typeName == SettingsHandler.STRING) {
209             value = stringValue;
210         } else if (typeName == SettingsHandler.TEXT) {
211             value = new TextField(stringValue);
212         } else if (typeName == SettingsHandler.INTEGER) {
213             value = Integer.decode(stringValue);
214         } else if (typeName == SettingsHandler.LONG) {
215             value = Long.decode(stringValue);
216         } else if (typeName == SettingsHandler.BOOLEAN) {
217             value = Boolean.valueOf(stringValue);
218         } else if (typeName == SettingsHandler.DOUBLE) {
219             value = Double.valueOf(stringValue);
220         } else if (typeName == SettingsHandler.FLOAT) {
221             value = Float.valueOf(stringValue);
222         } else if (typeName == SettingsHandler.TIMESTAMP) {
223             try {
224                 value = ArchiveUtils.parse14DigitDate(stringValue);
225             } catch (ParseException JavaDoc e) {
226                 throw new ClassCastException JavaDoc(
227                     "Cannot convert '"
228                         + stringValue
229                         + "' to type '"
230                         + typeName
231                         + "'");
232             }
233         } else {
234             throw new ClassCastException JavaDoc(
235                 "Cannot convert '"
236                     + stringValue
237                     + "' to type '"
238                     + typeName
239                     + "'");
240         }
241         return value;
242     }
243
244     /** Get CrawlerSettings object in effect for a host or domain.
245      *
246      * If there is no specific settings for the host/domain, it will recursively
247      * go up the hierarchy to find the settings object that should be used for
248      * this host/domain.
249      *
250      * @param host the host or domain to get the settings for.
251      * @return settings object in effect for the host/domain.
252      * @see #getSettingsObject(String)
253      * @see #getOrCreateSettingsObject(String)
254      */

255     public CrawlerSettings getSettings(String JavaDoc host) {
256         return getRefinementsForSettings(getSettingsForHost(host), null);
257     }
258
259     /** Get CrawlerSettings object in effect for a host or domain.
260     *
261     * If there is no specific settings for the host/domain, it will recursively
262     * go up the hierarchy to find the settings object that should be used for
263     * this host/domain.
264     * <p/>
265     * This method passes around a URI that refinement are checked against.
266     *
267     * @param host the host or domain to get the settings for.
268     * @param uuri UURI for context.
269     * @return settings object in effect for the host/domain.
270     * @see #getSettingsObject(String)
271     * @see #getOrCreateSettingsObject(String)
272     */

273     public CrawlerSettings getSettings(String JavaDoc host, UURI uuri) {
274         return getRefinementsForSettings(getSettingsForHost(host), uuri);
275     }
276
277     protected CrawlerSettings getSettingsForHost(String JavaDoc host) {
278         CrawlerSettings settings = settingsCache.getSettings(host, null);
279
280         if (settings == null) {
281             String JavaDoc tmpHost = host;
282             settings = getSettingsObject(tmpHost);
283             while (settings == null && tmpHost != null) {
284                 tmpHost = getParentScope(tmpHost);
285                 settings = getSettingsObject(tmpHost);
286             }
287
288             settingsCache.putSettings(host, settings);
289         }
290
291         return settings;
292     }
293
294     private CrawlerSettings getRefinementsForSettings(CrawlerSettings settings,
295             UURI uri) {
296         if (settings.hasRefinements()) {
297             for(Iterator JavaDoc it = settings.refinementsIterator(); it.hasNext();) {
298                 Refinement refinement = (Refinement) it.next();
299                 if (refinement.isWithinRefinementBounds(uri)) {
300                     settings = getSettingsObject(settings.getScope(),
301                             refinement.getReference());
302                 }
303             }
304         }
305
306         return settings;
307     }
308
309     /** Get CrawlerSettings object for a host or domain.
310      *
311      * The difference between this method and the
312      * <code>getSettings(String host)</code> is that this method will return
313      * null if there is no settings for particular host or domain.
314      *
315      * @param scope the host or domain to get the settings for.
316      * @return settings object for the host/domain or null if no
317      * settings exist for the host/domain.
318      * @see #getSettings(String)
319      * @see #getOrCreateSettingsObject(String)
320      */

321     public CrawlerSettings getSettingsObject(String JavaDoc scope) {
322         return getSettingsObject(scope, null);
323     }
324
325     /**
326      * Get CrawlerSettings object for a host/domain and a particular refinement.
327      *
328      * @param scope the host or domain to get the settings for.
329      * @param refinement the refinement reference to get.
330      * @return CrawlerSettings object for a host/domain and a particular
331      * refinement or null if no settings exist for the host/domain.
332      */

333     public CrawlerSettings getSettingsObject(String JavaDoc scope, String JavaDoc refinement) {
334         CrawlerSettings settings =
335             settingsCache.getSettingsObject(scope, refinement);
336
337         if (settings == null) {
338             // Reference not found
339
settings = new CrawlerSettings(this, scope, refinement);
340             // Try to read settings from persisten storage. If its not there
341
// it will be set to null.
342
settings = readSettingsObject(settings);
343             if (settings != null) {
344                 settingsCache.putSettings(scope, settings);
345             }
346         }
347         return settings;
348     }
349
350     /** Get or create CrawlerSettings object for a host or domain.
351      *
352      * This method is similar to {@link #getSettingsObject(String)} except that
353      * if there is no settings for this particular host or domain a new settings
354      * object will be returned.
355      *
356      * @param scope the host or domain to get or create the settings for.
357      * @return settings object for the host/domain.
358      * @see #getSettings(String)
359      * @see #getSettingsObject(String)
360      */

361     public CrawlerSettings getOrCreateSettingsObject(String JavaDoc scope) {
362         return getOrCreateSettingsObject(scope, null);
363     }
364
365     public CrawlerSettings getOrCreateSettingsObject(String JavaDoc scope,
366             String JavaDoc refinement) {
367         CrawlerSettings settings;
368         settings = getSettingsObject(scope, refinement);
369         if (settings == null) {
370             scope = scope.intern();
371
372             // No existing settings object found, create one
373
settings = new CrawlerSettings(this, scope, refinement);
374             settingsCache.refreshHostToSettings();
375             settingsCache.putSettings(scope, settings);
376         }
377         return settings;
378     }
379
380     /** Write the CrawlerSettings object to persistent storage.
381      *
382      * @param settings the settings object to write.
383      */

384     public abstract void writeSettingsObject(CrawlerSettings settings);
385
386     /** Read the CrawlerSettings object from persistent storage.
387      *
388      * @param settings the settings object to be updated with data from the
389      * persistent storage.
390      * @return the updated settings object or null if there was no data for this
391      * in the persistent storage.
392      */

393     protected abstract CrawlerSettings readSettingsObject(CrawlerSettings settings);
394
395     /** Delete a settings object from persistent storage.
396      *
397      * @param settings the settings object to delete.
398      */

399     public void deleteSettingsObject(CrawlerSettings settings) {
400         settingsCache.deleteSettingsObject(settings);
401     }
402
403     /** Get the CrawlOrder.
404      *
405      * @return the CrawlOrder
406      */

407     public CrawlOrder getOrder() {
408         return order;
409     }
410
411     /** Instatiate a new ModuleType given its name and className.
412      *
413      * @param name the name for the new ComplexType.
414      * @param className the class name of the new ComplexType.
415      * @return an instance of the class identified by className.
416      *
417      * @throws InvocationTargetException
418      */

419     public static ModuleType instantiateModuleTypeFromClassName(
420             String JavaDoc name, String JavaDoc className)
421             throws InvocationTargetException JavaDoc {
422
423         Class JavaDoc cl;
424         try {
425             cl = Class.forName(className);
426         } catch (ClassNotFoundException JavaDoc e) {
427             throw new InvocationTargetException JavaDoc(e);
428         }
429
430         ModuleType module;
431         try {
432             Constructor JavaDoc co =
433                 cl.getConstructor(new Class JavaDoc[] { String JavaDoc.class });
434             module = (ModuleType) co.newInstance(new Object JavaDoc[] { name });
435         } catch (IllegalArgumentException JavaDoc e) {
436             throw new InvocationTargetException JavaDoc(e);
437         } catch (InstantiationException JavaDoc e) {
438             throw new InvocationTargetException JavaDoc(e);
439         } catch (IllegalAccessException JavaDoc e) {
440             throw new InvocationTargetException JavaDoc(e);
441         } catch (SecurityException JavaDoc e) {
442             throw new InvocationTargetException JavaDoc(e);
443         } catch (NoSuchMethodException JavaDoc e) {
444             throw new InvocationTargetException JavaDoc(e);
445         }
446         return module;
447     }
448
449     /**
450      * Transforms a relative path so that it is relative to a location that is
451      * regarded as a working dir for these settings. If an absolute path is given,
452      * it will be returned unchanged.
453      * @param path A relative path to a file (or directory)
454      * @return The same path modified so that it is relative to the file level
455      * location that is considered the working directory for these settings.
456      */

457     public abstract File JavaDoc getPathRelativeToWorkingDirectory(String JavaDoc path);
458
459     /**
460      * Will return a Collection of strings with domains that contain 'per'
461      * domain overrides (or their subdomains contain them).
462      *
463      * The domains considered are
464      * limited to those that are subdomains of the supplied domain. If null or
465      * empty string is supplied the TLDs will be considered.
466      * @param rootDomain The domain to get domain overrides for. Examples:
467      * 'org', 'archive.org', 'crawler.archive.org' etc.
468      * @return An array of domains that contain overrides. If rootDomain does not
469      * exist an empty array will be returned.
470      */

471     public abstract Collection JavaDoc getDomainOverrides(String JavaDoc rootDomain);
472
473     /**
474      * Unregister an instance of {@link ValueErrorHandler}.
475      *
476      * @param errorHandler the <code>CalueErrorHandler</code> to unregister.
477      *
478      * @see ValueErrorHandler
479      * @see #setErrorReportingLevel(Level)
480      * @see #registerValueErrorHandler(ValueErrorHandler)
481      *
482      */

483     public void unregisterValueErrorHandler(ValueErrorHandler errorHandler) {
484         valueErrorHandlers.remove(errorHandler);
485     }
486
487     /**
488      * Register an instance of {@link ValueErrorHandler}.
489      * <p>
490      * If a ValueErrorHandler is registered, only constraints with level
491      * {@link Level#SEVERE}will throw an {@link InvalidAttributeValueException}.
492      * The ValueErrorHandler will recieve a notification for all failed checks
493      * with level equal or greater than the error reporting level.
494      *
495      * @param errorHandler the <code>CalueErrorHandler</code> to register.
496      *
497      * @see ValueErrorHandler
498      * @see #setErrorReportingLevel(Level)
499      * @see #unregisterValueErrorHandler(ValueErrorHandler)
500      */

501     public void registerValueErrorHandler(ValueErrorHandler errorHandler) {
502         if (errorHandler != null) {
503             valueErrorHandlers.add(errorHandler);
504         }
505     }
506
507     /**
508      * Fire events on all registered {@link ValueErrorHandler}.
509      *
510      * @param error the failed constraints return value.
511      * @return true if there was any registered ValueErrorHandlers to notify.
512      */

513     boolean fireValueErrorHandlers(Constraint.FailedCheck error) {
514         if (error.getLevel().intValue() >= errorReportingLevel) {
515             for (Iterator JavaDoc it = valueErrorHandlers.iterator(); it.hasNext();) {
516                 ((ValueErrorHandler) it.next()).handleValueError(error);
517             }
518         }
519         return valueErrorHandlers.size() > 0;
520     }
521
522     /**
523      * Set the level for which notification of failed constraints will be fired.
524      *
525      * @param level the error reporting level.
526      */

527     public void setErrorReportingLevel(Level JavaDoc level) {
528         errorReportingLevel = level.intValue();
529     }
530
531     /**
532      * Creates and returns a <tt>List</tt> of all files comprising the current
533      * settings framework.
534      *
535      * <p>The List contains the absolute String path of each file.
536      *
537      * <p>The list should contain any configurable files, including such files
538      * as seed file and any other files use by the various settings modules.
539      *
540      * <p>Implementations of the SettingsHandler that do not use files for
541      * permanent storage should return an empty list.
542      * @return <code>List</code> of framework files.
543      */

544     public abstract List JavaDoc getListOfAllFiles();
545     
546     /**
547      * Clear any per-host settings cached in memory; allows editting of
548      * per-host settings files on disk, perhaps in bulk/automated fashion,
549      * to take effect in running crawl.
550      */

551     public void clearPerHostSettingsCache() {
552         settingsCache.clear();
553     }
554 }
555
Popular Tags