KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > settings > CrawlerSettings


/* CrawlerSettings
 *
 * $Id: CrawlerSettings.java,v 1.9.12.1 2007/01/13 01:31:27 stack-sf Exp $
 *
 * Created on Dec 16, 2003
 *
 * Copyright (C) 2004 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

25 package org.archive.crawler.settings;
26
27 import java.util.ArrayList JavaDoc;
28 import java.util.Date JavaDoc;
29 import java.util.HashMap JavaDoc;
30 import java.util.Iterator JavaDoc;
31 import java.util.List JavaDoc;
32 import java.util.ListIterator JavaDoc;
33 import java.util.Map JavaDoc;
34
35 import org.archive.crawler.settings.refinements.Refinement;
36 import org.archive.net.UURI;
37
38 /**
39  * Class representing a settings file.
40  *
41  * More precisely it represents a collection of settings valid in a particular
42  * scope. The scope is either the global settings, or the settings to be used
43  * for a particular domain or host. For scopes other than global, the instance
44  * will only contain those settings that are different from the global.
45  *
46  * In the default implementation this is a one to one mapping from a file to
47  * an instance of this class, but in other implementations the information in
48  * an instance of this class might be stored in a different way (for example
49  * in a RDBMS).
50  *
51  * @author John Erik Halse
52  */

53 public class CrawlerSettings {
54     /** Registry of DataContainers for ComplexTypes in this settings object
55      * indexed on absolute name */

56     private final Map JavaDoc<String JavaDoc,DataContainer> localComplexTypes
57      = new HashMap JavaDoc<String JavaDoc,DataContainer>();
58
59     /** Registry of top level ModuleTypes in this settings object indexed on
60      * module name. These are modules that doesn't have parents in this
61      * settings object
62      */

63     private final Map JavaDoc<String JavaDoc,ModuleType> topLevelModules
64      = new HashMap JavaDoc<String JavaDoc,ModuleType>();
65
66     /** Registry of all ModuleTypes in this settings object indexed on
67      * module name.
68      */

69     private final Map JavaDoc<String JavaDoc,ComplexType> localModules
70      = new HashMap JavaDoc<String JavaDoc,ComplexType>();
71
72     /** Reference to the settings handler this settings object belongs to */
73     private final SettingsHandler settingsHandler;
74
75     /** Scope for this collection of settings (hostname) */
76     private final String JavaDoc scope;
77
78     /** List of refinements applied to this settings object */
79     private List JavaDoc<Refinement> refinements;
80
81     /** True if this settings object is a refinement */
82     private boolean isRefinement = false;
83
84     /** Name of this collection of settings */
85     private String JavaDoc name = "";
86
87     /** Description of this collection of settings */
88     private String JavaDoc description = "";
89
90     /**
91      * Operator of this crawl job.
92      */

93     private String JavaDoc operator = "Admin";
94
95     /**
96      * Organization running this crawl job.
97      */

98     private String JavaDoc organization = "";
99
100     /**
101      * Audience/recipient/customer on whose behalf this crawl is being run.
102      */

103     private String JavaDoc audience = "";
104
105
106     /** Time when this collection was last saved to persistent storage */
107     private Date JavaDoc lastSaved = null;
108
109     /**
110      * Constructs a new CrawlerSettings object.
111      *
112      * Application code should not call the constructor directly, but use the
113      * methods in SettingsHandler instead.
114      *
115      * @param handler The SettingsHandler this object belongs to.
116      * @param scope The scope of this settings object (ie. host or domain).
117      *
118      * @see SettingsHandler#getSettings(String)
119      * @see SettingsHandler#getSettingsObject(String)
120      */

121     public CrawlerSettings(SettingsHandler handler, String JavaDoc scope) {
122         this.settingsHandler = handler;
123         this.scope = scope;
124     }
125
126     /**
127     * Constructs a new CrawlerSettings object which is a refinement of another
128     * settings object.
129     *
130     * Application code should not call the constructor directly, but use the
131     * methods in SettingsHandler instead.
132     *
133     * @param handler The SettingsHandler this object belongs to.
134     * @param scope The scope of this settings object (ie. host or domain).
135     * @param refinement the name or reference to the refinement.
136     *
137     * @see SettingsHandler#getSettings(String)
138     * @see SettingsHandler#getSettingsObject(String)
139     */

140     public CrawlerSettings(SettingsHandler handler, String JavaDoc scope,
141             String JavaDoc refinement) {
142         this(handler, scope);
143         if (refinement != null && !refinement.equals("")) {
144             this.isRefinement = true;
145             this.name = refinement;
146         }
147     }
148
149     /** Get the description of this CrawlerSettings object.
150      *
151      * @return the description of this CrawlerSettings object.
152      */

153     public String JavaDoc getDescription() {
154         return description;
155     }
156
157     /** Get the name of this CrawlerSettings object.
158      *
159      * @return the name of this CrawlerSettings object.
160      */

161     public String JavaDoc getName() {
162         return name;
163     }
164
165     /**
166      * Get the name of operator of this crawl from this CrawlerSettings object.
167      *
168      * @return the name of this CrawlerSettings object.
169      */

170     public String JavaDoc getOperator() {
171         return operator;
172     }
173
174     /**
175      * Get the name of the organization running this crawl from this
176      * CrawlerSettings object.
177      *
178      * @return the name of the organization running this crawl.
179      */

180     public String JavaDoc getOrganization() {
181         return organization;
182     }
183
184     /**
185      * Get the audience/customer/recipient of the crawl job product from
186      * this CrawlerSettings object.
187      *
188      * @return the audience/customer/recipient of the crawl job product.
189      */

190     public String JavaDoc getAudience() {
191         return audience;
192     }
193
194     /** Get the scope of this CrawlerSettings object.
195      *
196      * @return the scope of this CrawlerSettings object.
197      */

198     public String JavaDoc getScope() {
199         return scope;
200     }
201
202     /** Set the description of this CrawlerSettings object.
203      *
204      * @param string the description to be set for this CrawlerSettings object.
205      */

206     public void setDescription(String JavaDoc string) {
207         description = string;
208     }
209
210     /**
211      * Set the operator of this crawl job.
212      * @param name Operator running this crawl.
213      */

214     public void setOperator(String JavaDoc name) {
215         this.operator = name;
216     }
217
218     /**
219      * Set the name of the organization who is running this crawl.
220      * @param name Name of organization running this crawl.
221      */

222     public void setOrganization(String JavaDoc name) {
223         this.organization = name;
224     }
225
226     /**
227      * Set the recipient/customer for the crawl job product.
228      * @param name Recipient of crawl job product.
229      */

230     public void setAudience(String JavaDoc name) {
231         this.audience = name;
232     }
233
234     /** Set the name of this CrawlerSettings object.
235      *
236      * @param string the name to be set for this CrawlerSettings object.
237      */

238     public void setName(String JavaDoc string) {
239         name = string;
240     }
241
242     /**
243      * Get the time when this CrawlerSettings was last saved to persistent
244      * storage.
245      *
246      * @return the time when this CrawlerSettings was last saved to persistent
247      * storage. Null if it has not been saved.
248      */

249     public Date JavaDoc getLastSavedTime() {
250         return lastSaved;
251     }
252
253     /**
254      * Set the time when this CrawlerSettings was last saved to persistent
255      * storage.
256      *
257      * @param lastSaved the time when this CrawlerSettings was last saved to
258      * persistent storage.
259      */

260     protected void setLastSavedTime(Date JavaDoc lastSaved) {
261         this.lastSaved = lastSaved;
262     }
263
264     protected void addTopLevelModule(ModuleType module) {
265 // if (topLevelModules.containsKey(module.getName())) {
266
// throw new IllegalArgumentException(
267
// "Duplicate module name: " + module.getName());
268
// } else {
269
topLevelModules.put(module.getName(), module);
270 // }
271
}
272
273     protected DataContainer addComplexType(ComplexType type) {
274         DataContainer data = new DataContainer(this, type);
275         localComplexTypes.put(type.getAbsoluteName(), data);
276         if (type instanceof ModuleType) {
277             localModules.put(type.getName(), type);
278         }
279         return data;
280     }
281
282     protected DataContainer getData(ComplexType complex) {
283         return getData(complex.getAbsoluteName());
284     }
285
286     protected DataContainer getData(String JavaDoc absoluteName) {
287         return (DataContainer) localComplexTypes.get(absoluteName);
288     }
289
290     protected ModuleType getTopLevelModule(String JavaDoc name) {
291         return (ModuleType) topLevelModules.get(name);
292     }
293
294     public ModuleType getModule(String JavaDoc name) {
295         return (ModuleType) localModules.get(name);
296     }
297
298     protected Iterator JavaDoc topLevelModules() {
299         return topLevelModules.values().iterator();
300     }
301
302     /** Get the parent of this CrawlerSettings object.
303      *
304      * @return the parent of this CrawlerSettings object.
305      */

306     public CrawlerSettings getParent() {
307         return getParent(null);
308     }
309
310     /**
311      * Get the parent of this CrawlerSettings object.
312      * This method passes around a URI so that refinements could be checked.
313      *
314      * @param uri The uri for which parents of this object shoul be found.
315      * @return the parent of this CrawlerSettings object.
316      */

317     public CrawlerSettings getParent(UURI uri) {
318         return (isRefinement())?
319             settingsHandler.getSettingsForHost(scope):
320             (scope == null || scope.equals(""))?
321                 null:
322                 settingsHandler.
323                     getSettings(settingsHandler.getParentScope(scope), uri);
324     }
325
326     /** Get the SettingHandler this CrawlerSettings object belongs to.
327      *
328      * @return the SettingHandler this CrawlerSettings object belongs to.
329      */

330     public SettingsHandler getSettingsHandler() {
331         return settingsHandler;
332     }
333
334     /**
335      * Get an <code>ListIterator</code> over the refinements for this
336      * settings object.
337      *
338      * @return Returns an iterator over the refinements.
339      */

340     public ListIterator JavaDoc refinementsIterator() {
341         if (refinements == null) {
342             refinements = new ArrayList JavaDoc<Refinement>();
343         }
344         return refinements.listIterator();
345     }
346
347     /**
348      * Add a refinement to this settings object.
349      *
350      * @param refinement The refinements to set.
351      */

352     public void addRefinement(Refinement refinement) {
353         if (refinements == null) {
354             refinements = new ArrayList JavaDoc<Refinement>();
355         }
356         this.refinements.remove(refinement);
357         this.refinements.add(refinement);
358     }
359
360     /**
361      * Remove a refinement from this settings object.
362      *
363      * @param reference the reference (name) to the refinement to be removed.
364      * @return true if something was removed, false if the refinement was not
365      * found.
366      */

367     public boolean removeRefinement(String JavaDoc reference) {
368         if (hasRefinements()) {
369             for(Iterator JavaDoc it = refinements.iterator(); it.hasNext();) {
370                 if (((Refinement) it.next()).getReference().equals(reference)) {
371                     it.remove();
372                     return true;
373                 }
374             }
375         }
376         return false;
377     }
378
379     /**
380      * Get a refinement with a given reference.
381      *
382      * @param reference the reference (name) to the refinement to get.
383      * @return the refinement having the specified reference or null if no
384      * refinement matches it.
385      */

386     public Refinement getRefinement(String JavaDoc reference) {
387         if (hasRefinements()) {
388             for(Iterator JavaDoc it = refinements.iterator(); it.hasNext();) {
389                 Refinement tmp = (Refinement) it.next();
390                 if (tmp.getReference().equals(reference)) {
391                     return tmp;
392                 }
393             }
394         }
395         return null;
396     }
397
398     /**
399      * Returns true if this settings object has refinements attached to it.
400      *
401      * @return true if this settings object has refinements attached to it.
402      */

403     public boolean hasRefinements() {
404         return refinements != null && !refinements.isEmpty();
405     }
406
407     /**
408      * Returns true if this settings object is a refinement.
409      *
410      * @return true if this settings object is a refinement.
411      */

412     public boolean isRefinement() {
413         return isRefinement;
414     }
415
416     /**
417      * Mark this settings object as an refinement.
418      *
419      * @param isRefinement Set this to true if this settings object is a
420      * refinement.
421      */

422     public void setRefinement(boolean isRefinement) {
423         this.isRefinement = isRefinement;
424     }
425 }
426
Popular Tags