KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > matuschek > spider > HashedMemoryTaskList


1 package net.matuschek.spider;
2
3 /*********************************************
4      Copyright (c) 2001 by Daniel Matuschek
5  *********************************************/

6
7 import java.util.HashSet JavaDoc;
8 import java.util.LinkedList JavaDoc;
9
10 /**
11  * Memory based implementation of the TaskList interface.
12  * Uses a LinkedList for sequential access
13  * and a HashSet for fast retrieval
14  * To be thread safe, all methods are synchronized.
15  * <p>
16  * @author Daniel Matuschek, Jo Christen, Oliver Schmidt
17  * @version $Id: HashedMemoryTaskList.java,v 1.7 2004/08/09 13:07:22 matuschd Exp $*
18  */

19 public class HashedMemoryTaskList implements TaskList {
20
21    public static int DEFAULT_CAPACITY = 50000;
22    
23   /**
24    * Task store as linked list for sequential access.
25    *
26    * @link aggregation
27    * @associates <{RobotTask}>
28    */

29   //private Vector list = new Vector();
30
private LinkedList JavaDoc list = new LinkedList JavaDoc();
31
32   /**
33    * Task store as hash set for fast searching
34    * In this list the internal string represantation of
35    * the task will be stored.
36    *
37    * @link aggregation
38    * @associates <{RobotTask}>
39    */

40   private HashSet JavaDoc hashset = null;
41
42
43   /**
44    * Defines, if sequential access if allowed.
45    *
46    * Under some circumstances it may be useful to NOT have
47    * sequential access but only to find, if an object is in
48    * the list. E.g. this makes sense for the list of URLs
49    * already visited.
50    */

51   private boolean sequentialAccess;
52
53
54   /**
55    * Standard constructor.
56    * (sequentialAccess=true, initialCapacity=DEFAULT_CAPACITY)
57    */

58   public HashedMemoryTaskList() {
59     this(true, DEFAULT_CAPACITY);
60   }
61
62
63   /**
64    * Constructor with sequentialAccess-flag
65    *
66    * @param sequentialAccess can be set to false if the removeFirst
67    * and elementAt methods are not used. In this case, storage
68    * of a task will need less memory. This can be interesting if you
69    * have to store lots of objects in this list.
70    */

71   public HashedMemoryTaskList(boolean sequentialAccess) {
72     this(sequentialAccess, DEFAULT_CAPACITY);
73   }
74
75   /**
76    * Constructor with sequentialAccess-flag and initialCapacity.
77    *
78    * @param sequentialAccess can be set to false if the removeFirst
79    * @param initial capacity (expected document count)
80    * and elementAt methods are not used. In this case, storage
81    * of a task will need less memory. This can be interesting if you
82    * have to store lots of objects in this list.
83    */

84   public HashedMemoryTaskList(boolean sequentialAccess, int initialCapacity) {
85     this.sequentialAccess = sequentialAccess;
86     this.hashset = new HashSet JavaDoc(initialCapacity);
87   }
88
89
90
91   /**
92    * Add a task to the end of the list
93    *
94    * @param task a RobotTask object to store in the queue
95    */

96   public synchronized void add(RobotTask task) {
97     if (this.sequentialAccess) {
98       list.add(task);
99     }
100     hashset.add(task);
101   }
102
103
104   /**
105    * Add a task at the beginning of list
106    * @param task a RobotTask object to store in the queue
107    */

108   public synchronized void addAtStart(RobotTask task) {
109     if (this.sequentialAccess) {
110       list.add(0,task);
111     }
112     // we need to put a dummy value object to the HashTable, adding
113
// null as value do not work
114
hashset.add(task);
115   }
116
117
118   /**
119    * Clean up the list, remove all objects
120    */

121   public synchronized void clear() {
122     list.clear();
123     hashset.clear();
124   }
125
126
127   /**
128    * Is this robot task stored in the list ?
129    */

130   public synchronized boolean contains(RobotTask task) {
131     return hashset.contains(task);
132   }
133
134
135   /**
136    * Remove this object from the list
137    */

138   public synchronized boolean remove(RobotTask task) {
139     hashset.remove(task);
140     if (this.sequentialAccess) {
141       return list.remove(task);
142     } else {
143       return true;
144     }
145   }
146
147   
148   /**
149    * Get and remove the first element.
150    *
151    * @return the first task in the list. This object will also be removed
152    * from the list.
153    * @exception ArrayIndexOutOfBoundsException if the list is empty or
154    * sequential access is not allowed
155    */

156   public synchronized RobotTask removeFirst()
157     throws ArrayIndexOutOfBoundsException JavaDoc
158   {
159     if (! this.sequentialAccess) {
160       throw
161     new ArrayIndexOutOfBoundsException JavaDoc("sequential access not allowed");
162     }
163     //RobotTask task = (RobotTask)list.elementAt(0);
164
//list.removeElementAt(0);
165
RobotTask task = (RobotTask) list.removeFirst();
166     hashset.remove(task);
167     return task;
168   }
169
170
171   /**
172    * Returns the number of elements in this list
173    */

174   public synchronized int size() {
175     return hashset.size();
176   }
177
178
179   /**
180    * Get the n-th element in the list. Elements are numbered form 0 to
181    * size-1.
182    * @param position
183    * @exception ArrayIndexOutOfBoundsException if the given position doesn't
184    * exist
185    */

186   public synchronized RobotTask elementAt(int position)
187     throws ArrayIndexOutOfBoundsException JavaDoc
188   {
189     if (! this.sequentialAccess) {
190       throw
191     new ArrayIndexOutOfBoundsException JavaDoc("sequential access not allowed");
192     }
193     //return (RobotTask)(list.elementAt(position));
194
return (RobotTask)(list.get(position));
195   }
196
197 }
198
Popular Tags