KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > matuschek > spider > RobotTask


package net.matuschek.spider;

/*********************************************
    Copyright (c) 2001 by Daniel Matuschek
 *********************************************/

import java.net.MalformedURLException;
import java.net.URL;

import net.matuschek.http.HttpConstants;

12 /**
13  * The RobotTask implements a simple object that represents a task
14  * for the web robot.
15  *
16  * @author Daniel Matuschek
17  * @version $Id: RobotTask.java,v 1.7 2003/02/25 13:34:48 oliver_schmidt Exp $
18  */

19 public class RobotTask
20   implements Comparable JavaDoc
21 {
22
23   /**
24    * Creates a new RobotTask with the given parameters
25    *
26    * @param url - the URL of the RobotTask
27    * @param maxDepth - maximal search depth starting from this task
28    * @param referer - content of the HTTP Referer header, use "-" if
29    * you don't want to use a Referer
30    */

31   public RobotTask(URL JavaDoc url, int maxDepth, String JavaDoc referer) {
32     setUrl(url);
33     this.maxDepth=maxDepth;
34     this.referer=referer;
35   }
36   
37   /**
38    * Creates a new RobotTask with the given parameters
39    *
40    * @param urlString - the URL (as String) of the RobotTask
41    * @param maxDepth - maximal search depth starting from this task
42    * @param referer - content of the HTTP Referer header, use "-" if
43    * you don't want to use a Referer
44    */

45   public RobotTask(String JavaDoc urlString, int maxDepth, String JavaDoc referer) {
46     this.urlString = urlString;
47     this.maxDepth=maxDepth;
48     this.referer=referer;
49   }
50   
51   public URL JavaDoc getUrl() {
52     try {
53         return new URL JavaDoc(urlString);
54     } catch (MalformedURLException JavaDoc e) {
55         e.printStackTrace();
56         return null;
57     }
58   }
59   
60   public void setUrl(URL JavaDoc url) {
61     urlString = url.toString();
62     hashCode = 0;
63   }
64   
65   public int getMaxDepth() {
66     return maxDepth;
67   }
68   
69   public void setMaxDepth(int maxDepth) {
70     this.maxDepth = maxDepth;
71   }
72   
73   public String JavaDoc getReferer() {
74     return referer;
75   }
76   
77   public void setReferer(String JavaDoc referer) {
78     this.referer = referer;
79   }
80
81   public int getMethod() {
82     return method;
83   }
84
85   public void setMethod(int method) {
86     this.method = method;
87     hashCode = 0;
88   }
89
90   public String JavaDoc getParamString() {
91     return paramString;
92   }
93
94   public void setParamString(String JavaDoc paramString) {
95     this.paramString = paramString;
96     hashCode = 0;
97   }
98
99
100
101
102   /**
103    * two RobotTasks are equal, if they represent the
104    * same URL
105    */

106   public boolean equals(Object JavaDoc o) {
107     try {
108       return (compareTo(o) == 0);
109     } catch (ClassCastException JavaDoc e) {
110       return false;
111     }
112   }
113
114   
115   /**
116    * Implements a natural order for RobotTasks. This is based
117    * on
118    * @param o another RobotTask object to compare to
119    * @return 0 if o is equal to this object, 1 if it is smaller,
120    * -1 otherwise
121    * @exception ClassCastException if o is no RobotTask object
122    */

123   public int compareTo(Object JavaDoc o)
124     throws ClassCastException JavaDoc
125   {
126     RobotTask r = (RobotTask)o;
127
128     if (r == null) {
129       throw new ClassCastException JavaDoc("object to compare to is null");
130     }
131
132     int diff = hashCode() - r.hashCode();
133     if (diff == 0) {
134         String JavaDoc me = this.getInternalStringRepresentation();
135         String JavaDoc it = r.getInternalStringRepresentation();
136         diff = me.compareTo(it);
137     }
138
139     return diff;
140     }
141
142
143   /**
144    * Gets a String representation for this RobotTask object. Format
145    * may change without notice. Should be used for debugging and logging
146    * only.
147    * @return a String represantation for this task
148    */

149   public String JavaDoc toString() {
150     return urlString + " " + paramString + " Method " + method;
151   }
152
153
154   /**
155    * Gets a hashcode for this object. It is based on the String hash code
156    * implementation used with the internal string representation of this
157    * object
158    */

159   public int hashCode() {
160     if (hashCode != 0) {
161         return hashCode;
162     }
163     hashCode = getInternalStringRepresentation().hashCode();
164     return hashCode;
165   }
166
167   /**
168    * Gets an internal String representation for comparisons
169    * and hash code generation.
170    *
171    * Currently this contains the url, the parameters and the method.
172    * Because this is only used as an internal key and the URL and
173    * parameters can be very long, I have decided to use the MD5 hash of
174    * the longer representation.
175    *
176    * @return a String that should be unique for every object
177    */

178   public String JavaDoc getInternalStringRepresentation() {
179       return (paramString == null && method == HttpConstants.GET)
180         ? urlString
181         : urlString + paramString + method;
182   }
183   
184 // protected URL url; -> urlString holds all the information to save memory
185
private int maxDepth;
186   private String JavaDoc referer;
187   protected int method=HttpConstants.GET;
188   protected String JavaDoc paramString=null;
189   
190   protected int hashCode = 0; // cached hashCode for quick retrieval and comparison
191
protected String JavaDoc urlString; // URL as String because the class URL uses lots of space
192
protected int retries = 0; // number of retries
193

194 /*
195  * Increases retries and returns increased value.
196  */

197 public int retry() { return ++retries; }
198
199 }
200
Popular Tags