KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > javacoding > jspider > core > storage > memory > ResourceDAOImpl


1 package net.javacoding.jspider.core.storage.memory;
2
3 import net.javacoding.jspider.core.event.impl.*;
4 import net.javacoding.jspider.core.model.*;
5 import net.javacoding.jspider.core.storage.spi.ResourceDAOSPI;
6 import net.javacoding.jspider.core.storage.spi.StorageSPI;
7 import net.javacoding.jspider.core.storage.exception.InvalidStateTransitionException;
8 import net.javacoding.jspider.core.util.URLUtil;
9
10 import java.net.URL JavaDoc;
11 import java.util.*;
12
13 /**
14  * $Id: ResourceDAOImpl.java,v 1.12 2003/04/11 16:37:07 vanrogu Exp $
15  */

16 class ResourceDAOImpl implements ResourceDAOSPI {
17
18     protected StorageSPI storage;
19
20     protected Map knownURLs;
21     protected Map byId;
22
23     protected Set spideredResources; /* urls visited by a spider, but not yet parsed */
24
25     protected Set ignoredForFetchingResources; /* urls ignored because of rule decisions */
26     protected Set ignoredForParsingResources; /* urls ignored because non-HTML */
27     protected Set forbiddenResources; /* forbidden urls */
28     protected Set fetchErrorResources; /* urls that could not be visited by the spider */
29     protected Set parseErrorResources; /* resources that could not be parsed correctly */
30     protected Set parsedResources; /* urls that were spidered AND interpreted */
31
32     protected Map referers;
33     protected Map referees;
34
35     protected Map byFolder;
36     protected Map rootResources;
37
38     public ResourceDAOImpl(StorageSPI storage) {
39         this.storage = storage;
40         spideredResources = new HashSet();
41         ignoredForFetchingResources = new HashSet();
42         ignoredForParsingResources = new HashSet();
43         forbiddenResources = new HashSet();
44         fetchErrorResources = new HashSet();
45         parseErrorResources = new HashSet();
46         parsedResources = new HashSet();
47         knownURLs = new HashMap();
48         this.byId = new HashMap();
49         this.referees = new HashMap();
50         this.referers = new HashMap();
51         this.byFolder = new HashMap();
52         this.rootResources = new HashMap();
53     }
54
55     public void create(int id, ResourceInternal resource) {
56         URL JavaDoc url = resource.getURL();
57             knownURLs.put(url, resource);
58             byId.put(new Integer JavaDoc(id), resource);
59
60             if (resource.getFolder() == null) {
61                 Set set = (Set) rootResources.get(URLUtil.getSiteURL(url));
62                 if (set == null) {
63                     set = new HashSet();
64                     rootResources.put(URLUtil.getSiteURL(url), set);
65                 }
66                 set.add(resource);
67             } else {
68                 Set set = (Set) byFolder.get(resource.getFolder());
69                 if (set == null) {
70                     set = new HashSet();
71                     byFolder.put(resource.getFolder(), set);
72                 }
73                 set.add(resource);
74             }
75     }
76
77     public void registerURLReference(URL JavaDoc url, URL JavaDoc refererURL) {
78         ResourceInternal resource = (ResourceInternal) knownURLs.get(url);
79         if (refererURL != null) {
80             ResourceInternal referer = (ResourceInternal) knownURLs.get(refererURL);
81             storeRef(referers, resource, referer, refererURL, url);
82             storeRef(referees, referer, resource, refererURL, url);
83         }
84     }
85
86     public ResourceInternal[] findByFolder(FolderInternal folder) {
87         Set set = (Set) byFolder.get(folder);
88         if (set == null) {
89             return new ResourceInternal[0];
90         }
91         return (ResourceInternal[]) set.toArray(new ResourceInternal[set.size()]);
92     }
93
94     protected void storeRef(Map map, ResourceInternal key, ResourceInternal data, URL JavaDoc referer, URL JavaDoc referee) {
95         Map refmap = (Map) map.get(key.getURL());
96         if (refmap == null) {
97             refmap = new HashMap();
98             map.put(key.getURL(), refmap);
99         }
100         ResourceReferenceInternal rri = (ResourceReferenceInternal) refmap.get(data.getURL());
101         if (rri == null) {
102             rri = new ResourceReferenceInternal(storage, referer, referee, 0);
103             refmap.put(data.getURL(), rri);
104         }
105         rri.incrementCount();
106     }
107
108     public ResourceInternal[] findAllResources() {
109         return (ResourceInternal[]) knownURLs.values().toArray(new ResourceInternal[knownURLs.size()]);
110     }
111
112     public ResourceInternal[] getRefereringResources(ResourceInternal resource) {
113         ResourceReferenceInternal[] refs = getIncomingReferences(resource);
114         ArrayList al = new ArrayList();
115         for (int i = 0; i < refs.length; i++) {
116             ResourceReferenceInternal ref = refs[i];
117             al.add(ref.getReferer());
118         }
119         return (ResourceInternal[]) al.toArray(new ResourceInternal[al.size()]);
120     }
121
122     public ResourceReferenceInternal[] getOutgoingReferences(ResourceInternal resource) {
123         Map map = (Map) referees.get(resource.getURL());
124         if (map == null) {
125             return new ResourceReferenceInternal[0];
126         } else {
127             return (ResourceReferenceInternal[]) map.values().toArray(new ResourceReferenceInternal[map.size()]);
128         }
129     }
130
131     public ResourceReferenceInternal[] getIncomingReferences(ResourceInternal resource) {
132         Map map = (Map) referers.get(resource.getURL());
133         if (map == null) {
134             return new ResourceReferenceInternal[0];
135         } else {
136             return (ResourceReferenceInternal[]) map.values().toArray(new ResourceReferenceInternal[map.size()]);
137         }
138     }
139
140     public ResourceInternal[] getReferencedResources(ResourceInternal resource) {
141         ResourceReferenceInternal[] refs = getOutgoingReferences(resource);
142         ArrayList al = new ArrayList();
143         for (int i = 0; i < refs.length; i++) {
144             ResourceReferenceInternal ref = refs[i];
145             al.add(ref.getReferee());
146         }
147         return (ResourceInternal[]) al.toArray(new ResourceInternal[al.size()]);
148     }
149
150     public ResourceInternal[] getBySite(SiteInternal site) {
151         ArrayList al = new ArrayList();
152         Iterator it = knownURLs.keySet().iterator();
153         while (it.hasNext()) {
154             URL JavaDoc url = (URL JavaDoc) it.next();
155             URL JavaDoc siteURL = URLUtil.getSiteURL(url);
156             if (site.getURL().equals(siteURL)) {
157                 al.add(getResource(url));
158             }
159         }
160         return (ResourceInternal[]) al.toArray(new ResourceInternal[al.size()]);
161     }
162
163     public ResourceInternal[] getRootResources(SiteInternal site) {
164         Set set = (Set) rootResources.get(site.getURL());
165         if ( set == null ) {
166             return new ResourceInternal[0];
167         } else {
168             return (ResourceInternal[]) set.toArray(new ResourceInternal[set.size()]);
169         }
170     }
171
172     public ResourceInternal getResource(int id) {
173         return (ResourceInternal)byId.get(new Integer JavaDoc(id));
174     }
175
176     public ResourceInternal getResource(URL JavaDoc url) {
177         return (ResourceInternal) knownURLs.get(url);
178     }
179
180     public synchronized void setSpidered(URL JavaDoc url, URLSpideredOkEvent event) {
181         ResourceInternal resource = getResource(url);
182         resource.setFetched(event.getHttpStatus(), event.getSize(), event.getTimeMs(), event.getMimeType(), null, event.getHeaders());
183         resource.setBytes(event.getBytes());
184     }
185
186     public synchronized void setIgnoredForParsing(URL JavaDoc url) throws InvalidStateTransitionException {
187         ResourceInternal resource = getResource(url);
188         resource.setParseIgnored();
189         ignoredForParsingResources.add(url);
190     }
191
192     public synchronized void setIgnoredForFetching(URL JavaDoc url, URLFoundEvent event) throws InvalidStateTransitionException {
193         ResourceInternal resource = getResource(url);
194         resource.setFetchIgnored();
195         ignoredForFetchingResources.add(event.getFoundURL());
196     }
197
198     public synchronized void setForbidden(URL JavaDoc url, URLFoundEvent event) throws InvalidStateTransitionException {
199         ResourceInternal resource = getResource(url);
200         resource.setForbidden();
201         forbiddenResources.add(event.getFoundURL());
202     }
203
204     public synchronized void setError(URL JavaDoc url, ResourceParsedErrorEvent event) throws InvalidStateTransitionException {
205         ResourceInternal resource = getResource(url);
206         resource.setParseError();
207         parseErrorResources.add(url);
208     }
209
210     public synchronized void setParsed(URL JavaDoc url, ResourceParsedOkEvent event) throws InvalidStateTransitionException {
211         ResourceInternal resource = getResource(url);
212         resource.setParsed();
213         parsedResources.add(resource);
214     }
215
216     public synchronized void setError(URL JavaDoc url, URLSpideredErrorEvent event) throws InvalidStateTransitionException {
217         ResourceInternal resource = getResource(url);
218         resource.setFetchError(event.getHttpStatus(), event.getHeaders());
219         fetchErrorResources.add(url);
220     }
221
222
223 }
224
Popular Tags