| 1 package net.javacoding.jspider.core.storage.memory; 2 3 import net.javacoding.jspider.core.event.impl.*; 4 import net.javacoding.jspider.core.model.*; 5 import net.javacoding.jspider.core.storage.spi.ResourceDAOSPI; 6 import net.javacoding.jspider.core.storage.spi.StorageSPI; 7 import net.javacoding.jspider.core.storage.exception.InvalidStateTransitionException; 8 import net.javacoding.jspider.core.util.URLUtil; 9 10 import java.net.URL ; 11 import java.util.*; 12 13 16 class ResourceDAOImpl implements ResourceDAOSPI { 17 18 protected StorageSPI storage; 19 20 protected Map knownURLs; 21 protected Map byId; 22 23 protected Set spideredResources; 24 25 protected Set ignoredForFetchingResources; 26 protected Set ignoredForParsingResources; 27 protected Set forbiddenResources; 28 protected Set fetchErrorResources; 29 protected Set parseErrorResources; 30 protected Set parsedResources; 31 32 protected Map referers; 33 protected Map referees; 34 35 protected Map byFolder; 36 protected Map rootResources; 37 38 public ResourceDAOImpl(StorageSPI storage) { 39 this.storage = storage; 40 spideredResources = new HashSet(); 41 ignoredForFetchingResources = new HashSet(); 42 ignoredForParsingResources = new HashSet(); 43 forbiddenResources = new HashSet(); 44 fetchErrorResources = new HashSet(); 45 parseErrorResources = new HashSet(); 46 parsedResources = new HashSet(); 47 knownURLs = new HashMap(); 48 this.byId = new HashMap(); 49 this.referees = new HashMap(); 50 this.referers = new HashMap(); 51 this.byFolder = new HashMap(); 52 this.rootResources = new HashMap(); 53 } 54 55 public void create(int id, ResourceInternal resource) { 56 URL url = resource.getURL(); 57 knownURLs.put(url, resource); 58 byId.put(new Integer (id), resource); 59 60 if (resource.getFolder() == null) { 61 Set set = (Set) rootResources.get(URLUtil.getSiteURL(url)); 62 if (set == null) { 63 set = new HashSet(); 64 rootResources.put(URLUtil.getSiteURL(url), set); 65 } 66 set.add(resource); 67 } else { 68 Set set = (Set) byFolder.get(resource.getFolder()); 69 if (set == null) { 70 set = new HashSet(); 71 byFolder.put(resource.getFolder(), set); 72 } 73 set.add(resource); 74 } 75 } 76 77 public void registerURLReference(URL url, URL refererURL) { 78 ResourceInternal resource = (ResourceInternal) knownURLs.get(url); 79 if (refererURL != null) { 80 ResourceInternal referer = (ResourceInternal) knownURLs.get(refererURL); 81 storeRef(referers, resource, referer, refererURL, url); 82 storeRef(referees, referer, resource, refererURL, url); 83 } 84 } 85 86 public ResourceInternal[] findByFolder(FolderInternal folder) { 87 Set set = (Set) byFolder.get(folder); 88 if (set == null) { 89 return new ResourceInternal[0]; 90 } 91 return (ResourceInternal[]) set.toArray(new ResourceInternal[set.size()]); 92 } 93 94 protected void storeRef(Map map, ResourceInternal key, ResourceInternal data, URL referer, URL referee) { 95 Map refmap = (Map) map.get(key.getURL()); 96 if (refmap == null) { 97 refmap = new HashMap(); 98 map.put(key.getURL(), refmap); 99 } 100 ResourceReferenceInternal rri = (ResourceReferenceInternal) refmap.get(data.getURL()); 101 if (rri == null) { 102 rri = new ResourceReferenceInternal(storage, referer, referee, 0); 103 refmap.put(data.getURL(), rri); 104 } 105 rri.incrementCount(); 106 } 107 108 public ResourceInternal[] findAllResources() { 109 return (ResourceInternal[]) knownURLs.values().toArray(new ResourceInternal[knownURLs.size()]); 110 } 111 112 public ResourceInternal[] getRefereringResources(ResourceInternal resource) { 113 ResourceReferenceInternal[] refs = getIncomingReferences(resource); 114 ArrayList al = new ArrayList(); 115 for (int i = 0; i < refs.length; i++) { 116 ResourceReferenceInternal ref = refs[i]; 117 al.add(ref.getReferer()); 118 } 119 return (ResourceInternal[]) al.toArray(new ResourceInternal[al.size()]); 120 } 121 122 public ResourceReferenceInternal[] getOutgoingReferences(ResourceInternal resource) { 123 Map map = (Map) referees.get(resource.getURL()); 124 if (map == null) { 125 return new ResourceReferenceInternal[0]; 126 } else { 127 return (ResourceReferenceInternal[]) map.values().toArray(new ResourceReferenceInternal[map.size()]); 128 } 129 } 130 131 public ResourceReferenceInternal[] getIncomingReferences(ResourceInternal resource) { 132 Map map = (Map) referers.get(resource.getURL()); 133 if (map == null) { 134 return new ResourceReferenceInternal[0]; 135 } else { 136 return (ResourceReferenceInternal[]) map.values().toArray(new ResourceReferenceInternal[map.size()]); 137 } 138 } 139 140 public ResourceInternal[] getReferencedResources(ResourceInternal resource) { 141 ResourceReferenceInternal[] refs = getOutgoingReferences(resource); 142 ArrayList al = new ArrayList(); 143 for (int i = 0; i < refs.length; i++) { 144 ResourceReferenceInternal ref = refs[i]; 145 al.add(ref.getReferee()); 146 } 147 return (ResourceInternal[]) al.toArray(new ResourceInternal[al.size()]); 148 } 149 150 public ResourceInternal[] getBySite(SiteInternal site) { 151 ArrayList al = new ArrayList(); 152 Iterator it = knownURLs.keySet().iterator(); 153 while (it.hasNext()) { 154 URL url = (URL ) it.next(); 155 URL siteURL = URLUtil.getSiteURL(url); 156 if (site.getURL().equals(siteURL)) { 157 al.add(getResource(url)); 158 } 159 } 160 return (ResourceInternal[]) al.toArray(new ResourceInternal[al.size()]); 161 } 162 163 public ResourceInternal[] getRootResources(SiteInternal site) { 164 Set set = (Set) rootResources.get(site.getURL()); 165 if ( set == null ) { 166 return new ResourceInternal[0]; 167 } else { 168 return (ResourceInternal[]) set.toArray(new ResourceInternal[set.size()]); 169 } 170 } 171 172 public ResourceInternal getResource(int id) { 173 return (ResourceInternal)byId.get(new Integer (id)); 174 } 175 176 public ResourceInternal getResource(URL url) { 177 return (ResourceInternal) knownURLs.get(url); 178 } 179 180 public synchronized void setSpidered(URL url, URLSpideredOkEvent event) { 181 ResourceInternal resource = getResource(url); 182 resource.setFetched(event.getHttpStatus(), event.getSize(), event.getTimeMs(), event.getMimeType(), null, event.getHeaders()); 183 resource.setBytes(event.getBytes()); 184 } 185 186 public synchronized void setIgnoredForParsing(URL url) throws InvalidStateTransitionException { 187 ResourceInternal resource = getResource(url); 188 resource.setParseIgnored(); 189 ignoredForParsingResources.add(url); 190 } 191 192 public synchronized void setIgnoredForFetching(URL url, URLFoundEvent event) throws InvalidStateTransitionException { 193 ResourceInternal resource = getResource(url); 194 resource.setFetchIgnored(); 195 ignoredForFetchingResources.add(event.getFoundURL()); 196 } 197 198 public synchronized void setForbidden(URL url, URLFoundEvent event) throws InvalidStateTransitionException { 199 ResourceInternal resource = getResource(url); 200 resource.setForbidden(); 201 forbiddenResources.add(event.getFoundURL()); 202 } 203 204 public synchronized void setError(URL url, ResourceParsedErrorEvent event) throws InvalidStateTransitionException { 205 ResourceInternal resource = getResource(url); 206 resource.setParseError(); 207 parseErrorResources.add(url); 208 } 209 210 public synchronized void setParsed(URL url, ResourceParsedOkEvent event) throws InvalidStateTransitionException { 211 ResourceInternal resource = getResource(url); 212 resource.setParsed(); 213 parsedResources.add(resource); 214 } 215 216 public synchronized void setError(URL url, URLSpideredErrorEvent event) throws InvalidStateTransitionException { 217 ResourceInternal resource = getResource(url); 218 resource.setFetchError(event.getHttpStatus(), event.getHeaders()); 219 fetchErrorResources.add(url); 220 } 221 222 223 } 224 | Popular Tags |