package net.javacoding.jspider.core.model;

import net.javacoding.jspider.api.model.*;
import net.javacoding.jspider.Constants;
import net.javacoding.jspider.core.storage.spi.StorageSPI;

import java.net.URL;

/**
 * Storage-backed implementation of the {@link Site} model interface.
 *
 * A site is identified by the host and port taken from its URL. Two sites
 * are considered equal when their hosts match case-insensitively and their
 * ports are identical; the hash code is derived from the same two values so
 * that equal sites always land in the same hash bucket.
 *
 * Folder, resource and cookie lookups are delegated to the DAOs exposed by
 * the {@link StorageSPI} handed to the constructor.
 */
public class SiteInternal implements Site {

    protected StorageSPI storage;

    protected int id;
    protected boolean handle;
    protected URL url;
    protected String host;
    protected int port;
    /** Hash computed once in the constructor from the case-folded host and the port. */
    protected int hashCode;
    protected boolean isBaseSite;
    protected boolean hasRobotsTXT;
    protected boolean useCookies;
    protected boolean useProxy;
    protected int state;
    protected boolean obeyRobotsTXT;
    /** Not set by any constructor; stays false until setFetchRobotsTXT is called. */
    protected boolean fetchRobotsTXT;
    protected String userAgent;

    /**
     * Creates a site in state {@link Site#STATE_DISCOVERED} that obeys
     * robots.txt, uses the proxy, uses cookies and identifies itself with
     * {@link Constants#USERAGENT}.
     *
     * @param storage    storage provider used for folder/resource/cookie lookups
     * @param id         identifier of the site
     * @param handle     whether this site must be handled
     * @param url        URL of the site; must not be null
     * @param isBaseSite whether this is the base site
     */
    public SiteInternal(StorageSPI storage, int id, boolean handle, URL url, boolean isBaseSite) {
        this(storage, id, handle, url, Site.STATE_DISCOVERED, true, true, true, Constants.USERAGENT, isBaseSite);
    }

    /**
     * Creates a non-base site that must be handled, with all other settings
     * defaulted as in the five-argument constructor.
     *
     * @param id      identifier of the site
     * @param storage storage provider used for folder/resource/cookie lookups
     * @param url     URL of the site; must not be null
     */
    public SiteInternal(int id, StorageSPI storage, URL url) {
        this(storage, id, true, url, false);
    }

    /**
     * Creates a fully specified site. Host and port are read from the URL
     * (the port is whatever {@link URL#getPort()} reports), and the hash
     * code is computed here once.
     *
     * @param storage       storage provider used for folder/resource/cookie lookups
     * @param id            identifier of the site
     * @param handle        whether this site must be handled
     * @param url           URL of the site; a null value causes a NullPointerException
     * @param state         initial state, one of the Site.STATE_* constants
     * @param obeyRobotsTXT whether robots.txt must be obeyed
     * @param useProxy      whether the proxy is used for this site
     * @param useCookies    whether cookies are used for this site
     * @param userAgent     user agent string for this site
     * @param isBaseSite    whether this is the base site
     */
    public SiteInternal(StorageSPI storage, int id, boolean handle, URL url, int state, boolean obeyRobotsTXT, boolean useProxy, boolean useCookies, String userAgent, boolean isBaseSite) {
        this.storage = storage;
        this.handle = handle;
        this.id = id;
        this.url = url;
        this.host = url.getHost();
        this.port = url.getPort();
        this.state = state;
        this.obeyRobotsTXT = obeyRobotsTXT;
        this.useProxy = useProxy;
        this.useCookies = useCookies;
        this.userAgent = userAgent;
        this.isBaseSite = isBaseSite;
        // equals() ignores host case, so the hash must ignore it as well;
        // otherwise equal sites could end up in different hash buckets.
        this.hashCode = (foldCase(host) + port).hashCode();
    }

    /**
     * Folds every character the same way String.equalsIgnoreCase compares
     * them (upper-case, then lower-case), so that two strings that are equal
     * ignoring case always fold to the same result.
     */
    private static String foldCase(String value) {
        char[] chars = value.toCharArray();
        for (int i = 0; i < chars.length; i++) {
            chars[i] = Character.toLowerCase(Character.toUpperCase(chars[i]));
        }
        return new String(chars);
    }

    /** @return the identifier of this site */
    public int getId() {
        return id;
    }

    /** @param id the new identifier of this site */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the current state, one of the Site.STATE_* constants */
    public int getState() {
        return state;
    }

    /** @return the host part of the site URL */
    public String getHost() {
        return host;
    }

    /** @return the port as reported by the site URL */
    public int getPort() {
        return port;
    }

    /**
     * @return true when the state is one of HANDLED, UNEXISTING, ERROR or
     *         SKIPPED, i.e. anything robots.txt related has been settled
     */
    public boolean isRobotsTXTHandled() {
        return (state == Site.STATE_ROBOTSTXT_HANDLED)
                || (state == Site.STATE_ROBOTSTXT_UNEXISTING)
                || (state == Site.STATE_ROBOTSTXT_ERROR)
                || (state == Site.STATE_ROBOTSTXT_SKIPPED);
    }

    /** @return the URL this site was created with */
    public URL getURL() {
        return url;
    }

    /** @return the root folders of this site as found by the folder DAO */
    public Folder[] getRootFolders() {
        return storage.getFolderDAO().findSiteRootFolders(this);
    }

    /**
     * Looks up a root folder by exact name.
     *
     * @param name the folder name to look for
     * @return the first root folder whose name equals the given name, or
     *         null when there is none
     */
    public Folder getRootFolder(String name) {
        Folder[] folders = getRootFolders();
        for (int i = 0; i < folders.length; i++) {
            Folder folder = folders[i];
            if (folder.getName().equals(name)) {
                return folder;
            }
        }
        return null;
    }

    /** @return the root resources of this site as found by the resource DAO */
    public Resource[] getRootResources() {
        return storage.getResourceDAO().getRootResources(this);
    }

    /** @return all resources of this site as found by the resource DAO */
    public Resource[] getAllResources() {
        return storage.getResourceDAO().getBySite(this);
    }

    /** @return the cookies the cookie DAO finds for this site's id */
    public Cookie[] getCookies() {
        return storage.getCookieDAO().find(id);
    }

    /**
     * Renders all cookies as "name=value; " pairs, concatenated in the
     * order returned by {@link #getCookies()}. Every pair, including the
     * last one, is followed by "; ".
     *
     * @return the concatenated cookie string, empty when there are no cookies
     */
    public String getCookieString() {
        Cookie[] c = getCookies();
        StringBuffer sb = new StringBuffer();
        for (int i = 0; i < c.length; i++) {
            Cookie cookie = c[i];
            sb.append(cookie.getName());
            sb.append("=");
            sb.append(cookie.getValue());
            sb.append("; ");
        }
        return sb.toString();
    }

    /** @return whether cookies are used for this site */
    public boolean getUseCookies() {
        return useCookies;
    }

    /**
     * Two sites are equal when their hosts match ignoring case and their
     * ports are identical.
     *
     * @param object the object to compare with
     * @return true when the given object is a Site with the same host
     *         (case-insensitive) and port
     */
    public boolean equals(Object object) {
        if (object instanceof Site) {
            Site other = (Site) object;
            // Call on our own host so a null host on the other side yields
            // false instead of a NullPointerException.
            return other.getPort() == port && host.equalsIgnoreCase(other.getHost());
        } else {
            return false;
        }
    }

    /** @return the hash computed at construction time from host (case-folded) and port */
    public int hashCode() {
        return hashCode;
    }

    /** @return whether the proxy is used for this site */
    public boolean getUseProxy() {
        return useProxy;
    }

    /** Moves the site to state {@link Site#STATE_ROBOTSTXT_UNEXISTING}. */
    public void registerNoRobotsTXTFound() {
        state = Site.STATE_ROBOTSTXT_UNEXISTING;
    }

    /** Moves the site to state {@link Site#STATE_ROBOTSTXT_ERROR}. */
    public void registerRobotsTXTError() {
        state = Site.STATE_ROBOTSTXT_ERROR;
    }

    /** Moves the site to state {@link Site#STATE_ROBOTSTXT_HANDLED}. */
    public void registerRobotsTXT() {
        this.state = Site.STATE_ROBOTSTXT_HANDLED;
    }

    /** Moves the site to state {@link Site#STATE_ROBOTSTXT_SKIPPED}. */
    public void registerRobotsTXTSkipped() {
        this.state = Site.STATE_ROBOTSTXT_SKIPPED;
    }

    /** @param useCookies whether cookies are used for this site */
    public void setUseCookies(boolean useCookies) {
        this.useCookies = useCookies;
    }

    /** @param useProxy whether the proxy is used for this site */
    public void setUseProxy(boolean useProxy) {
        this.useProxy = useProxy;
    }

    /** @param obey whether robots.txt must be obeyed for this site */
    public void setObeyRobotsTXT(boolean obey) {
        this.obeyRobotsTXT = obey;
    }

    /** @return whether robots.txt must be obeyed for this site */
    public boolean getObeyRobotsTXT() {
        return this.obeyRobotsTXT;
    }

    /** @return the fetch-robots.txt flag; false unless set via setFetchRobotsTXT */
    public boolean getFetchRobotsTXT() {
        return fetchRobotsTXT;
    }

    /** @param fetchRobotsTXT the new value of the fetch-robots.txt flag */
    public void setFetchRobotsTXT(boolean fetchRobotsTXT) {
        this.fetchRobotsTXT = fetchRobotsTXT;
    }

    /**
     * @return a symbolic name for the current state, or
     *         "&lt;ERROR_UNKNOWN_STATE&gt;" when the state matches none of
     *         the known Site.STATE_* constants
     */
    public String translateState() {
        switch (state) {
            case Site.STATE_DISCOVERED:
                return "DISCOVERED";
            case Site.STATE_ROBOTSTXT_ERROR:
                return "ROBOTSTXT_ERROR";
            case Site.STATE_ROBOTSTXT_UNEXISTING:
                return "ROBOTSTXT_UNEXISTING";
            case Site.STATE_ROBOTSTXT_HANDLED:
                return "ROBOTSTXT_HANDLED";
            case Site.STATE_ROBOTSTXT_SKIPPED:
                return "ROBOTSTXT_SKIPPED";
        }
        return "<ERROR_UNKNOWN_STATE>";
    }

    /**
     * @return "[Site: url - STATE]" with " *" appended after the state for
     *         the base site
     */
    public String toString() {
        return "[Site: " + url + " - " + translateState() + ((isBaseSite) ? " *" : "") + "]";
    }

    /** @return the user agent string for this site */
    public String getUserAgent() {
        return userAgent;
    }

    /** @param userAgent the user agent string for this site */
    public void setUserAgent(String userAgent) {
        this.userAgent = userAgent;
    }

    /** @param isBaseSite whether this is the base site */
    public void setBaseSite(boolean isBaseSite) {
        this.isBaseSite = isBaseSite;
    }

    /** @return whether this is the base site */
    public boolean isBaseSite() {
        return isBaseSite;
    }

    /** @return whether this site must be handled */
    public boolean mustHandle() {
        return handle;
    }

    /**
     * Returns the same value as {@link #mustHandle()}.
     * NOTE(review): presumably kept as a bean-style getter alias - confirm against callers.
     *
     * @return whether this site must be handled
     */
    public boolean getMustHandle() {
        return handle;
    }

    /** @param mustHandle whether this site must be handled */
    public void setHandle(boolean mustHandle) {
        this.handle = mustHandle;
    }

}