1 package net.javacoding.jspider.core.storage.jdbc; 2 3 import net.javacoding.jspider.core.event.impl.*; 4 import net.javacoding.jspider.core.model.*; 5 import net.javacoding.jspider.core.storage.spi.ResourceDAOSPI; 6 import net.javacoding.jspider.core.storage.spi.StorageSPI; 7 import net.javacoding.jspider.core.storage.exception.InvalidStateTransitionException; 8 import net.javacoding.jspider.core.logging.LogFactory; 9 import net.javacoding.jspider.core.logging.Log; 10 11 import java.net.MalformedURLException ; 12 import java.net.URL ; 13 import java.sql.*; 14 import java.util.ArrayList ; 15 16 19 class ResourceDAOImpl implements ResourceDAOSPI { 20 21 public static final String ATTRIBUTE_ID = "id"; 22 public static final String ATTRIBUTE_SITE = "site"; 23 public static final String ATTRIBUTE_URL = "url"; 24 public static final String ATTRIBUTE_STATE = "state"; 25 public static final String ATTRIBUTE_MIME = "mimetype"; 26 public static final String ATTRIBUTE_TIME = "timems"; 27 public static final String ATTRIBUTE_SIZE = "size"; 28 public static final String ATTRIBUTE_FOLDER = "folder"; 29 public static final String ATTRIBUTE_HTTP_STATUS = "httpstatus"; 30 31 protected DBUtil dbUtil; 32 protected StorageSPI storage; 33 protected Log log; 34 35 public ResourceDAOImpl(StorageSPI storage, DBUtil dbUtil) { 36 this.storage = storage; 37 this.dbUtil = dbUtil; 38 this.log = LogFactory.getLog(ResourceDAOImpl.class); 39 } 40 41 public void registerURLReference(URL url, URL refererURL) { 42 ResourceInternal resource = getResource(url); 43 Statement st = null; 44 ResultSet rs = null; 45 if (refererURL != null) { 46 ResourceInternal referer = getResource(refererURL); 47 try { 48 int from = referer.getId(); 49 int to = resource.getId(); 50 Connection connection = dbUtil.getConnection(); 51 52 st = connection.createStatement(); 53 rs = st.executeQuery("select count(*) from jspider_resource_reference where referer = " + from + " and referee = " + to); 54 rs.next(); 55 Statement st2 = connection.createStatement(); 56 if (rs.getInt(1) == 0) { 57 st2.executeUpdate("insert into jspider_resource_reference ( referer, referee, count ) values (" + from + "," + to + ", 1)"); 58 } else { 59 st2.executeUpdate("update jspider_resource_reference set count = count + 1 where referer = " + from + " and referee = " + to); 60 } 61 } catch (SQLException e) { 62 log.error("SQLException", e); 63 } finally { 64 dbUtil.safeClose(rs, log); 65 dbUtil.safeClose(st, log); 66 } 67 } 68 } 69 70 public ResourceInternal[] findAllResources() { 71 ArrayList al = new ArrayList (); 72 Statement st = null; 73 ResultSet rs = null; 74 try { 75 Connection connection = dbUtil.getConnection(); 76 st = connection.createStatement(); 77 rs = st.executeQuery("select * from jspider_resource"); 78 while (rs.next()) { 79 al.add(createResourceFromRecord(rs)); 80 } 81 } catch (SQLException e) { 82 log.error("SQLException", e); 83 } finally { 84 dbUtil.safeClose(rs, log); 85 dbUtil.safeClose(st, log); 86 } 87 return (ResourceInternal[]) al.toArray(new ResourceInternal[al.size()]); 88 } 89 90 public ResourceInternal[] getRefereringResources(ResourceInternal resource) { 91 ArrayList al = new ArrayList (); 92 Statement st = null; 93 ResultSet rs = null; 94 try { 95 Connection connection = dbUtil.getConnection(); 96 st = connection.createStatement(); 97 rs = st.executeQuery("select * from jspider_resource, jspider_resource_reference where jspider_resource.id = jspider_resource_reference.referer and jspider_resource_reference.referee = " + resource.getId()); 98 while (rs.next()) { 99 al.add(createResourceFromRecord(rs)); 100 } 101 } catch (SQLException e) { 102 log.error("SQLException", e); 103 } finally { 104 dbUtil.safeClose(rs, log); 105 dbUtil.safeClose(st, log); 106 } 107 return (ResourceInternal[]) al.toArray(new ResourceInternal[al.size()]); 108 } 109 110 public ResourceReferenceInternal[] getOutgoingReferences(ResourceInternal resource) { 111 ArrayList al = new ArrayList (); 112 Statement st = null; 113 ResultSet rs = null; 114 try { 115 Connection connection = dbUtil.getConnection(); 116 st = connection.createStatement(); 117 rs = st.executeQuery("select referer.url as referer, referee.url as referee, count from jspider_resource referer, jspider_resource referee, jspider_resource_reference where jspider_resource_reference.referer = " + resource.getId() + " and jspider_resource_reference.referee = referee.id and jspider_resource_reference.referer = referer.id"); 118 while (rs.next()) { 119 al.add(createResourceReferenceFromRecord(rs)); 120 } 121 } catch (SQLException e) { 122 log.error("SQLException", e); 123 } finally { 124 dbUtil.safeClose(rs, log); 125 dbUtil.safeClose(st, log); 126 } 127 return (ResourceReferenceInternal[]) al.toArray(new ResourceReferenceInternal[al.size()]); 128 } 129 130 public ResourceReferenceInternal[] getIncomingReferences(ResourceInternal resource) { 131 ArrayList al = new ArrayList (); 132 Statement st = null; 133 ResultSet rs = null; 134 try { 135 Connection connection = dbUtil.getConnection(); 136 st = connection.createStatement(); 137 rs = st.executeQuery("select referer.url as referer, referee.url as referee, count from jspider_resource referer, jspider_resource referee, jspider_resource_reference where jspider_resource_reference.referee = " + resource.getId() + " and jspider_resource_reference.referee = referee.id and jspider_resource_reference.referer = referer.id"); 138 while (rs.next()) { 139 al.add(createResourceReferenceFromRecord(rs)); 140 } 141 } catch (SQLException e) { 142 log.error("SQLException", e); 143 } finally { 144 dbUtil.safeClose(rs, log); 145 dbUtil.safeClose(st, log); 146 } 147 return (ResourceReferenceInternal[]) al.toArray(new ResourceReferenceInternal[al.size()]); 148 } 149 150 public ResourceInternal[] getReferencedResources(ResourceInternal resource) { 151 ArrayList al = new ArrayList (); 152 Statement st = null; 153 ResultSet rs = null; 154 try { 155 Connection connection = dbUtil.getConnection(); 156 st = connection.createStatement(); 157 rs = st.executeQuery("select * from jspider_resource, jspider_resource_reference where jspider_resource.id = jspider_resource_reference.referee and jspider_resource_reference.referer = " + resource.getId()); 158 while (rs.next()) { 159 al.add(createResourceFromRecord(rs)); 160 } 161 } catch (SQLException e) { 162 log.error("SQLException", e); 163 } finally { 164 dbUtil.safeClose(rs, log); 165 dbUtil.safeClose(st, log); 166 } 167 return (ResourceInternal[]) al.toArray(new ResourceInternal[al.size()]); 168 } 169 170 171 public ResourceInternal[] findByFolder(FolderInternal folder) { 172 ArrayList al = new ArrayList (); 173 Statement st = null; 174 ResultSet rs = null; 175 try { 176 Connection connection = dbUtil.getConnection(); 177 st = connection.createStatement(); 178 rs = st.executeQuery("select * from jspider_resource where folder=" + folder.getId()); 179 while (rs.next()) { 180 al.add(createResourceFromRecord(rs)); 181 } 182 } catch (SQLException e) { 183 log.error("SQLException", e); 184 } finally { 185 dbUtil.safeClose(rs, log); 186 dbUtil.safeClose(st, log); 187 } 188 return (ResourceInternal[]) al.toArray(new ResourceInternal[al.size()]); 189 } 190 191 public ResourceInternal[] getBySite(SiteInternal site) { 192 ArrayList al = new ArrayList (); 193 Statement st = null; 194 ResultSet rs = null; 195 try { 196 Connection connection = dbUtil.getConnection(); 197 st = connection.createStatement(); 198 rs = st.executeQuery("select * from jspider_resource where site=" + site.getId()); 199 while (rs.next()) { 200 al.add(createResourceFromRecord(rs)); 201 } 202 } catch (SQLException e) { 203 log.error("SQLException", e); 204 } finally { 205 dbUtil.safeClose(rs, log); 206 dbUtil.safeClose(st, log); 207 } 208 return (ResourceInternal[]) al.toArray(new ResourceInternal[al.size()]); 209 } 210 211 public ResourceInternal[] getRootResources(SiteInternal site) { 212 ArrayList al = new ArrayList (); 213 Statement st = null; 214 ResultSet rs = null; 215 try { 216 Connection connection = dbUtil.getConnection(); 217 st = connection.createStatement(); 218 rs = st.executeQuery("select * from jspider_resource where site=" + site.getId() + " and folder=0"); 219 while (rs.next()) { 220 al.add(createResourceFromRecord(rs)); 221 } 222 } catch (SQLException e) { 223 log.error("SQLException", e); 224 } finally { 225 dbUtil.safeClose(rs, log); 226 dbUtil.safeClose(st, log); 227 } 228 return (ResourceInternal[]) al.toArray(new ResourceInternal[al.size()]); 229 } 230 231 public synchronized void setSpidered(URL url, URLSpideredOkEvent event) { 232 ResourceInternal resource = getResource(url); 233 resource.setFetched(event.getHttpStatus(), event.getSize(), event.getTimeMs(), event.getMimeType(), null, event.getHeaders()); 234 save(resource); 235 resource.setBytes(event.getBytes()); 236 } 237 238 public synchronized void setIgnoredForParsing(URL url) throws InvalidStateTransitionException { 239 ResourceInternal resource = getResource(url); 240 resource.setParseIgnored(); 241 save(resource); 242 } 243 244 public synchronized void setIgnoredForFetching(URL url, URLFoundEvent event) throws InvalidStateTransitionException { 245 ResourceInternal resource = getResource(url); 246 resource.setFetchIgnored(); 247 save(resource); 248 } 249 250 public synchronized void setForbidden(URL url, URLFoundEvent event) throws InvalidStateTransitionException { 251 ResourceInternal resource = getResource(url); 252 resource.setForbidden(); 253 save(resource); 254 } 255 256 public synchronized void setError(URL url, ResourceParsedErrorEvent event) throws InvalidStateTransitionException { 257 ResourceInternal resource = getResource(url); 258 resource.setParseError(); 259 save(resource); 260 } 261 262 public synchronized void setParsed(URL url, ResourceParsedOkEvent event) throws InvalidStateTransitionException { 263 ResourceInternal resource = getResource(url); 264 resource.setParsed(); 265 save(resource); 266 } 267 268 public synchronized void setError(URL url, URLSpideredErrorEvent event) throws InvalidStateTransitionException { 269 ResourceInternal resource = getResource(url); 270 resource.setFetchError(event.getHttpStatus(), event.getHeaders()); 271 save(resource); 272 } 273 274 public ResourceInternal getResource(int id) { 275 ResourceInternal resource = null; 276 Statement st = null; 277 ResultSet rs = null; 278 try { 279 st = dbUtil.getConnection().createStatement(); 280 rs = st.executeQuery("select * from jspider_resource where id='" + id + "'"); 281 if (rs.next()) { 282 resource = createResourceFromRecord(rs); 283 } 284 } catch (SQLException e) { 285 log.error("SQLException", e); 286 } finally { 287 dbUtil.safeClose(rs, log); 288 dbUtil.safeClose(st, log); 289 } 290 return resource; 291 } 292 293 public ResourceInternal getResource(URL url) { 294 ResourceInternal resource = null; 295 Statement st = null; 296 ResultSet rs = null; 297 if (url != null) { 298 try { 299 st = dbUtil.getConnection().createStatement(); 300 rs = st.executeQuery("select * from jspider_resource where url='" + url + "'"); 301 if (rs.next()) { 302 resource = createResourceFromRecord(rs); 303 } 304 } catch (SQLException e) { 305 log.error("SQLException", e); 306 } finally { 307 dbUtil.safeClose(rs, log); 308 dbUtil.safeClose(st, log); 309 } 310 } 311 return resource; 312 } 313 314 public void create(int id, ResourceInternal resource) { 315 Connection connection = dbUtil.getConnection(); 316 StringBuffer sb = new StringBuffer (); 317 Statement st = null; 318 319 sb.append("insert into jspider_resource ("); 320 sb.append("id,"); 321 sb.append("url,"); 322 sb.append("site,"); 323 sb.append("state,"); 324 sb.append("httpstatus,"); 325 sb.append("timems,"); 326 sb.append("folder"); 327 sb.append(") values ("); 328 sb.append(DBUtil.format(id)); 329 sb.append(","); 330 sb.append(DBUtil.format(resource.getURL())); 331 sb.append(","); 332 sb.append(DBUtil.format(resource.getSiteId())); 333 sb.append(","); 334 sb.append(DBUtil.format(resource.getState())); 335 sb.append(","); 336 sb.append(DBUtil.format(resource.getHttpStatusInternal())); 337 sb.append(","); 338 sb.append(DBUtil.format(resource.getTimeMsInternal())); 339 sb.append(","); 340 FolderInternal folder = (FolderInternal) resource.getFolder(); 341 int folderId = (folder == null) ? 0 : folder.getId(); 342 sb.append(DBUtil.format(folderId)); 343 sb.append(")"); 344 try { 345 st = connection.createStatement(); 346 st.executeUpdate(sb.toString()); 347 } catch (SQLException e) { 348 log.error("SQLException", e); 349 } finally { 350 dbUtil.safeClose(st, log); 351 } 352 } 353 354 public void save(ResourceInternal resource) { 355 Connection connection = dbUtil.getConnection(); 356 StringBuffer sb = new StringBuffer (); 357 Statement st = null; 358 sb.append("update jspider_resource set "); 359 sb.append("state="); 360 sb.append(DBUtil.format(resource.getState())); 361 sb.append(",mimetype="); 362 sb.append(DBUtil.format(resource.getMimeInternal())); 363 sb.append(",httpstatus="); 364 sb.append(DBUtil.format(resource.getHttpStatusInternal())); 365 sb.append(",size="); 366 sb.append(DBUtil.format(resource.getSizeInternal())); 367 sb.append(",timems="); 368 sb.append(DBUtil.format(resource.getTimeMsInternal())); 369 sb.append(" where id="); 370 sb.append(DBUtil.format(resource.getId())); 371 try { 372 st = connection.createStatement(); 373 st.executeUpdate(sb.toString()); 374 } catch (SQLException e) { 375 log.error("SQLException", e); 376 } finally { 377 dbUtil.safeClose(st, log); 378 } 379 } 380 381 protected ResourceInternal createResourceFromRecord(ResultSet rs) throws SQLException { 382 int id = rs.getInt(ATTRIBUTE_ID); 383 int folderId = rs.getInt(ATTRIBUTE_FOLDER); 384 int siteId = rs.getInt(ATTRIBUTE_SITE); 385 String urlString = rs.getString(ATTRIBUTE_URL); 386 int state = rs.getInt(ATTRIBUTE_STATE); 387 String mime = rs.getString(ATTRIBUTE_MIME); 388 int time = rs.getInt(ATTRIBUTE_TIME); 389 int size = rs.getInt(ATTRIBUTE_SIZE); 390 int httpStatus = rs.getInt(ATTRIBUTE_HTTP_STATUS); 391 392 FolderInternal folder = storage.getFolderDAO().findById(folderId); 393 394 URL url = null; 395 try { 396 url = new URL (urlString); 397 } catch (MalformedURLException e) { 398 log.error("MalformedURLException", e); 399 } 400 ResourceInternal ri = new ResourceInternal(storage, id, siteId, url, null, folder); 401 ri.setSize(size); 402 ri.setTime(time); 403 ri.setState(state); 404 ri.setMime(mime); 405 ri.setHttpStatus(httpStatus); 406 return ri; 407 } 408 409 protected ResourceReferenceInternal createResourceReferenceFromRecord(ResultSet rs) throws SQLException { 410 ResourceReferenceInternal rr = null; 411 try { 412 String refererURL = rs.getString("referer"); 413 String refereeURL = rs.getString("referee"); 414 URL referer = new URL (refererURL); 415 URL referee = new URL (refereeURL); 416 int count = rs.getInt("count"); 417 rr = new ResourceReferenceInternal(storage, referer, referee, count); 418 } catch (MalformedURLException e) { 419 log.error("MalformedURLException", e); 420 } 421 return rr; 422 } 423 424 } 425 | Popular Tags |