1 package net.javacoding.jspider.core.impl; 2 3 4 import net.javacoding.jspider.api.event.engine.*; 5 import net.javacoding.jspider.core.Spider; 6 import net.javacoding.jspider.core.SpiderContext; 7 import net.javacoding.jspider.core.logging.Log; 8 import net.javacoding.jspider.core.logging.LogFactory; 9 import net.javacoding.jspider.core.task.dispatch.DispatchSpiderTasks; 10 import net.javacoding.jspider.core.task.dispatch.DispatchThinkerTasks; 11 import net.javacoding.jspider.core.threading.ThreadPoolMonitorThread; 12 import net.javacoding.jspider.core.threading.WorkerThreadPool; 13 import net.javacoding.jspider.core.util.config.*; 14 15 16 22 public class SpiderImpl implements Spider { 23 24 public static final int DEFAULT_MONITORING_INTERVAL = 1000; 25 26 protected WorkerThreadPool spiders; 27 protected WorkerThreadPool thinkers; 28 29 public SpiderImpl(SpiderContext context, int spiderThreads, int thinkerThreads) { 30 LogFactory.getLog(Spider.class).info("Spider born - threads: spiders: " + spiderThreads + ", thinkers: " + thinkerThreads); 31 spiders = new WorkerThreadPool("Spiders", "Spider", spiderThreads); 32 thinkers = new WorkerThreadPool("Thinkers", "Thinker", thinkerThreads); 33 34 PropertySet props = ConfigurationFactory.getConfiguration().getJSpiderConfiguration(); 35 PropertySet threadProps = new MappedPropertySet ( ConfigConstants.CONFIG_THREADING, props); 36 PropertySet spidersProps = new MappedPropertySet ( ConfigConstants.CONFIG_THREADING_SPIDERS, threadProps); 37 PropertySet thinkerProps = new MappedPropertySet ( ConfigConstants.CONFIG_THREADING_THINKERS, threadProps); 38 PropertySet spidersMonitoringProps = new MappedPropertySet ( ConfigConstants.CONFIG_THREADING_MONITORING, spidersProps); 39 PropertySet thinkerMonitoringProps = new MappedPropertySet ( ConfigConstants.CONFIG_THREADING_MONITORING, thinkerProps); 40 41 if (spidersMonitoringProps.getBoolean(ConfigConstants.CONFIG_THREADING_MONITORING_ENABLED, false)) { 42 int interval = spidersMonitoringProps.getInteger(ConfigConstants.CONFIG_THREADING_MONITORING_INTERVAL, DEFAULT_MONITORING_INTERVAL); 43 new ThreadPoolMonitorThread(context.getEventDispatcher(), interval, spiders); 44 } 45 if (thinkerMonitoringProps.getBoolean(ConfigConstants.CONFIG_THREADING_MONITORING_ENABLED, false)) { 46 int interval = thinkerMonitoringProps.getInteger(ConfigConstants.CONFIG_THREADING_MONITORING_INTERVAL, DEFAULT_MONITORING_INTERVAL); 47 new ThreadPoolMonitorThread(context.getEventDispatcher(), interval, thinkers); 48 } 49 } 50 51 public void crawl(SpiderContext context) { 52 53 long start = System.currentTimeMillis(); 54 55 context.getEventDispatcher().dispatch(new SpideringStartedEvent(context.getBaseURL())); 56 57 DispatchSpiderTasks dispatchSpiderTask = new DispatchSpiderTasks(spiders, context); 58 DispatchThinkerTasks dispatchThinkerTask = new DispatchThinkerTasks(thinkers, context); 59 60 synchronized (dispatchSpiderTask) { 61 62 context.getAgent().start(); 63 64 spiders.assignGroupTask(dispatchSpiderTask); 65 thinkers.assignGroupTask(dispatchThinkerTask); 66 67 try { 68 dispatchSpiderTask.wait(); 70 } catch (InterruptedException e) { 71 Thread.currentThread().interrupt(); 72 } 73 } 74 75 Log log = LogFactory.getLog(Spider.class); 76 log.debug("Stopping spider workers..."); 77 spiders.stopAll(); 78 log.info("Stopped spider workers..."); 79 log.debug("Stopping thinker workers..."); 80 thinkers.stopAll(); 81 log.info("Stopped thinker workers..."); 82 83 context.getEventDispatcher().dispatch(new SpideringSummaryEvent(context.getStorage().getSummary())); 84 context.getEventDispatcher().dispatch(new SpideringStoppedEvent(context.getStorage())); 85 86 context.getEventDispatcher().shutdown(); 87 88 log.info("Spidering done!"); 89 log.info("Elapsed time : " + (System.currentTimeMillis() - start)); 90 } 91 92 } 93 | Popular Tags |