package net.javacoding.jspider.functional.specific.robotstxt;

import junit.framework.TestCase;
import net.javacoding.jspider.JSpider;
import net.javacoding.jspider.functional.TestingConstants;
import net.javacoding.jspider.core.util.config.ConfigurationFactory;
import net.javacoding.jspider.core.util.config.JSpiderConfiguration;
import net.javacoding.jspider.mockobjects.OverridingJSpiderConfiguration;
import net.javacoding.jspider.mockobjects.OverridingPropertySet;
import net.javacoding.jspider.mockobjects.plugin.JUnitEventSink;

import java.net.URL;

/**
 * Functional tests for robots.txt handling.
 *
 * <p>Each test spiders one resource below {@code /testcases/specific/robotstxt/}
 * on {@link TestingConstants#HOST} and then checks how many events of each
 * type were recorded by the {@link JUnitEventSink}.</p>
 *
 * <p>Two configurations are used:</p>
 * <ul>
 *   <li>{@link #config} - the unit test configuration as returned by
 *       {@link ConfigurationFactory};</li>
 *   <li>{@link #config2} - the same configuration wrapped so that the
 *       {@code jspider.userAgent} property is overridden with
 *       {@code "JSpiderUnitTest"}.</li>
 * </ul>
 *
 * <p>The expectations encoded below are: {@code disallowedResource1.html} and
 * {@code disallowedFolder1/} are forbidden for the first configuration but
 * fetched with the second one, while {@code disallowedResource2.html} and
 * {@code disallowedFolder2/} are forbidden for the second configuration but
 * fetched with the first one. {@code resource.html} is fetched with both.
 * NOTE(review): the robots.txt served by the test host is not visible from
 * this class - confirm the rules there match these expectations.</p>
 */
public class RobotsTXTTest extends TestCase {

    /** Common prefix of every resource path spidered by this test case. */
    private static final String BASE_PATH = "/testcases/specific/robotstxt/";

    /** Event sink whose per-class event counters are asserted on. */
    protected JUnitEventSink sink;

    /** Unit test configuration, used as-is. */
    protected JSpiderConfiguration config;

    /** Unit test configuration with the user agent overridden to "JSpiderUnitTest". */
    protected JSpiderConfiguration config2;

    /**
     * Public constructor giving a name to the test case.
     */
    public RobotsTXTTest() {
        // The name literal (including its trailing space) is kept exactly as it was.
        super("RobotsTXTTest ");
    }

    /**
     * Prepares both configurations, installs the default one in the
     * {@link ConfigurationFactory} and obtains the event sink instance.
     *
     * @throws Exception if the fixture cannot be created
     */
    protected void setUp() throws Exception {
        System.err.println("setUp");
        config = ConfigurationFactory.getConfiguration(ConfigurationFactory.CONFIG_UNITTEST);
        config2 = new OverridingJSpiderConfiguration(config);
        ((OverridingPropertySet) config2.getJSpiderConfiguration()).setValue("jspider.userAgent", "JSpiderUnitTest");
        ConfigurationFactory.setConfiguration(config);
        sink = JUnitEventSink.getInstance();
    }

    /**
     * Cleans the configuration held by the {@link ConfigurationFactory} and
     * resets the event sink so the next test starts from fresh counters.
     *
     * @throws Exception if the fixture cannot be released
     */
    protected void tearDown() throws Exception {
        System.err.println("tearDown");
        ConfigurationFactory.cleanConfiguration();
        sink.reset();
    }

    // ---- tests running with the default configuration ------------------------

    /** A plain resource is expected to be fetched and parsed. */
    public void testAllowedNormalUserAgent() throws Exception {
        checkSpidering("resource.html", config, true);
    }

    /** {@code disallowedResource1.html} is expected to be forbidden. */
    public void testDisallowedResourceNormalUserAgent() throws Exception {
        checkSpidering("disallowedResource1.html", config, false);
    }

    /** A resource inside {@code disallowedFolder1/} is expected to be forbidden. */
    public void testDisallowedFolderNormalUserAgent() throws Exception {
        checkSpidering("disallowedFolder1/resource.html", config, false);
    }

    /** {@code disallowedResource2.html} is expected to be fetched with this configuration. */
    public void testSometimesAllowedResourceNormalUserAgent() throws Exception {
        checkSpidering("disallowedResource2.html", config, true);
    }

    /** A resource inside {@code disallowedFolder2/} is expected to be fetched with this configuration. */
    public void testSometimesAllowedFolderNormalUserAgent() throws Exception {
        checkSpidering("disallowedFolder2/resource.html", config, true);
    }

    // ---- tests running with the "JSpiderUnitTest" user agent -----------------

    /** A plain resource is expected to be fetched and parsed. */
    public void testAllowedTestUserAgent() throws Exception {
        checkSpidering("resource.html", config2, true);
    }

    /** {@code disallowedResource2.html} is expected to be forbidden. */
    public void testDisallowedResourceTestUserAgent() throws Exception {
        checkSpidering("disallowedResource2.html", config2, false);
    }

    /** A resource inside {@code disallowedFolder2/} is expected to be forbidden. */
    public void testDisallowedFolderTestUserAgent() throws Exception {
        checkSpidering("disallowedFolder2/resource.html", config2, false);
    }

    /** {@code disallowedResource1.html} is expected to be fetched with this configuration. */
    public void testSometimesAllowedResourceTestUserAgent() throws Exception {
        checkSpidering("disallowedResource1.html", config2, true);
    }

    /** A resource inside {@code disallowedFolder1/} is expected to be fetched with this configuration. */
    public void testSometimesAllowedFolderTestUserAgent() throws Exception {
        checkSpidering("disallowedFolder1/resource.html", config2, true);
    }

    // ---- helpers -------------------------------------------------------------

    /**
     * Spiders a single resource and verifies the complete set of event counts.
     *
     * <p>Most counts are the same for every scenario. Only three depend on
     * whether the resource is expected to be allowed:</p>
     * <ul>
     *   <li>{@code ResourceFetchedEvent}: 2 when allowed, 1 when forbidden;</li>
     *   <li>{@code ResourceForbiddenEvent}: 0 when allowed, 1 when forbidden;</li>
     *   <li>{@code ResourceParsedEvent}: 1 when allowed, 0 when forbidden.</li>
     * </ul>
     *
     * <p>The assertions run in a fixed order, so the first mismatch is the one
     * that gets reported.</p>
     *
     * @param resourcePath  path of the resource, relative to {@link #BASE_PATH}
     * @param configuration configuration to install before spidering
     * @param allowed       {@code true} if the resource is expected to be
     *                      fetched and parsed, {@code false} if it is expected
     *                      to be reported as forbidden
     * @throws Exception if the URL is malformed or spidering fails
     */
    private void checkSpidering(String resourcePath, JSpiderConfiguration configuration, boolean allowed)
            throws Exception {
        URL url = new URL("http://" + TestingConstants.HOST + BASE_PATH + resourcePath);
        ConfigurationFactory.setConfiguration(configuration);

        JSpider jspider = new JSpider(url);
        jspider.start();

        // Engine life cycle: exactly one start, one summary and one stop.
        testEventCount(net.javacoding.jspider.api.event.engine.SpideringStartedEvent.class, 1);
        testEventCount(net.javacoding.jspider.api.event.engine.SpideringSummaryEvent.class, 1);
        testEventCount(net.javacoding.jspider.api.event.engine.SpideringStoppedEvent.class, 1);

        // One site, whose robots.txt is expected to be fetched without error.
        testEventCount(net.javacoding.jspider.api.event.site.SiteDiscoveredEvent.class, 1);
        testEventCount(net.javacoding.jspider.api.event.site.RobotsTXTMissingEvent.class, 0);
        testEventCount(net.javacoding.jspider.api.event.site.RobotsTXTFetchedEvent.class, 1);
        testEventCount(net.javacoding.jspider.api.event.site.RobotsTXTFetchErrorEvent.class, 0);

        // Two resources are discovered in every scenario
        // (presumably robots.txt plus the requested resource - TODO confirm).
        testEventCount(net.javacoding.jspider.api.event.resource.ResourceDiscoveredEvent.class, 2);
        testEventCount(net.javacoding.jspider.api.event.resource.ResourceFetchedEvent.class, allowed ? 2 : 1);
        testEventCount(net.javacoding.jspider.api.event.resource.ResourceFetchErrorEvent.class, 0);
        testEventCount(net.javacoding.jspider.api.event.resource.ResourceForbiddenEvent.class, allowed ? 0 : 1);
        testEventCount(net.javacoding.jspider.api.event.resource.ResourceIgnoredForFetchingEvent.class, 0);
        testEventCount(net.javacoding.jspider.api.event.resource.ResourceIgnoredForParsingEvent.class, 0);
        testEventCount(net.javacoding.jspider.api.event.resource.ResourceReferenceDiscoveredEvent.class, 0);
        testEventCount(net.javacoding.jspider.api.event.resource.ResourceParsedEvent.class, allowed ? 1 : 0);
    }

    /**
     * Asserts that the sink recorded the expected number of events of the
     * given class. The event class name is used as the assertion message.
     *
     * @param eventClass    event class whose occurrences are counted
     * @param expectedCount number of events expected in the sink
     */
    protected void testEventCount(Class eventClass, int expectedCount) {
        assertEquals(eventClass.getName(), expectedCount, sink.getEventCount(eventClass));
    }

}