1 package net.javacoding.jspider.core.util.html; 2 3 import junit.framework.TestCase; 4 5 import java.net.URL ; 6 7 10 public class RobotsTXTLineTest extends TestCase { 11 12 public RobotsTXTLineTest ( ) { 13 super ( "RobotsTXTLineTest" ); 14 } 15 16 public void testSimpleAllow ( ) { 17 RobotsTXTLine l = RobotsTXTLine.parse("allow: index.html"); 18 19 assertNotNull("line returned for correct info is null", l); 20 21 String resource = l.getResourceURI(); 22 int type = l.getType ( ); 23 24 assertEquals("type parsed incorrect", RobotsTXTLine.ROBOTSTXT_RULE_ALLOW, type); 25 assertEquals("resourceURI parsed incorrect", "index.html", resource); 26 27 } 28 29 public void testSimpleAllowCaseSensitivity ( ) { 30 RobotsTXTLine l = RobotsTXTLine.parse("AlLoW: index.html"); 31 32 assertNotNull("line returned for correct info is null", l); 33 34 String resource = l.getResourceURI(); 35 int type = l.getType ( ); 36 37 assertEquals("type parsed incorrect", RobotsTXTLine.ROBOTSTXT_RULE_ALLOW, type); 38 assertEquals("resourceURI parsed incorrect", "index.html", resource); 39 40 } 41 42 public void testSimpleDisallow ( ) { 43 RobotsTXTLine l = RobotsTXTLine.parse("disallow: index.html"); 44 45 assertNotNull("line returned for correct info is null", l); 46 47 String resource = l.getResourceURI(); 48 int type = l.getType ( ); 49 50 assertEquals("type parsed incorrect", RobotsTXTLine.ROBOTSTXT_RULE_DISALLOW, type); 51 assertEquals("resourceURI parsed incorrect", "index.html", resource); 52 53 } 54 55 public void testSimpleDisallowCaseSensitivity ( ) { 56 RobotsTXTLine l = RobotsTXTLine.parse("dIsAlLoW: index.html"); 57 58 assertNotNull("line returned for correct info is null", l); 59 60 String resource = l.getResourceURI(); 61 int type = l.getType ( ); 62 63 assertEquals("type parsed incorrect", RobotsTXTLine.ROBOTSTXT_RULE_DISALLOW, type); 64 assertEquals("resourceURI parsed incorrect", "index.html", resource); 65 66 } 67 68 public void testSimpleAllowWithoutSpace ( ) { 69 RobotsTXTLine l = RobotsTXTLine.parse("allow:index.html"); 70 71 assertNotNull("line returned for correct info is null", l); 72 73 String resource = l.getResourceURI(); 74 int type = l.getType ( ); 75 76 assertEquals("type parsed incorrect", RobotsTXTLine.ROBOTSTXT_RULE_ALLOW, type); 77 assertEquals("resourceURI parsed incorrect", "index.html", resource); 78 79 } 80 81 public void testSimpleDisallowWithoutSpace ( ) { 82 RobotsTXTLine l = RobotsTXTLine.parse("disallow:index.html"); 83 84 assertNotNull("line returned for correct info is null", l); 85 86 String resource = l.getResourceURI(); 87 int type = l.getType ( ); 88 89 assertEquals("type parsed incorrect", RobotsTXTLine.ROBOTSTXT_RULE_DISALLOW, type); 90 assertEquals("resourceURI parsed incorrect", "index.html", resource); 91 92 } 93 94 public void testErroneousAllowDisallow ( ) { 95 RobotsTXTLine l = RobotsTXTLine.parse("alow:index.html"); 96 assertNull("line returned for incorrect info is not null", l); 97 } 98 99 public void testEmptyString ( ) { 100 RobotsTXTLine l = RobotsTXTLine.parse(""); 101 assertNull("line returned for empty string is not null", l); 102 } 103 104 public void testOnlyAllow ( ) { 105 RobotsTXTLine l = RobotsTXTLine.parse("allow:"); 106 assertNull("line returned for 'allow:' string is not null", l); 107 } 108 109 public void testOnlyDisAllow ( ) { 110 RobotsTXTLine l = RobotsTXTLine.parse("disallow:"); 111 assertNull("line returned for 'disallow:' string is not null", l); 112 } 113 114 public void testNullString ( ) { 115 RobotsTXTLine l = RobotsTXTLine.parse(null); 116 assertNull("line returned for null string is not null", l); 117 } 118 119 public void testSimpleMatch ( ) throws Exception { 120 URL url = new URL ( "http://j-spider.sourceforge.net/index.html" ); 121 RobotsTXTLine line = new RobotsTXTLine("/index.html", RobotsTXTLine.ROBOTSTXT_RULE_DISALLOW); 122 boolean matches = line.matches(url); 123 124 assertTrue ( "simple match didn't work", matches); 125 } 126 127 public void testSimpleMatchWithFolder ( ) throws Exception { 128 URL url = new URL ( "http://j-spider.sourceforge.net/manual/index.html" ); 129 RobotsTXTLine line = new RobotsTXTLine("/manual", RobotsTXTLine.ROBOTSTXT_RULE_DISALLOW); 130 boolean matches = line.matches(url); 131 132 assertTrue ( "simple match didn't work", matches); 133 } 134 135 public void testSimpleMatchCaseSensitivity ( ) throws Exception { 136 URL url = new URL ( "http://j-spider.sourceforge.net/index.HTML" ); 137 RobotsTXTLine line = new RobotsTXTLine("/index.html", RobotsTXTLine.ROBOTSTXT_RULE_DISALLOW); 138 boolean matches = line.matches(url); 139 140 assertFalse ( "cases were different, yet a match was given", matches); 141 } 142 143 public void testSimpleNoMatch ( ) throws Exception { 144 URL url = new URL ( "http://j-spider.sourceforge.net/index.htm" ); 145 RobotsTXTLine line = new RobotsTXTLine("/index.html", RobotsTXTLine.ROBOTSTXT_RULE_DISALLOW); 146 boolean matches = line.matches(url); 147 148 assertFalse ( "simple nomatch didn't work", matches); 149 } 150 151 public void testRootMatch ( ) throws Exception { 152 URL url = new URL ( "http://j-spider.sourceforge.net" ); 153 RobotsTXTLine line = new RobotsTXTLine("/", RobotsTXTLine.ROBOTSTXT_RULE_DISALLOW); 154 boolean matches = line.matches(url); 155 156 assertTrue ( "root match didn't work", matches); 157 } 158 159 public void testRootMatchWithTrailingSlash ( ) throws Exception { 160 URL url = new URL ( "http://j-spider.sourceforge.net/" ); 161 RobotsTXTLine line = new RobotsTXTLine("/", RobotsTXTLine.ROBOTSTXT_RULE_DISALLOW); 162 boolean matches = line.matches(url); 163 164 assertTrue ( "root match didn't work", matches); 165 } 166 167 } 168 | Popular Tags |