1 2 3 4 package net.nutch.protocol.http; 5 6 import net.nutch.protocol.http.RobotRulesParser.RobotRuleSet; 7 8 import junit.framework.TestCase; 9 10 public class TestRobotRulesParser extends TestCase { 11 private static final String LF= "\n"; 12 private static final String CR= "\r"; 13 private static final String CRLF= "\r\n"; 14 15 16 private static final String [] ROBOTS_STRINGS= new String [] { 17 "User-Agent: Agent1 #foo" + CR 18 + "Disallow: /a" + CR 19 + "Disallow: /b/a" + CR 20 + "#Disallow: /c" + CR 21 + "" + CR 22 + "" + CR 23 + "User-Agent: Agent2 Agent3#foo" + CR 24 + "User-Agent: Agent4" + CR 25 + "Disallow: /d" + CR 26 + "Disallow: /e/d/" + CR 27 + "" + CR 28 + "User-Agent: *" + CR 29 + "Disallow: /foo/bar/" + CR, 30 }; 31 32 private static final String [] AGENT_STRINGS= new String [] { 33 "Agent1", 34 "Agent2", 35 "Agent3", 36 "Agent4", 37 "Agent5", 38 }; 39 40 private static final boolean[][] NOT_IN_ROBOTS_STRING= new boolean[][] { 41 { 42 false, 43 false, 44 false, 45 false, 46 true, 47 } 48 }; 49 50 private static final String [] TEST_PATHS= new String [] { 51 "/a", 52 "/a/", 53 "/a/bloh/foo.html", 54 "/b", 55 "/b/a", 56 "/b/a/index.html", 57 "/b/b/foo.html", 58 "/c", 59 "/c/a", 60 "/c/a/index.html", 61 "/c/b/foo.html", 62 "/d", 63 "/d/a", 64 "/e/a/index.html", 65 "/e/d", 66 "/e/d/foo.html", 67 "/e/doh.html", 68 "/f/index.html", 69 "/foo/bar/baz.html", 70 "/f/", 71 }; 72 73 private static final boolean[][][] ALLOWED= new boolean[][][] { 74 { { false, false, false, true, false, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, }, 97 { true, true, true, true, true, true, true, true, true, true, true, false, false, true, true, false, true, true, true, true, }, 119 { true, true, true, true, true, true, true, true, true, true, true, false, false, true, true, false, true, true, true, true, }, 141 { true, true, true, true, true, true, true, true, true, true, true, false, false, true, true, false, true, true, true, true, }, 163 { true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, } 185 } 186 }; 187 188 public TestRobotRulesParser(String name) { 189 super(name); 190 } 191 192 public void testRobotsOneAgent() { 193 for (int i= 0; i < ROBOTS_STRINGS.length; i++) { 194 for (int j= 0; j < AGENT_STRINGS.length; j++) { 195 testRobots(i, new String [] { AGENT_STRINGS[j] }, 196 TEST_PATHS, ALLOWED[i][j]); 197 } 198 } 199 } 200 201 public void testRobotsTwoAgents() { 202 for (int i= 0; i < ROBOTS_STRINGS.length; i++) { 203 for (int j= 0; j < AGENT_STRINGS.length; j++) { 204 for (int k= 0; k < AGENT_STRINGS.length; k++) { 205 int key= j; 206 if (NOT_IN_ROBOTS_STRING[i][j]) 207 key= k; 208 testRobots(i, new String [] { AGENT_STRINGS[j], AGENT_STRINGS[k] }, 209 TEST_PATHS, ALLOWED[i][key]); 210 } 211 } 212 } 213 } 214 215 217 public void testRobots(int robotsString, String [] agents, String [] paths, 218 boolean[] allowed) { 219 String agentsString= agents[0]; 220 for (int i= 1; i < agents.length; i++) 221 agentsString= agentsString + "," + agents[i]; 222 RobotRulesParser p= new RobotRulesParser(agents); 223 RobotRuleSet rules= p.parseRules(ROBOTS_STRINGS[robotsString].getBytes()); 224 for (int i= 0; i < paths.length; i++) { 225 assertTrue("testing robots file "+robotsString+", on agents (" 226 + agentsString + "), and path " + TEST_PATHS[i] + "; got " 227 + rules.isAllowed(TEST_PATHS[i]) + ", rules are: " + LF 228 + rules, 229 rules.isAllowed(TEST_PATHS[i]) == allowed[i]); 230 } 231 } 232 233 } 234 | Popular Tags |