KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > protocol > http > TestRobotRulesParser


1 /* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
2 /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3
4 package net.nutch.protocol.http;
5
6 import net.nutch.protocol.http.RobotRulesParser.RobotRuleSet;
7
8 import junit.framework.TestCase;
9
10 public class TestRobotRulesParser extends TestCase {
11   private static final String JavaDoc LF= "\n";
12   private static final String JavaDoc CR= "\r";
13   private static final String JavaDoc CRLF= "\r\n";
14   
15
16   private static final String JavaDoc[] ROBOTS_STRINGS= new String JavaDoc[] {
17     "User-Agent: Agent1 #foo" + CR
18     + "Disallow: /a" + CR
19     + "Disallow: /b/a" + CR
20     + "#Disallow: /c" + CR
21     + "" + CR
22     + "" + CR
23     + "User-Agent: Agent2 Agent3#foo" + CR
24     + "User-Agent: Agent4" + CR
25     + "Disallow: /d" + CR
26     + "Disallow: /e/d/" + CR
27     + "" + CR
28     + "User-Agent: *" + CR
29     + "Disallow: /foo/bar/" + CR,
30   };
31
32   private static final String JavaDoc[] AGENT_STRINGS= new String JavaDoc[] {
33     "Agent1",
34     "Agent2",
35     "Agent3",
36     "Agent4",
37     "Agent5",
38   };
39
40   private static final boolean[][] NOT_IN_ROBOTS_STRING= new boolean[][] {
41     {
42       false,
43       false,
44       false,
45       false,
46       true,
47     }
48   };
49
50   private static final String JavaDoc[] TEST_PATHS= new String JavaDoc[] {
51     "/a",
52     "/a/",
53     "/a/bloh/foo.html",
54     "/b",
55     "/b/a",
56     "/b/a/index.html",
57     "/b/b/foo.html",
58     "/c",
59     "/c/a",
60     "/c/a/index.html",
61     "/c/b/foo.html",
62     "/d",
63     "/d/a",
64     "/e/a/index.html",
65     "/e/d",
66     "/e/d/foo.html",
67     "/e/doh.html",
68     "/f/index.html",
69     "/foo/bar/baz.html",
70     "/f/",
71   };
72
73   private static final boolean[][][] ALLOWED= new boolean[][][] {
74     { // ROBOTS_STRINGS[0]
75
{ // Agent1
76
false, // "/a",
77
false, // "/a/",
78
false, // "/a/bloh/foo.html"
79
true, // "/b",
80
false, // "/b/a",
81
false, // "/b/a/index.html",
82
true, // "/b/b/foo.html",
83
true, // "/c",
84
true, // "/c/a",
85
true, // "/c/a/index.html",
86
true, // "/c/b/foo.html",
87
true, // "/d",
88
true, // "/d/a",
89
true, // "/e/a/index.html",
90
true, // "/e/d",
91
true, // "/e/d/foo.html",
92
true, // "/e/doh.html",
93
true, // "/f/index.html",
94
true, // "/foo/bar.html",
95
true, // "/f/",
96
},
97       { // Agent2
98
true, // "/a",
99
true, // "/a/",
100
true, // "/a/bloh/foo.html"
101
true, // "/b",
102
true, // "/b/a",
103
true, // "/b/a/index.html",
104
true, // "/b/b/foo.html",
105
true, // "/c",
106
true, // "/c/a",
107
true, // "/c/a/index.html",
108
true, // "/c/b/foo.html",
109
false, // "/d",
110
false, // "/d/a",
111
true, // "/e/a/index.html",
112
true, // "/e/d",
113
false, // "/e/d/foo.html",
114
true, // "/e/doh.html",
115
true, // "/f/index.html",
116
true, // "/foo/bar.html",
117
true, // "/f/",
118
},
119       { // Agent3
120
true, // "/a",
121
true, // "/a/",
122
true, // "/a/bloh/foo.html"
123
true, // "/b",
124
true, // "/b/a",
125
true, // "/b/a/index.html",
126
true, // "/b/b/foo.html",
127
true, // "/c",
128
true, // "/c/a",
129
true, // "/c/a/index.html",
130
true, // "/c/b/foo.html",
131
false, // "/d",
132
false, // "/d/a",
133
true, // "/e/a/index.html",
134
true, // "/e/d",
135
false, // "/e/d/foo.html",
136
true, // "/e/doh.html",
137
true, // "/f/index.html",
138
true, // "/foo/bar.html",
139
true, // "/f/",
140
},
141       { // Agent4
142
true, // "/a",
143
true, // "/a/",
144
true, // "/a/bloh/foo.html"
145
true, // "/b",
146
true, // "/b/a",
147
true, // "/b/a/index.html",
148
true, // "/b/b/foo.html",
149
true, // "/c",
150
true, // "/c/a",
151
true, // "/c/a/index.html",
152
true, // "/c/b/foo.html",
153
false, // "/d",
154
false, // "/d/a",
155
true, // "/e/a/index.html",
156
true, // "/e/d",
157
false, // "/e/d/foo.html",
158
true, // "/e/doh.html",
159
true, // "/f/index.html",
160
true, // "/foo/bar.html",
161
true, // "/f/",
162
},
163       { // Agent5/"*"
164
true, // "/a",
165
true, // "/a/",
166
true, // "/a/bloh/foo.html"
167
true, // "/b",
168
true, // "/b/a",
169
true, // "/b/a/index.html",
170
true, // "/b/b/foo.html",
171
true, // "/c",
172
true, // "/c/a",
173
true, // "/c/a/index.html",
174
true, // "/c/b/foo.html",
175
true, // "/d",
176
true, // "/d/a",
177
true, // "/e/a/index.html",
178
true, // "/e/d",
179
true, // "/e/d/foo.html",
180
true, // "/e/doh.html",
181
true, // "/f/index.html",
182
false, // "/foo/bar.html",
183
true, // "/f/",
184
}
185     }
186   };
187  
188   public TestRobotRulesParser(String JavaDoc name) {
189     super(name);
190   }
191
192   public void testRobotsOneAgent() {
193     for (int i= 0; i < ROBOTS_STRINGS.length; i++) {
194       for (int j= 0; j < AGENT_STRINGS.length; j++) {
195     testRobots(i, new String JavaDoc[] { AGENT_STRINGS[j] },
196            TEST_PATHS, ALLOWED[i][j]);
197       }
198     }
199   }
200
201   public void testRobotsTwoAgents() {
202     for (int i= 0; i < ROBOTS_STRINGS.length; i++) {
203       for (int j= 0; j < AGENT_STRINGS.length; j++) {
204     for (int k= 0; k < AGENT_STRINGS.length; k++) {
205       int key= j;
206       if (NOT_IN_ROBOTS_STRING[i][j])
207         key= k;
208       testRobots(i, new String JavaDoc[] { AGENT_STRINGS[j], AGENT_STRINGS[k] },
209              TEST_PATHS, ALLOWED[i][key]);
210     }
211       }
212     }
213   }
214
215   // helper
216

217   public void testRobots(int robotsString, String JavaDoc[] agents, String JavaDoc[] paths,
218              boolean[] allowed) {
219     String JavaDoc agentsString= agents[0];
220     for (int i= 1; i < agents.length; i++)
221       agentsString= agentsString + "," + agents[i];
222     RobotRulesParser p= new RobotRulesParser(agents);
223     RobotRuleSet rules= p.parseRules(ROBOTS_STRINGS[robotsString].getBytes());
224     for (int i= 0; i < paths.length; i++) {
225       assertTrue("testing robots file "+robotsString+", on agents ("
226          + agentsString + "), and path " + TEST_PATHS[i] + "; got "
227          + rules.isAllowed(TEST_PATHS[i]) + ", rules are: " + LF
228                    + rules,
229          rules.isAllowed(TEST_PATHS[i]) == allowed[i]);
230     }
231   }
232
233 }
234
Popular Tags