package org.archive.crawler.settings;

import java.io.File;
import java.io.IOException;
import java.text.ParseException;

import javax.management.Attribute;
import javax.management.AttributeNotFoundException;
import javax.management.InvalidAttributeValueException;
import javax.management.MBeanException;
import javax.management.ReflectionException;

import org.apache.commons.httpclient.URIException;
import org.archive.crawler.datamodel.CrawlOrder;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.framework.CrawlScope;
import org.archive.crawler.scope.ClassicScope;
import org.archive.crawler.settings.refinements.Criteria;
import org.archive.crawler.settings.refinements.PortnumberCriteria;
import org.archive.crawler.settings.refinements.Refinement;
import org.archive.crawler.settings.refinements.RegularExpressionCriteria;
import org.archive.crawler.settings.refinements.TimespanCriteria;
import org.archive.net.UURIFactory;

/**
 * Tests for {@link XMLSettingsHandler}: writing, copying, looking up,
 * deleting and refining settings objects.
 *
 * <p>The fixtures used throughout ({@code getSettingsHandler()},
 * {@code getGlobalSettings()}, {@code getPerHostSettings()},
 * {@code getOrderFile()}, {@code getTmpDir()}, {@code getMatchDomainURI()}
 * and {@code getMatchHostURI()}) are inherited from
 * {@link SettingsFrameworkTestCase}; their exact contents are defined there.
 */
public class XMLSettingsHandlerTest extends SettingsFrameworkTestCase {

    /**
     * Delegates to the superclass fixture set-up.
     *
     * @throws Exception if the superclass set-up fails
     */
    protected void setUp() throws Exception {
        super.setUp();
    }

    /**
     * Delegates to the superclass fixture tear-down.
     *
     * @throws Exception if the superclass tear-down fails
     */
    protected void tearDown() throws Exception {
        super.tearDown();
    }

    /**
     * Writes the global settings and a per-host override to disk, then reads
     * them back through a freshly constructed handler and checks that the
     * overridden values are returned for the per-host settings.
     *
     * @throws AttributeNotFoundException if an attribute name is unknown
     * @throws InvalidAttributeValueException if an attribute value is rejected
     * @throws MBeanException if raised by the attribute accessors
     * @throws ReflectionException if raised by the attribute accessors
     */
    public void testWriteSettingsObjectCrawlerSettings()
            throws AttributeNotFoundException, InvalidAttributeValueException,
            MBeanException, ReflectionException {

        // Write the global (order) settings with a ClassicScope installed.
        CrawlerSettings settings = getGlobalSettings();
        XMLSettingsHandler handler = getSettingsHandler();
        handler.registerValueErrorHandler(this);
        handler.getOrder().setAttribute(new ClassicScope());
        handler.writeSettingsObject(settings);
        assertTrue("Order file was not written", getOrderFile().exists());

        // The scope module must now be reachable from the global settings.
        ComplexType scope = settings.getModule(CrawlScope.ATTR_NAME);
        assertNotNull("Could not get module scope", scope);

        // Override two attributes in the per-host settings.
        CrawlerSettings perHost = getPerHostSettings();
        Integer newHops = new Integer(500);
        String newFrom = "newfrom";
        scope.setAttribute(perHost, new Attribute(
                ClassicScope.ATTR_MAX_LINK_HOPS, newHops));
        CrawlOrder order = handler.getOrder();
        ComplexType httpHeaders = (ComplexType) order
                .getAttribute(CrawlOrder.ATTR_HTTP_HEADERS);
        httpHeaders.setAttribute(perHost, new Attribute(CrawlOrder.ATTR_FROM,
                newFrom));

        // Persist the per-host settings.
        handler.writeSettingsObject(perHost);
        assertTrue("Per host file was not written", handler.settingsToFilename(
                perHost).exists());

        // Read everything back with a new handler built from the order file.
        XMLSettingsHandler newHandler = new XMLSettingsHandler(getOrderFile());
        newHandler.initialize();

        CrawlerSettings newPerHost = newHandler.getSettingsObject(perHost
                .getScope());
        assertNotNull("Per host scope could not be read", newPerHost);

        ComplexType newScope = newHandler.getModule(CrawlScope.ATTR_NAME);
        assertNotNull("Scope module could not be read", newScope);
        Integer readHops = (Integer) newScope.getAttribute(newPerHost,
                ClassicScope.ATTR_MAX_LINK_HOPS);
        assertEquals("Per host max link hops not preserved", newHops,
                readHops);

        ComplexType newHttpHeaders = (ComplexType) newHandler.getOrder()
                .getAttribute(newPerHost, CrawlOrder.ATTR_HTTP_HEADERS);
        assertNotNull("Http headers could not be read", newHttpHeaders);

        String readFrom = (String) newHttpHeaders.getAttribute(newPerHost,
                CrawlOrder.ATTR_FROM);
        assertEquals("Per host from header not preserved", newFrom, readFrom);
    }

    /**
     * Copies the settings to a new order file and settings directory and
     * checks that the new order file exists, that the handler now resolves
     * the per-host settings file to a path containing the new directory name,
     * and that this per-host file exists.
     *
     * @throws IOException if copying the settings fails
     */
    public void testCopySettings() throws IOException {

        // Make sure there is something on disk to copy.
        XMLSettingsHandler handler = getSettingsHandler();
        handler.writeSettingsObject(getGlobalSettings());
        handler.writeSettingsObject(getPerHostSettings());

        File newOrderFile = new File(getTmpDir(), "SETTINGS_new_order.xml");
        String newSettingsDir = "SETTINGS_new_per_host_settings";
        handler.copySettings(newOrderFile, newSettingsDir);

        assertTrue("Order file was not written", newOrderFile.exists());

        // Plain substring check: the directory name is a literal, not a
        // regular expression.
        String perHostPath = handler.settingsToFilename(getPerHostSettings())
                .getAbsolutePath();
        assertTrue("New settings dir not set",
                perHostPath.indexOf(newSettingsDir) >= 0);
        assertTrue("Per host file was not written", handler.settingsToFilename(
                getPerHostSettings()).exists());
    }

    /**
     * Checks that {@code getSettings("localhost.localdomain")} returns the
     * very same object as {@code getSettingsObject(null)}.
     */
    public void testGetSettings() {
        XMLSettingsHandler handler = getSettingsHandler();
        CrawlerSettings order = handler.getSettingsObject(null);
        CrawlerSettings perHost = handler.getSettings("localhost.localdomain");
        assertNotNull("Didn't get any file", perHost);
        assertSame("Did not get same file", order, perHost);
    }

    /**
     * Checks that {@code getSettingsObject} returns {@code null} for a scope
     * that has not been created, and returns a settings object for that scope
     * once {@code getOrCreateSettingsObject} has been called for it.
     */
    public void testGetSettingsObject() {
        String testScope = "audio.archive.org";

        XMLSettingsHandler handler = getSettingsHandler();
        assertNotNull("Couldn't get orderfile", handler.getSettingsObject(null));
        assertNull("Got nonexisting per host file", handler
                .getSettingsObject(testScope));
        assertNotNull("Couldn't create per host file", handler
                .getOrCreateSettingsObject(testScope));
        assertNotNull("Couldn't get per host file", handler
                .getSettingsObject(testScope));
    }

    /**
     * Writes the per-host settings file, deletes the settings object and
     * checks that the file is gone afterwards.
     */
    public void testDeleteSettingsObject() {
        XMLSettingsHandler handler = getSettingsHandler();
        File file = handler.settingsToFilename(getPerHostSettings());
        handler.writeSettingsObject(getPerHostSettings());
        assertTrue("Per host file was not written", file.exists());
        handler.deleteSettingsObject(getPerHostSettings());
        assertFalse("Per host file was not deleted", file.exists());
    }

    /**
     * Adds one refinement to the global settings and one to the per-host
     * settings, each overriding the {@code from} http header, writes them,
     * and reads the header back through a new handler for three URIs.
     *
     * <p>Expected results: the inherited "match domain" URI yields the
     * original global value, the inherited "match host" URI yields the value
     * set on the global refinement, and a {@code www.archive.org} URI on port
     * 10 yields the value set on the per-host refinement.
     * NOTE(review): which criteria the two inherited URIs satisfy depends on
     * fixtures in the superclass - confirm there.
     *
     * @throws ParseException if the timespan criteria cannot be parsed
     * @throws InvalidAttributeValueException if an attribute value is rejected
     * @throws AttributeNotFoundException if an attribute name is unknown
     * @throws MBeanException if raised by the attribute accessors
     * @throws ReflectionException if raised by the attribute accessors
     * @throws URIException if the test URI cannot be created
     */
    public void testReadWriteRefinements() throws ParseException,
            InvalidAttributeValueException, AttributeNotFoundException,
            MBeanException, ReflectionException, URIException {
        XMLSettingsHandler handler = getSettingsHandler();
        CrawlerSettings global = getGlobalSettings();
        CrawlerSettings per = getPerHostSettings();
        ComplexType headers = (ComplexType) handler.getOrder().getAttribute(
                CrawlOrder.ATTR_HTTP_HEADERS);

        String globalFrom = (String) headers.getAttribute(CrawlOrder.ATTR_FROM);
        String refinedGlobalFrom = "refined@global.address";
        String refinedPerFrom = "refined@per.address";

        // Refinement on the global settings: timespan plus regular expression.
        Refinement globalRefinement = new Refinement(global, "test",
                "Refinement test");
        Criteria timespanCriteria = new TimespanCriteria("2300", "2300");
        globalRefinement.addCriteria(timespanCriteria);
        Criteria regexpCriteria = new RegularExpressionCriteria(".*www.*");
        globalRefinement.addCriteria(regexpCriteria);
        handler.writeSettingsObject(global);

        // Override the from header inside the global refinement.
        CrawlerSettings globalRefinementSetting = globalRefinement
                .getSettings();
        headers.setAttribute(globalRefinementSetting, new Attribute(
                CrawlOrder.ATTR_FROM, refinedGlobalFrom));
        handler.writeSettingsObject(globalRefinementSetting);

        // Refinement on the per-host settings: port number 10.
        Refinement perRefinement = new Refinement(per, "test2",
                "Refinement test2");
        Criteria portCriteria = new PortnumberCriteria("10");
        perRefinement.addCriteria(portCriteria);
        handler.writeSettingsObject(per);

        // Override the from header inside the per-host refinement.
        CrawlerSettings perRefinementSetting = perRefinement.getSettings();
        headers.setAttribute(perRefinementSetting, new Attribute(
                CrawlOrder.ATTR_FROM, refinedPerFrom));
        handler.writeSettingsObject(perRefinementSetting);

        // Read everything back with a new handler built from the order file.
        XMLSettingsHandler newHandler = new XMLSettingsHandler(getOrderFile());
        newHandler.initialize();
        CrawlerSettings newGlobal = newHandler.getSettingsObject(null);
        assertNotNull("Global scope could not be read", newGlobal);
        CrawlerSettings newPer = newHandler.getSettingsObject(per.getScope());
        assertNotNull("Per host scope could not be read", newPer);

        ComplexType newHeaders = (ComplexType) newHandler.getOrder()
                .getAttribute(CrawlOrder.ATTR_HTTP_HEADERS);
        assertNotNull("Http headers could not be read", newHeaders);

        String newFrom1 = (String) newHeaders.getAttribute(
                CrawlOrder.ATTR_FROM, getMatchDomainURI());
        String newFrom2 = (String) newHeaders.getAttribute(
                CrawlOrder.ATTR_FROM, getMatchHostURI());
        CrawlURI matchHostAndPortURI = new CrawlURI(
                UURIFactory.getInstance("http://www.archive.org:10/index.html"));
        String newFrom3 = (String) newHeaders.getAttribute(
                CrawlOrder.ATTR_FROM, matchHostAndPortURI);

        assertEquals("Unrefined from header expected for domain URI",
                globalFrom, newFrom1);
        assertEquals("Global refinement from header expected for host URI",
                refinedGlobalFrom, newFrom2);
        assertEquals("Per host refinement from header expected for port URI",
                refinedPerFrom, newFrom3);
    }
}