package org.archive.crawler.filter;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import org.archive.crawler.deciderules.DecideRule;
import org.archive.crawler.deciderules.DecidingFilter;
import org.archive.crawler.framework.Filter;
import org.archive.crawler.settings.SimpleType;
import org.archive.util.SURT;
import org.archive.util.SurtPrefixSet;

/**
 * Filter that converts a candidate URI to its SURT form and tests it against
 * a set of SURT prefixes loaded from a configured source file.
 *
 * <p>Deprecated, per the description this filter registers for itself: use
 * {@link DecidingFilter} with an equivalent {@link DecideRule} instead.
 */
public class SurtPrefixFilter extends Filter {

    private static final long serialVersionUID = -6933592892325852022L;

    /** Name of the setting holding the path of the SURT prefix source file. */
    public static final String ATTR_SURTS_SOURCE_FILE = "surts-source-file";

    /**
     * Name of the setting describing what to return when a prefix matches.
     * NOTE(review): this class registers the setting but never reads it;
     * presumably the {@link Filter} base class consumes it -- confirm.
     */
    public static final String ATTR_MATCH_RETURN_VALUE = "if-match-return";

    /**
     * Prefixes currently in effect; {@code null} until the first successful
     * load. Only replaced after a complete, successful read of the source
     * file (see {@link #readPrefixes()}).
     */
    SurtPrefixSet surtPrefixes = null;

    /**
     * Creates the filter and registers its two settings: the value to return
     * on a match (default {@code true}) and the SURT source file path
     * (default empty string).
     *
     * @param name name of this filter, passed through to {@link Filter}
     */
    public SurtPrefixFilter(String name) {
        // The trailing space after "Use" keeps the two halves of the
        // description from running together as "UseDecidingFilter".
        super(name, "SURT prefix filter *Deprecated* Use " +
                "DecidingFilter and equivalent DecideRule instead.");
        addElementToDefinition(
                new SimpleType(ATTR_MATCH_RETURN_VALUE, "What to return when " +
                        "a prefix matches.\n", Boolean.TRUE));
        addElementToDefinition(
                new SimpleType(ATTR_SURTS_SOURCE_FILE,
                        "Source file from which to infer SURT prefixes. Any URLs " +
                        "in file will be converted to the implied SURT prefix, and " +
                        "literal SURT prefixes may be listed on lines beginning " +
                        "with a '+' character.",
                        ""));
    }

    /**
     * Tests the SURT form of {@code o.toString()} against the loaded prefix
     * set, loading the set first if no load has succeeded yet.
     *
     * @param o object whose string form is treated as the URI to test
     * @return the result of {@link SurtPrefixSet#containsPrefixOf} for the
     *         (scheme-normalized) SURT string
     * @throws RuntimeException if the prefix set has to be loaded and the
     *         source file cannot be read
     */
    protected synchronized boolean innerAccepts(Object o) {
        if (surtPrefixes == null) {
            readPrefixes();
        }
        String s = SURT.fromURI(o.toString());
        // Rewrite an "https:" SURT to "http:" so both schemes are checked
        // against the same prefixes.
        if (s.startsWith("https:")) {
            s = "http:" + s.substring(6);
        }
        return surtPrefixes.containsPrefixOf(s);
    }

    /**
     * Loads the prefix set from the file named by
     * {@link #ATTR_SURTS_SOURCE_FILE}. A relative path is resolved against the
     * directory returned by the controller's {@code getDisk()}.
     *
     * <p>The new set is built in a local variable and only assigned to
     * {@link #surtPrefixes} after the whole file has been read. On failure
     * the field keeps its previous value: {@code null} (so the next
     * {@link #innerAccepts(Object)} call retries) or the last successfully
     * loaded set (so a failed reload does not wipe working prefixes).
     *
     * @throws RuntimeException wrapping the {@link IOException} if the file
     *         cannot be opened, read or closed
     */
    private void readPrefixes() {
        String sourcePath = (String) getUncheckedAttribute(null,
                ATTR_SURTS_SOURCE_FILE);
        File source = new File(sourcePath);
        if (!source.isAbsolute()) {
            source = new File(getSettingsHandler().getOrder()
                    .getController().getDisk(), sourcePath);
        }
        SurtPrefixSet loaded = new SurtPrefixSet();
        try {
            FileReader fr = new FileReader(source);
            try {
                loaded.importFromMixed(fr, true);
            } finally {
                fr.close();
            }
        } catch (IOException e) {
            // Rethrow with the path and the original cause; the caller decides
            // how to report it, so nothing is printed here.
            throw new RuntimeException("Unable to read SURT prefixes from "
                    + source.getAbsolutePath(), e);
        }
        // Publish only after a complete, successful load.
        surtPrefixes = loaded;
    }

    /**
     * Invokes the superclass update hook and then re-reads the prefix source
     * file. If the re-read fails, the previously loaded prefixes stay in
     * effect and the failure is propagated.
     *
     * @throws RuntimeException if the source file cannot be read
     */
    public synchronized void kickUpdate() {
        super.kickUpdate();
        readPrefixes();
    }
}