package org.archive.crawler.deciderules;

import java.util.logging.Level;
import java.util.logging.Logger;

import javax.management.AttributeNotFoundException;

import org.archive.crawler.settings.SimpleType;

/**
 * Regular-expression decide rule whose expression is chosen from a set of
 * preset file-extension patterns, or supplied by the operator when the
 * {@link #CUSTOM} preset is selected.
 *
 * <p>The preset is read from the {@link #ATTR_USE_PRESET} attribute. When it
 * is {@link #CUSTOM}, the expression is obtained from the superclass's
 * {@code getRegexp(Object)}.
 */
public class MatchesFilePatternDecideRule extends MatchesRegExpDecideRule {

    private static final long serialVersionUID = -4182743018517062411L;

    private static final Logger logger =
        Logger.getLogger(MatchesFilePatternDecideRule.class.getName());

    /** Name of the attribute that selects which preset pattern to use. */
    public static final String ATTR_USE_PRESET = "use-preset-pattern";

    /** Case-insensitive pattern for image file extensions. */
    public static final String IMAGES_PATTERNS =
        ".*(?i)(\\.(bmp|gif|jpe?g|png|tiff?))$";

    /** Case-insensitive pattern for audio file extensions. */
    public static final String AUDIO_PATTERNS =
        ".*(?i)(\\.(mid|mp2|mp3|mp4|wav))$";

    /** Case-insensitive pattern for video file extensions. */
    public static final String VIDEO_PATTERNS =
        ".*(?i)(\\.(avi|mov|mpeg|ram|rm|smil|wmv))$";

    /** Case-insensitive pattern for miscellaneous document extensions. */
    public static final String MISC_PATTERNS =
        ".*(?i)(\\.(doc|pdf|ppt|swf))$";

    /** Union of the image, audio, video and miscellaneous patterns. */
    public static final String ALL_DEFAULT_PATTERNS =
        ".*(?i)(\\.(bmp|gif|jpe?g|png|tiff?|mid|mp2|mp3|mp4|wav|avi|mov|mpeg" +
        "|ram|rm|smil|wmv|doc|pdf|ppt|swf))$";

    /** Preset name selecting {@link #ALL_DEFAULT_PATTERNS}. */
    public static final String ALL = "All";

    /** Preset name selecting {@link #IMAGES_PATTERNS}. */
    public static final String IMAGES = "Images";

    /** Preset name selecting {@link #AUDIO_PATTERNS}. */
    public static final String AUDIO = "Audio";

    /** Preset name selecting {@link #VIDEO_PATTERNS}. */
    public static final String VIDEO = "Video";

    /** Preset name selecting {@link #MISC_PATTERNS}. */
    public static final String MISC = "Miscellaneous";

    /** Preset name that defers to the custom regular expression attribute. */
    public static final String CUSTOM = "Custom";

    /**
     * Creates the rule and registers its two settings: the preset selector
     * (defaulting to {@link #ALL}) and the custom regular expression
     * (defaulting to the empty string).
     *
     * @param name name passed through to the superclass constructor
     */
    public MatchesFilePatternDecideRule(String name) {
        super(name);
        setDescription("MatchesFilePatternDecideRule. Applies its decision " +
            "to all URIs that end with the specified pattern(s). Anything " +
            " that does not match is let PASS. " +
            " Default file patterns are: .avi, .bmp, " +
            ".doc, .gif, .jp(e)g, .mid, .mov, .mp2, .mp3, .mp4, .mpeg, " +
            ".pdf, .png, .ppt, .ram, .rm,.smil, .swf, .tif(f), .wav, .wmv. " +
            "It is also possible to specify a custom regular expression, " +
            "in which case this behaves exactly like the " +
            " MatchesRegExpDecideRule. See also " +
            "NotMatchesFilePatternDecideRule.");

        String[] options = new String[] {ALL, IMAGES, AUDIO, VIDEO, MISC,
            CUSTOM};

        // The audio line previously listed "mp2" without its leading dot,
        // unlike every other extension in this help text.
        addElementToDefinition(
            new SimpleType(ATTR_USE_PRESET, "URIs that match selected file " +
                "patterns will have the decision applied. Default file " +
                "patterns are:\n" +
                "Images: .bmp, .gif, .jp(e)g, .png, .tif(f)\n" +
                "Audio: .mid, .mp2, .mp3, .mp4, .wav\n" +
                "Video: .avi, .mov, .mpeg, .ram, .rm, .smil, .wmv\n" +
                "Miscellaneous: .doc, .pdf, .ppt, .swf\n" +
                "All: All above patterns\n" +
                "Choose 'Custom' to specify your own pattern. Preset " +
                "patterns are case insensitive.",
                ALL, options));

        addElementToDefinition(
            new SimpleType(ATTR_REGEXP, "Custom java regular expression. " +
                "This regular expression will be used instead of the " +
                "supplied pattern groups for matching. An example " +
                "of such a regular expression (Miscellaneous): " +
                ".*(?i)(\\.(doc|pdf|ppt|swf))$ " +
                "Any arbitrary regular expression may be entered and " +
                "will be applied to the URI.", ""));
    }

    /**
     * Returns the regular expression for the configured preset.
     *
     * @param o context object handed to {@code getAttribute} when reading
     *          the {@link #ATTR_USE_PRESET} setting
     * @return the preset pattern, the superclass's expression when the preset
     *         is {@link #CUSTOM}, or {@code null} when the attribute cannot be
     *         read or holds an unrecognized value
     */
    protected String getRegexp(Object o) {
        try {
            String patternType = (String) getAttribute(o, ATTR_USE_PRESET);
            // Constant-first comparisons so a null attribute value falls
            // through to the "unrecognized" branch instead of throwing.
            if (ALL.equals(patternType)) {
                return ALL_DEFAULT_PATTERNS;
            } else if (IMAGES.equals(patternType)) {
                return IMAGES_PATTERNS;
            } else if (AUDIO.equals(patternType)) {
                return AUDIO_PATTERNS;
            } else if (VIDEO.equals(patternType)) {
                return VIDEO_PATTERNS;
            } else if (MISC.equals(patternType)) {
                return MISC_PATTERNS;
            } else if (CUSTOM.equals(patternType)) {
                return super.getRegexp(o);
            }
            String problem = "Unrecognized pattern type " + patternType
                + ". Should never happen!";
            // Log as well as assert: assertions are usually disabled at
            // runtime, which would otherwise make this failure silent.
            logger.severe(problem);
            assert false : problem;
        } catch (AttributeNotFoundException e) {
            // Pass the exception itself so the stack trace is not lost.
            logger.log(Level.SEVERE, "Could not read " + ATTR_USE_PRESET
                + ": " + e.getMessage(), e);
        }
        return null;
    }
}