1 25 package org.archive.crawler.frontier; 26 27 import java.util.regex.Matcher ; 28 29 import org.archive.crawler.datamodel.CrawlURI; 30 import org.archive.util.TextUtils; 31 32 43 public class AntiCalendarCostAssignmentPolicy extends WagCostAssignmentPolicy { 44 public static String CALENDARISH = 45 "(?i)(calendar)|(year)|(month)|(day)|(date)|(viewcal)" + 46 "|(\\D19\\d\\d\\D)|(\\D20\\d\\d\\D)|(event)|(yr=)" + 47 "|(calendrier)|(jour)"; 48 49 52 public int costOf(CrawlURI curi) { 53 int cost = super.costOf(curi); 54 Matcher m = TextUtils.getMatcher(CALENDARISH, curi.toString()); 55 if (m.find()) { 56 cost++; 57 } 59 TextUtils.recycleMatcher(m); 60 return cost; 61 } 62 } 63 | Popular Tags |