1 23 package org.archive.crawler.url; 24 25 import java.util.Iterator ; 26 import java.util.logging.Logger ; 27 import java.util.logging.Level ; 28 29 import javax.management.AttributeNotFoundException ; 30 31 import org.archive.crawler.datamodel.CrawlOrder; 32 import org.archive.crawler.settings.MapType; 33 import org.archive.net.UURI; 34 35 40 public class Canonicalizer { 41 private static Logger logger = 42 Logger.getLogger(Canonicalizer.class.getName()); 43 44 49 private Canonicalizer() { 50 super(); 51 } 52 53 60 public static String canonicalize(UURI uuri, CrawlOrder order) { 61 MapType rules = null; 62 String canonical = uuri.toString(); 63 try { 64 rules = (MapType)order.getAttribute(uuri, CrawlOrder.ATTR_RULES); 65 canonical = Canonicalizer.canonicalize(uuri, rules.iterator(uuri)); 66 } catch (AttributeNotFoundException e) { 67 logger.warning("Failed canonicalization of " + canonical + 68 ": " + e); 69 } 70 return canonical; 71 } 72 73 81 public static String canonicalize(UURI uuri, Iterator rules) { 82 String before = uuri.toString(); 83 String canonical = before; 85 for (; rules.hasNext();) { 86 CanonicalizationRule r = (CanonicalizationRule)rules.next(); 87 if (!r.isEnabled(uuri)) { 91 if (logger.isLoggable(Level.FINER)) { 92 logger.finer("Rule " + r.getName() + " is disabled."); 93 } 94 continue; 95 } 96 canonical = r.canonicalize(canonical, uuri); 97 if (logger.isLoggable(Level.FINER)) { 98 logger.finer("Rule " + r.getName() + " " + before + " => " + 99 canonical); 100 } 101 } 102 if (logger.isLoggable(Level.INFO)) { 103 logger.fine(before + " => " + canonical); 104 } 105 return canonical; 106 } 107 } 108 | Popular Tags |