1 2 3 package net.nutch.db; 4 5 import java.io.*; 6 import java.util.*; 7 import net.nutch.io.*; 8 9 16 public class DBKeyDivision { 17 public static String [] URL_KEYSPACE_DIVIDERS = {"", 24 "http://devrando", 25 "http://hoaxinh.", 26 "http://magyn.co", 27 "http://osha-enf", 28 "http://templeem", 29 "http://wiem.one", 30 "http://www.aero", 31 "http://www.anth", 32 "http://www.baro", 33 "http://www.bruc", 34 "http://www.chen", 35 "http://www.corh", 36 "http://www.dive", 37 "http://www.envi", 38 "http://www.fort", 39 "http://www.geoc", 40 "http://www.gree", 41 "http://www.htmi", 42 "http://www.j103", 43 "http://www.lake", 44 "http://www.masc", 45 "http://www.mrmp", 46 "http://www.njdr", 47 "http://www.pbs.", 48 "http://www.quee", 49 "http://www.saba", 50 "http://www.skee", 51 "http://www.surg", 52 "http://www.tony", 53 "http://www.uvm.", 54 "http://www.wigg"}; 55 56 public static MD5Hash[] MD5_KEYSPACE_DIVIDERS = 60 {new MD5Hash("00000000000000000000000000000000"), 61 new MD5Hash("08000000000000000000000000000000"), 62 new MD5Hash("10000000000000000000000000000000"), 63 new MD5Hash("18000000000000000000000000000000"), 64 new MD5Hash("20000000000000000000000000000000"), 65 new MD5Hash("28000000000000000000000000000000"), 66 new MD5Hash("30000000000000000000000000000000"), 67 new MD5Hash("38000000000000000000000000000000"), 68 new MD5Hash("40000000000000000000000000000000"), 69 new MD5Hash("48000000000000000000000000000000"), 70 new MD5Hash("50000000000000000000000000000000"), 71 new MD5Hash("58000000000000000000000000000000"), 72 new MD5Hash("60000000000000000000000000000000"), 73 new MD5Hash("68000000000000000000000000000000"), 74 new MD5Hash("70000000000000000000000000000000"), 75 new MD5Hash("78000000000000000000000000000000"), 76 new MD5Hash("80000000000000000000000000000000"), 77 new MD5Hash("88000000000000000000000000000000"), 78 new MD5Hash("90000000000000000000000000000000"), 79 new MD5Hash("98000000000000000000000000000000"), 80 new MD5Hash("a0000000000000000000000000000000"), 81 new MD5Hash("a8000000000000000000000000000000"), 82 new MD5Hash("b0000000000000000000000000000000"), 83 new MD5Hash("b8000000000000000000000000000000"), 84 new MD5Hash("c0000000000000000000000000000000"), 85 new MD5Hash("c8000000000000000000000000000000"), 86 new MD5Hash("d0000000000000000000000000000000"), 87 new MD5Hash("d8000000000000000000000000000000"), 88 new MD5Hash("e0000000000000000000000000000000"), 89 new MD5Hash("e8000000000000000000000000000000"), 90 new MD5Hash("f0000000000000000000000000000000"), 91 new MD5Hash("f8000000000000000000000000000000")}; 92 93 public static int MAX_SECTIONS = URL_KEYSPACE_DIVIDERS.length; 99 100 104 public static int findURLSection(String url, int numSections) { 105 if (numSections > URL_KEYSPACE_DIVIDERS.length) { 106 throw new IllegalArgumentException ("Too many db sections. " + numSections + " is greater than max of " + URL_KEYSPACE_DIVIDERS.length); 107 } 108 109 return binarySearch(url, URL_KEYSPACE_DIVIDERS, numSections); 110 } 111 112 116 public static int findMD5Section(MD5Hash md5, int numSections) { 117 if (numSections > MD5_KEYSPACE_DIVIDERS.length) { 118 throw new IllegalArgumentException ("Too many db sections. " + numSections + " is greater than max of " + MD5_KEYSPACE_DIVIDERS.length); 119 } 120 121 return binarySearch(md5, MD5_KEYSPACE_DIVIDERS, numSections); 122 } 123 124 127 static int binarySearch(Comparable obj, Comparable keyArray[], int numSections) { 128 double stepSize = keyArray.length / (numSections * 1.0); 129 int lastBest = -1; 130 int low = 0; 131 int high = numSections - 1; 132 133 while (low <= high) { 134 int mid = (low + high) / 2; 135 Comparable midObj = keyArray[(int) Math.round(mid * stepSize)]; 136 int cmp = obj.compareTo(midObj); 137 if (cmp < 0) { 138 high = mid - 1; 139 } else if (cmp > 0) { 140 lastBest = mid; 141 low = mid + 1; 142 } else { 143 return mid; 144 } 145 } 146 return lastBest; 147 } 148 149 } 150 | Popular Tags |