1 2 3 package net.nutch.db; 4 5 import java.io.*; 6 import java.util.*; 7 8 import net.nutch.io.*; 9 import net.nutch.fs.*; 10 import net.nutch.util.*; 11 12 23 public class DBSectionReader { 24 NutchFileSystem nfs; 25 File sectionFile; 26 WritableComparator comparator; 27 MapFile.Reader reader; 28 29 37 public DBSectionReader(NutchFileSystem nfs, File sectionFile, WritableComparator comparator) throws IOException { 38 this.nfs = nfs; 39 this.sectionFile = sectionFile; 40 this.comparator = comparator; 41 this.reader = new MapFile.Reader(nfs, sectionFile.getPath(), comparator); 42 } 43 44 48 public Page getPage(UTF8 url, Page p) throws IOException { 49 return (Page) reader.get(url, p); 50 } 51 52 56 public Vector getPages(MD5Hash md5) throws IOException { 57 Vector records = new Vector(3); 58 Page p = new Page(); 59 p.getMD5().set(md5); 60 61 reader.seek(p); 62 while (reader.next(p, NullWritable.get())) { 63 if (p.getMD5().compareTo(md5) == 0) { 64 records.add(p); 65 p = new Page(); 66 } else { 67 break; 68 } 69 } 70 71 return records; 72 } 73 74 78 public boolean pageExists(MD5Hash md5) throws IOException { 79 Page p = new Page(); 80 p.getMD5().set(md5); 81 reader.seek(p); 82 if (reader.next(p, NullWritable.get()) && p.getMD5().compareTo(md5) == 0) { 83 return true; 84 } else { 85 return false; 86 } 87 } 88 89 92 public Enumeration pages() throws IOException { 93 return new TableEnumerator(new MapFile.Reader(nfs, sectionFile.getPath(), comparator)); 94 } 95 96 class TableEnumerator implements Enumeration { 101 MapFile.Reader reader; 102 Page nextItem; 103 104 108 public TableEnumerator(MapFile.Reader reader) { 109 this.reader = reader; 110 this.nextItem = new Page(); 111 try { 112 if (! reader.next(new UTF8(), this.nextItem)) { 113 this.nextItem = null; 114 } 115 } catch (IOException ie) { 116 ie.printStackTrace(); 117 this.nextItem = null; 118 } 119 } 120 121 124 public boolean hasMoreElements() { 125 return (nextItem != null); 126 } 127 128 133 public Object nextElement() { 134 if (nextItem == null) { 135 throw new NoSuchElementException("PageDB Enumeration"); 136 } 137 Page toReturn = nextItem; 138 this.nextItem = new Page(); 139 try { 140 if (! reader.next(new UTF8(), nextItem)) { 141 this.nextItem = null; 142 } 143 } catch (IOException ie) { 144 this.nextItem = null; 145 } 146 return toReturn; 147 } 148 } 149 150 153 public Enumeration pagesByMD5() throws IOException { 154 return new IndexEnumerator(new SetFile.Reader(nfs, sectionFile.getPath(), comparator)); 155 } 156 157 class IndexEnumerator implements Enumeration { 162 SetFile.Reader reader; 163 Page nextItem; 164 165 169 public IndexEnumerator(SetFile.Reader reader) { 170 this.reader = reader; 171 this.nextItem = new Page(); 172 try { 173 if (! reader.next(nextItem)) { 174 this.nextItem = null; 175 } 176 } catch (IOException ie) { 177 this.nextItem = null; 178 } 179 } 180 181 184 public boolean hasMoreElements() { 185 return (nextItem != null); 186 } 187 188 193 public Object nextElement() { 194 if (nextItem == null) { 195 throw new NoSuchElementException("PageDB Enumeration"); 196 } 197 198 Page toReturn = nextItem; 199 this.nextItem = new Page(); 200 try { 201 if (! reader.next(nextItem)) { 202 this.nextItem = null; 203 } 204 } catch (IOException ie) { 205 this.nextItem = null; 206 } 207 return toReturn; 208 } 209 } 210 211 214 public Vector getLinks(UTF8 url) throws IOException { 215 Vector records = new Vector(3); 216 Link l = new Link(); 217 l.getURL().set(url); 218 219 reader.seek(l); 220 while (reader.next(l, NullWritable.get())) { 221 if (url.equals(l.getURL())) { 222 records.add(l); 223 l = new Link(); 224 } else { 225 break; 226 } 227 } 228 229 return records; 230 } 231 232 235 public Vector getLinks(MD5Hash md5) throws IOException { 236 Vector records = new Vector(3); 237 Link l = new Link(); 238 l.getFromID().set(md5); 239 240 reader.seek(l); 241 while (reader.next(l, NullWritable.get())) { 242 if (md5.equals(l.getFromID())) { 243 records.add(l); 244 l = new Link(); 245 } else { 246 break; 247 } 248 } 249 250 return records; 251 } 252 253 256 public Enumeration links() throws IOException { 257 return new MapEnumerator(new MapFile.Reader(nfs, sectionFile.getPath(), comparator)); 258 } 259 260 class MapEnumerator implements Enumeration { 264 MapFile.Reader reader; 265 Link nextItem; 266 267 271 public MapEnumerator(MapFile.Reader reader) { 272 this.reader = reader; 273 this.nextItem = new Link(); 274 try { 275 if (! reader.next(this.nextItem, NullWritable.get())) { 276 this.nextItem = null; 277 } 278 } catch (IOException ie) { 279 this.nextItem = null; 280 } 281 } 282 283 286 public boolean hasMoreElements() { 287 return (nextItem != null); 288 } 289 290 295 public Object nextElement() { 296 if (nextItem == null) { 297 throw new NoSuchElementException("PageDB Enumeration"); 298 } 299 300 Link toReturn = nextItem; 301 this.nextItem = new Link(); 302 try { 303 if (! reader.next(nextItem, NullWritable.get())) { 304 this.nextItem = null; 305 } 306 } catch (IOException ie) { 307 this.nextItem = null; 308 } 309 return toReturn; 310 } 311 } 312 313 315 public void close() throws IOException { 316 reader.close(); 317 } 318 } 319 | Popular Tags |