1 25 package org.archive.crawler.util; 26 27 import it.unimi.dsi.fastutil.longs.LongIterators; 28 import it.unimi.dsi.fastutil.longs.LongIterator; 29 30 import java.io.BufferedInputStream ; 31 import java.io.BufferedOutputStream ; 32 import java.io.DataInputStream ; 33 import java.io.DataOutputStream ; 34 import java.io.File ; 35 import java.io.FileInputStream ; 36 import java.io.FileNotFoundException ; 37 import java.io.FileOutputStream ; 38 import java.io.IOException ; 39 import java.util.NoSuchElementException ; 40 41 import org.archive.util.ArchiveUtils; 42 43 49 public class DiskFPMergeUriUniqFilter extends FPMergeUriUniqFilter { 50 long count = 0; 51 File scratchDir; 52 File currentFps; 53 File newFpsFile; 54 DataOutputStream newFps; 55 long newCount; 56 DataInputStream oldFps; 57 58 public DiskFPMergeUriUniqFilter(File scratchDir) { 59 super(); 60 this.scratchDir = scratchDir; 61 } 64 65 68 protected LongIterator beginFpMerge() { 69 newFpsFile = new File (scratchDir,ArchiveUtils.get17DigitDate()+".fp"); 70 if(newFpsFile.exists()) { 71 throw new RuntimeException (newFpsFile+" exists"); 72 } 73 try { 74 newFps = new DataOutputStream (new BufferedOutputStream (new FileOutputStream (newFpsFile))); 75 } catch (FileNotFoundException e) { 76 throw new RuntimeException (e); 77 } 78 newCount = 0; 79 if(currentFps==null) { 80 return LongIterators.EMPTY_ITERATOR; 81 } 82 try { 83 oldFps = new DataInputStream (new BufferedInputStream (new FileInputStream (currentFps))); 84 } catch (FileNotFoundException e1) { 85 throw new RuntimeException (e1); 86 } 87 return new DataFileLongIterator(oldFps); 88 } 89 90 93 protected void addNewFp(long fp) { 94 try { 95 newFps.writeLong(fp); 96 newCount++; 97 } catch (IOException e) { 98 throw new RuntimeException (e); 99 } 100 } 101 102 105 protected void finishFpMerge() { 106 try { 107 newFps.close(); 108 File oldFpsFile = currentFps; 109 currentFps = newFpsFile; 110 if(oldFps!=null) { 111 oldFps.close(); 112 } 113 if(oldFpsFile!=null) { 114 oldFpsFile.delete(); 115 } 116 } catch (IOException e) { 117 throw new RuntimeException (e); 118 } 119 count = newCount; 120 } 121 122 125 public long count() { 126 return count; 127 } 128 129 public class DataFileLongIterator implements LongIterator { 130 DataInputStream in; 131 long next; 132 boolean nextIsValid = false; 133 134 140 public DataFileLongIterator(DataInputStream disStream) { 141 this.in = disStream; 142 } 143 144 150 public boolean hasNext() { 151 return nextIsValid ? true: lookahead(); 152 } 153 154 159 protected boolean lookahead() { 160 try { 161 next = in.readLong(); 162 } catch (IOException e) { 163 return false; 164 } 165 nextIsValid = true; 166 return true; 167 } 168 169 174 public Long next() { 175 if (!hasNext()) { 176 throw new NoSuchElementException (); 177 } 178 Long returnObj = new Long (this.next); 180 this.nextIsValid = false; 181 return returnObj; 182 } 183 184 187 public void remove() { 188 throw new UnsupportedOperationException (); 189 } 190 191 192 195 public long nextLong() { 196 if (!hasNext()) { 197 throw new NoSuchElementException (); 198 } 199 this.nextIsValid = false; return this.next; 202 } 203 204 207 public int skip(int arg0) { 208 return 0; 209 } 210 } 211 212 } 213 | Popular Tags |