package net.nutch.segment;

import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.logging.Logger;

import net.nutch.fetcher.FetcherOutput;
import net.nutch.io.ArrayFile;
import net.nutch.fs.*;
import net.nutch.parse.ParseData;
import net.nutch.parse.ParseText;
import net.nutch.protocol.Content;
import net.nutch.util.LogFormatter;

/**
 * Writes the parts of a segment directory: a mandatory fetcher-output file
 * and, optionally, content, parse-text and parse-data files. Each part is an
 * {@link ArrayFile} stored in its own sub-directory of the segment directory.
 *
 * <p>{@link #append}, {@link #setIndexInterval} and {@link #close} all lock on
 * this instance, so they do not interleave with one another.
 */
public class SegmentWriter {
  public static final Logger LOG = LogFormatter.getLogger("net.nutch.segment.SegmentWriter");

  /** Writer for fetcher output; always opened by the constructors. */
  public ArrayFile.Writer fetcherWriter;
  /** Writer for raw content, or <code>null</code> when content is not written. */
  public ArrayFile.Writer contentWriter;
  /** Writer for parse text, or <code>null</code> when parse text is not written. */
  public ArrayFile.Writer parseTextWriter;
  /** Writer for parse data, or <code>null</code> when parse data is not written. */
  public ArrayFile.Writer parseDataWriter;

  /** Number of successful {@link #append} calls so far. */
  public long size = 0L;

  /** The segment directory this writer was opened on. */
  public File segmentDir;

  // Placeholders appended in place of null arguments to append().
  private final Content emptyContent = new Content();
  private final ParseText emptyParseText = new ParseText();
  private final ParseData emptyParseData = new ParseData();

  /**
   * Opens a parsed segment with all parts on a new {@link LocalFileSystem}.
   *
   * @param dir segment directory
   * @param force if true, existing output directories are not treated as an error
   * @throws Exception if an output directory exists and <code>force</code> is false,
   *         or if a writer cannot be opened
   */
  public SegmentWriter(File dir, boolean force) throws Exception {
    this(new LocalFileSystem(), dir, force, true, true, true, true);
  }

  /**
   * Opens a parsed segment with all parts on the given file system.
   *
   * @param nfs file system to write to
   * @param dir segment directory
   * @param force if true, existing output directories are not treated as an error
   * @throws Exception if an output directory exists and <code>force</code> is false,
   *         or if a writer cannot be opened
   */
  public SegmentWriter(NutchFileSystem nfs, File dir, boolean force) throws Exception {
    this(nfs, dir, force, true, true, true, true);
  }

  /**
   * Opens a segment on a new {@link LocalFileSystem}, requesting all parts.
   *
   * @param dir segment directory
   * @param force if true, existing output directories are not treated as an error
   * @param isParsed if false, parse text and parse data are not written
   * @throws Exception if an output directory exists and <code>force</code> is false,
   *         or if a writer cannot be opened
   */
  public SegmentWriter(File dir, boolean force, boolean isParsed) throws Exception {
    this(new LocalFileSystem(), dir, force, isParsed, true, true, true);
  }

  /**
   * Opens a segment on the given file system, requesting all parts.
   *
   * @param nfs file system to write to
   * @param dir segment directory
   * @param force if true, existing output directories are not treated as an error
   * @param isParsed if false, parse text and parse data are not written
   * @throws Exception if an output directory exists and <code>force</code> is false,
   *         or if a writer cannot be opened
   */
  public SegmentWriter(NutchFileSystem nfs, File dir, boolean force, boolean isParsed) throws Exception {
    this(nfs, dir, force, isParsed, true, true, true);
  }

  /**
   * Opens a segment for writing, creating the segment directory if needed.
   *
   * <p>Every requested output directory is checked before any writer is
   * opened, so a rejected directory leaves no newly opened writer behind. If
   * opening a writer fails part-way, the writers opened so far are closed
   * before the failure propagates.
   *
   * @param nfs file system to write to
   * @param dir segment directory; created if it does not exist
   * @param force if true, existing output directories are not treated as an error
   * @param isParsed selects between {@link FetcherOutput#DIR_NAME} (true) and
   *        {@link FetcherOutput#DIR_NAME_NP} (false) for the fetcher output;
   *        when false, <code>withParseText</code> and <code>withParseData</code>
   *        are ignored and treated as false
   * @param withContent whether to write the content part
   * @param withParseText whether to write the parse-text part
   * @param withParseData whether to write the parse-data part
   * @throws Exception if an output directory exists and <code>force</code> is false,
   *         or if a writer cannot be opened
   */
  public SegmentWriter(NutchFileSystem nfs, File dir, boolean force, boolean isParsed,
          boolean withContent, boolean withParseText, boolean withParseData) throws Exception {
    segmentDir = dir;
    if (!nfs.exists(segmentDir)) {
      nfs.mkdirs(segmentDir);
    }

    // An unparsed segment has no parse output, whatever the caller asked for.
    if (!isParsed) {
      withParseText = false;
      withParseData = false;
    }

    File fetcherDir = new File(segmentDir,
            isParsed ? FetcherOutput.DIR_NAME : FetcherOutput.DIR_NAME_NP);
    File contentDir = withContent ? new File(segmentDir, Content.DIR_NAME) : null;
    File parseTextDir = withParseText ? new File(segmentDir, ParseText.DIR_NAME) : null;
    File parseDataDir = withParseData ? new File(segmentDir, ParseData.DIR_NAME) : null;

    // Validate every target up front, before anything is opened.
    checkOutputDir(nfs, fetcherDir, force);
    checkOutputDir(nfs, contentDir, force);
    checkOutputDir(nfs, parseTextDir, force);
    checkOutputDir(nfs, parseDataDir, force);

    boolean opened = false;
    try {
      fetcherWriter = new ArrayFile.Writer(nfs, fetcherDir.toString(), FetcherOutput.class);
      if (contentDir != null) {
        contentWriter = new ArrayFile.Writer(nfs, contentDir.toString(), Content.class);
      }
      if (parseTextDir != null) {
        parseTextWriter = new ArrayFile.Writer(nfs, parseTextDir.toString(), ParseText.class);
      }
      if (parseDataDir != null) {
        parseDataWriter = new ArrayFile.Writer(nfs, parseDataDir.toString(), ParseData.class);
      }
      opened = true;
    } finally {
      // Do not leak writers that were opened before a later one failed.
      if (!opened) {
        closeWriters();
      }
    }
  }

  /**
   * Rejects an output directory that already exists unless overwriting is forced.
   *
   * @param out directory to check; <code>null</code> means the part is not written
   */
  private static void checkOutputDir(NutchFileSystem nfs, File out, boolean force) throws Exception {
    if (out == null) {
      return;
    }
    if (nfs.exists(out) && !force) {
      throw new Exception("Output directory " + out + " already exists.");
    }
  }

  /**
   * Returns a new segment name: the current time formatted as
   * <code>yyyyMMddHHmmss</code> using the default time zone.
   */
  public static String getNewSegmentName() {
    return new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
  }

  /**
   * Forwards the index interval to every open writer.
   *
   * @param interval value passed to each writer's <code>setIndexInterval</code>
   * @throws IOException if a writer rejects the change
   */
  public synchronized void setIndexInterval(int interval) throws IOException {
    fetcherWriter.setIndexInterval(interval);
    if (contentWriter != null) {
      contentWriter.setIndexInterval(interval);
    }
    if (parseTextWriter != null) {
      parseTextWriter.setIndexInterval(interval);
    }
    if (parseDataWriter != null) {
      parseDataWriter.setIndexInterval(interval);
    }
  }

  /**
   * Appends one entry to the fetcher output and to each optional part that
   * is open. A <code>null</code> argument for an open part is replaced by an
   * empty placeholder instance, so each open part receives exactly one entry
   * per call (presumably to keep the parts aligned by position).
   *
   * @param fo fetcher output; always written
   * @param co content, may be <code>null</code>
   * @param pt parse text, may be <code>null</code>
   * @param pd parse data, may be <code>null</code>
   * @throws IOException if any writer fails; {@link #size} is then not incremented
   */
  public synchronized void append(FetcherOutput fo, Content co, ParseText pt, ParseData pd) throws IOException {
    fetcherWriter.append(fo);
    if (contentWriter != null) {
      contentWriter.append(co != null ? co : emptyContent);
    }
    if (parseTextWriter != null) {
      parseTextWriter.append(pt != null ? pt : emptyParseText);
    }
    if (parseDataWriter != null) {
      parseDataWriter.append(pd != null ? pd : emptyParseData);
    }
    size++;
  }

  /**
   * Closes every open writer. Failures are logged at FINE level and do not
   * prevent the remaining writers from being closed.
   */
  public synchronized void close() {
    closeWriters();
  }

  // Kept private so the constructor can clean up without invoking an overridable method.
  private void closeWriters() {
    closeQuietly(fetcherWriter, "fetcherWriter");
    closeQuietly(contentWriter, "contentWriter");
    closeQuietly(parseTextWriter, "parseTextWriter");
    closeQuietly(parseDataWriter, "parseDataWriter");
  }

  /** Closes one writer if it is open, logging rather than propagating any failure. */
  private static void closeQuietly(ArrayFile.Writer writer, String name) {
    if (writer == null) {
      return;
    }
    try {
      writer.close();
    } catch (Exception e) {
      // Best effort: one failing writer must not stop the others from closing.
      LOG.fine("Exception closing " + name + ": " + e.getMessage());
    }
  }

  public static void main(String[] args) {}
}