KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > segment > SegmentWriter


/* Copyright (c) 2004 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3
4 package net.nutch.segment;
5
6 import java.io.File JavaDoc;
7 import java.io.IOException JavaDoc;
8 import java.text.SimpleDateFormat JavaDoc;
9 import java.util.Date JavaDoc;
10 import java.util.logging.Logger JavaDoc;
11
12 import net.nutch.fetcher.FetcherOutput;
13 import net.nutch.io.ArrayFile;
14 import net.nutch.fs.*;
15 import net.nutch.parse.ParseData;
16 import net.nutch.parse.ParseText;
17 import net.nutch.protocol.Content;
18 import net.nutch.util.LogFormatter;
19
20 /**
21  * This class holds together all data writers for a new segment.
22  * Some convenience methods are also provided, to append to the segment.
23  *
24  * @author Andrzej Bialecki <ab@getopt.org>
25  */

26 public class SegmentWriter {
27   public static final Logger JavaDoc LOG = LogFormatter.getLogger("net.nutch.segment.SegmentWriter");
28   
29   public ArrayFile.Writer fetcherWriter;
30   public ArrayFile.Writer contentWriter;
31   public ArrayFile.Writer parseTextWriter;
32   public ArrayFile.Writer parseDataWriter;
33
34   public long size = 0L;
35   
36   public File JavaDoc segmentDir;
37
38   public SegmentWriter(File JavaDoc dir, boolean force) throws Exception JavaDoc {
39     this(new LocalFileSystem(), dir, force, true, true, true, true);
40   }
41   
42   public SegmentWriter(NutchFileSystem nfs, File JavaDoc dir, boolean force) throws Exception JavaDoc {
43     this(nfs, dir, force, true, true, true, true);
44   }
45   
46   public SegmentWriter(File JavaDoc dir, boolean force, boolean isParsed) throws Exception JavaDoc {
47     this(new LocalFileSystem(), dir, force, isParsed, true, true, true);
48   }
49   
50   public SegmentWriter(NutchFileSystem nfs, File JavaDoc dir, boolean force, boolean isParsed) throws Exception JavaDoc {
51     this(nfs, dir, force, isParsed, true, true, true);
52   }
53   
54   /**
55    * Open a segment for writing. When a segment is open, its data files are created.
56    *
57    * @param nfs NutchFileSystem to use
58    * @param dir directory to contain the segment data
59    * @param force if true, and segment directory already exists and its content
60    * is in the way, sliently overwrite that content as needed.
61    * If false and the above condition arises, throw an Exception. Note: this
62    * doesn't result in an Exception, if force=false, and the target directory
63    * already exists, but contains other data not conflicting with the segment
64    * data.
65    * @param isParsed if true, create a segment with parseData and parseText; otherwise
66    * create a segment without them, and with the fetcher output located in
67    * {@link FetcherOutput#DIR_NAME_NP} directory.
68    * @param withContent if true, write Content, otherwise ignore it
69    * @param withParseText if true, write ParseText, otherwise ignore it. NOTE: if isParsed is
70    * false, this will be automaticaly set to false, too.
71    * @param withParseData if true, write ParseData, otherwise ignore it. NOTE: if isParsed is
72    * false, this will be automaticaly set to false, too.
73    * @throws Exception
74    */

75   public SegmentWriter(NutchFileSystem nfs, File JavaDoc dir, boolean force, boolean isParsed,
76           boolean withContent, boolean withParseText, boolean withParseData) throws Exception JavaDoc {
77     segmentDir = dir;
78     if (!nfs.exists(segmentDir)) {
79       nfs.mkdirs(segmentDir);
80     }
81     File JavaDoc out = null;
82     if (isParsed) {
83       out = new File JavaDoc(segmentDir, FetcherOutput.DIR_NAME);
84     } else {
85       out = new File JavaDoc(segmentDir, FetcherOutput.DIR_NAME_NP);
86       withParseData = false;
87       withParseText = false;
88     }
89     if (nfs.exists(out) && !force) {
90       throw new Exception JavaDoc("Output directory " + out + " already exists.");
91     }
92     fetcherWriter = new ArrayFile.Writer(nfs, out.toString(), FetcherOutput.class);
93     if (withContent) {
94       out = new File JavaDoc(dir, Content.DIR_NAME);
95       if (nfs.exists(out) && !force) {
96         throw new Exception JavaDoc("Output directory " + out + " already exists.");
97       }
98       contentWriter = new ArrayFile.Writer(nfs, out.toString(), Content.class);
99     }
100     if (withParseText) {
101       out = new File JavaDoc(dir, ParseText.DIR_NAME);
102       if (nfs.exists(out) && !force) {
103         throw new Exception JavaDoc("Output directory " + out + " already exists.");
104       }
105       parseTextWriter = new ArrayFile.Writer(nfs, out.toString(), ParseText.class);
106     }
107     if (withParseData) {
108       out = new File JavaDoc(dir, ParseData.DIR_NAME);
109       if (nfs.exists(out) && !force) {
110         throw new Exception JavaDoc("Output directory " + out + " already exists.");
111       }
112       parseDataWriter = new ArrayFile.Writer(nfs, out.toString(), ParseData.class);
113     }
114   }
115
116   /** Create a new segment name */
117   public static String JavaDoc getNewSegmentName() {
118     return new SimpleDateFormat JavaDoc("yyyyMMddHHmmss").format(new Date JavaDoc(System.currentTimeMillis()));
119   }
120
121   /** Sets the index interval for all segment writers. */
122   public synchronized void setIndexInterval(int interval) throws IOException JavaDoc {
123     fetcherWriter.setIndexInterval(interval);
124     if (contentWriter != null) contentWriter.setIndexInterval(interval);
125     if (parseTextWriter != null) parseTextWriter.setIndexInterval(interval);
126     if (parseDataWriter != null) parseDataWriter.setIndexInterval(interval);
127   }
128
129   private Content _co = new Content();
130   private ParseText _pt = new ParseText();
131   private ParseData _pd = new ParseData();
132   
133   /**
134    * Append new values to the output segment.
135    * <p>NOTE: if this segment writer has some data files open, but the respective
136    * arguments are null, empty values will be written instead.</p>
137    * @param fo fetcher output, must not be null
138    * @param co content, may be null (but see the note above)
139    * @param pt parseText, may be null (but see the note above)
140    * @param pd parseData, may be null (but see the note above)
141    * @throws IOException
142    */

143   public synchronized void append(FetcherOutput fo, Content co, ParseText pt, ParseData pd) throws IOException JavaDoc {
144     fetcherWriter.append(fo);
145     if (contentWriter != null) {
146       if (co == null) co = _co;
147       contentWriter.append(co);
148     }
149     if (parseTextWriter != null) {
150       if (pt == null) pt = _pt;
151       parseTextWriter.append(pt);
152     }
153     if (parseDataWriter != null) {
154       if (pd == null) pd = _pd;
155       parseDataWriter.append(pd);
156     }
157     size++;
158   }
159   
160   /** Close all writers. */
161   public void close() {
162     try {
163       fetcherWriter.close();
164     } catch (Exception JavaDoc e) {
165       LOG.fine("Exception closing fetcherWriter: " + e.getMessage());
166     }
167     if (contentWriter != null) try {
168       contentWriter.close();
169     } catch (Exception JavaDoc e) {
170       LOG.fine("Exception closing contentWriter: " + e.getMessage());
171     }
172     if (parseTextWriter != null) try {
173       parseTextWriter.close();
174     } catch (Exception JavaDoc e) {
175       LOG.fine("Exception closing parseTextWriter: " + e.getMessage());
176     }
177     if (parseDataWriter != null) try {
178       parseDataWriter.close();
179     } catch (Exception JavaDoc e) {
180       LOG.fine("Exception closing parseDataWriter: " + e.getMessage());
181     }
182   }
183
184   public static void main(String JavaDoc[] args) {}
185 }
186
Popular Tags