KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > io > WriterPoolMember


1 /* $Id: WriterPoolMember.java,v 1.14.2.2 2007/01/13 01:31:35 stack-sf Exp $
2  *
3  * Created on July 21st, 2006
4  *
5  * Copyright (C) 2006 Internet Archive.
6  *
7  * This file is part of the Heritrix web crawler (crawler.archive.org).
8  *
9  * Heritrix is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU Lesser Public License as published by
11  * the Free Software Foundation; either version 2.1 of the License, or
12  * any later version.
13  *
14  * Heritrix is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU Lesser Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser Public License
20  * along with Heritrix; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22  */

23 package org.archive.io;
24
25 import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;
26
27 import java.io.File JavaDoc;
28 import java.io.FileOutputStream JavaDoc;
29 import java.io.IOException JavaDoc;
30 import java.io.InputStream JavaDoc;
31 import java.io.OutputStream JavaDoc;
32 import java.text.DecimalFormat JavaDoc;
33 import java.text.NumberFormat JavaDoc;
34 import java.util.Iterator JavaDoc;
35 import java.util.List JavaDoc;
36 import java.util.concurrent.atomic.AtomicInteger JavaDoc;
37 import java.util.logging.Logger JavaDoc;
38 import java.util.zip.GZIPOutputStream JavaDoc;
39
40 import org.archive.util.ArchiveUtils;
41 import org.archive.util.IoUtils;
42 import org.archive.util.TimestampSerialno;
43
44
45 /**
46  * Member of {@link WriterPool}.
47  * Implements rotating off files, file naming with some guarantee of
48  * uniqueness, and position in file. Subclass to pick up functionality for a
49  * particular Writer type.
50  * @author stack
51  * @version $Date: 2007/01/13 01:31:35 $ $Revision: 1.14.2.2 $
52  */

53 public abstract class WriterPoolMember implements ArchiveFileConstants {
54     private final Logger JavaDoc logger = Logger.getLogger(this.getClass().getName());
55     
56     public static final String JavaDoc UTF8 = "UTF-8";
57     
58     /**
59      * Default file prefix.
60      *
61      * Stands for Internet Archive Heritrix.
62      */

63     public static final String JavaDoc DEFAULT_PREFIX = "IAH";
64     
65     /**
66      * Value to interpolate with actual hostname.
67      */

68     public static final String JavaDoc HOSTNAME_VARIABLE = "${HOSTNAME}";
69     
70     /**
71      * Default for file suffix.
72      */

73     public static final String JavaDoc DEFAULT_SUFFIX = HOSTNAME_VARIABLE;
74
75     /**
76      * Reference to file we're currently writing.
77      */

78     private File JavaDoc f = null;
79
80     /**
81      * Output stream for file.
82      */

83     private OutputStream out = null;
84     
85     /**
86      * File output stream.
87      * This is needed so can get at channel to find current position in file.
88      */

89     private FileOutputStream JavaDoc fos;
90     
91     private final boolean compressed;
92     private List JavaDoc<File JavaDoc> writeDirs = null;
93     private String JavaDoc prefix = DEFAULT_PREFIX;
94     private String JavaDoc suffix = DEFAULT_SUFFIX;
95     private final int maxSize;
96     private final String JavaDoc extension;
97
98     /**
99      * Creation date for the current file.
100      * Set by {@link #createFile()}.
101      */

102     private String JavaDoc createTimestamp = "UNSET!!!";
103     
104     /**
105      * A running sequence used making unique file names.
106      */

107     final private AtomicInteger JavaDoc serialNo;
108     
109     /**
110      * Directories round-robin index.
111      */

112     private static int roundRobinIndex = 0;
113
114     /**
115      * NumberFormat instance for formatting serial number.
116      *
117      * Pads serial number with zeros.
118      */

119     private static NumberFormat JavaDoc serialNoFormatter = new DecimalFormat JavaDoc("00000");
120     
121     /**
122      * Constructor.
123      * Takes a stream. Use with caution. There is no upperbound check on size.
124      * Will just keep writing.
125      *
126      * @param serialNo used to create unique filename sequences
127      * @param out Where to write.
128      * @param file File the <code>out</code> is connected to.
129      * @param cmprs Compress the content written.
130      * @param a14DigitDate If null, we'll write current time.
131      * @throws IOException
132      */

133     protected WriterPoolMember(AtomicInteger JavaDoc serialNo,
134             final OutputStream out, final File JavaDoc file,
135             final boolean cmprs, String JavaDoc a14DigitDate)
136     throws IOException JavaDoc {
137         this(serialNo, null, null, cmprs, -1, null);
138         this.out = out;
139         this.f = file;
140     }
141     
142     /**
143      * Constructor.
144      *
145      * @param serialNo used to create unique filename sequences
146      * @param dirs Where to drop files.
147      * @param prefix File prefix to use.
148      * @param cmprs Compress the records written.
149      * @param maxSize Maximum size for ARC files written.
150      * @param extension Extension to give file.
151      */

152     public WriterPoolMember(AtomicInteger JavaDoc serialNo,
153             final List JavaDoc<File JavaDoc> dirs, final String JavaDoc prefix,
154             final boolean cmprs, final int maxSize, final String JavaDoc extension) {
155         this(serialNo, dirs, prefix, "", cmprs, maxSize, extension);
156     }
157             
158     /**
159      * Constructor.
160      *
161      * @param serialNo used to create unique filename sequences
162      * @param dirs Where to drop files.
163      * @param prefix File prefix to use.
164      * @param cmprs Compress the records written.
165      * @param maxSize Maximum size for ARC files written.
166      * @param suffix File tail to use. If null, unused.
167      * @param extension Extension to give file.
168      */

169     public WriterPoolMember(AtomicInteger JavaDoc serialNo,
170             final List JavaDoc<File JavaDoc> dirs, final String JavaDoc prefix,
171             final String JavaDoc suffix, final boolean cmprs,
172             final int maxSize, final String JavaDoc extension) {
173         this.suffix = suffix;
174         this.prefix = prefix;
175         this.maxSize = maxSize;
176         this.writeDirs = dirs;
177         this.compressed = cmprs;
178         this.extension = extension;
179         this.serialNo = serialNo;
180     }
181
182     /**
183      * Call this method just before/after any significant write.
184      *
185      * Call at the end of the writing of a record or just before we start
186      * writing a new record. Will close current file and open a new file
187      * if file size has passed out maxSize.
188      *
189      * <p>Creates and opens a file if none already open. One use of this method
190      * then is after construction, call this method to add the metadata, then
191      * call {@link #getPosition()} to find offset of first record.
192      *
193      * @exception IOException
194      */

195     public void checkSize() throws IOException JavaDoc {
196         if (this.out == null ||
197                 (this.maxSize != -1 && (this.f.length() > this.maxSize))) {
198             createFile();
199         }
200     }
201
202     /**
203      * Create a new file.
204      * Rotates off the current Writer and creates a new in its place
205      * to take subsequent writes. Usually called from {@link #checkSize()}.
206      * @return Name of file created.
207      * @throws IOException
208      */

209     protected String JavaDoc createFile() throws IOException JavaDoc {
210         TimestampSerialno tsn = getTimestampSerialNo();
211         String JavaDoc name = this.prefix + '-' + getUniqueBasename(tsn) +
212             ((this.suffix == null || this.suffix.length() <= 0)?
213                 "": "-" + this.suffix) + '.' + this.extension +
214             ((this.compressed)? '.' + COMPRESSED_FILE_EXTENSION: "") +
215             OCCUPIED_SUFFIX;
216         this.createTimestamp = tsn.getTimestamp();
217         File JavaDoc dir = getNextDirectory(this.writeDirs);
218         return createFile(new File JavaDoc(dir, name));
219     }
220     
221     protected String JavaDoc createFile(final File JavaDoc file) throws IOException JavaDoc {
222         close();
223         this.f = file;
224         this.fos = new FileOutputStream JavaDoc(this.f);
225         this.out = new FastBufferedOutputStream(this.fos);
226         logger.info("Opened " + this.f.getAbsolutePath());
227         return this.f.getName();
228     }
229     
230     /**
231      * @param dirs List of File objects that point at directories.
232      * @return Find next directory to write an arc too. If more
233      * than one, it tries to round-robin through each in turn.
234      * @throws IOException
235      */

236     protected File JavaDoc getNextDirectory(List JavaDoc<File JavaDoc> dirs)
237     throws IOException JavaDoc {
238         if (WriterPoolMember.roundRobinIndex >= dirs.size()) {
239             WriterPoolMember.roundRobinIndex = 0;
240         }
241         File JavaDoc d = null;
242         try {
243             d = checkWriteable((File JavaDoc)dirs.
244                 get(WriterPoolMember.roundRobinIndex));
245         } catch (IndexOutOfBoundsException JavaDoc e) {
246             // Dirs list might be altered underneath us.
247
// If so, we get this exception -- just keep on going.
248
}
249         if (d == null && dirs.size() > 1) {
250             for (Iterator JavaDoc i = dirs.iterator(); d == null && i.hasNext();) {
251                 d = checkWriteable((File JavaDoc)i.next());
252             }
253         } else {
254             WriterPoolMember.roundRobinIndex++;
255         }
256         if (d == null) {
257             throw new IOException JavaDoc("Directories unusable.");
258         }
259         return d;
260     }
261         
262     protected File JavaDoc checkWriteable(File JavaDoc d) {
263         if (d == null) {
264             return d;
265         }
266         
267         try {
268             IoUtils.ensureWriteableDirectory(d);
269         } catch(IOException JavaDoc e) {
270             logger.warning("Directory " + d.getPath() + " is not" +
271                 " writeable or cannot be created: " + e.getMessage());
272             d = null;
273         }
274         return d;
275     }
276     
277     protected synchronized TimestampSerialno getTimestampSerialNo() {
278         return getTimestampSerialNo(null);
279     }
280     
281     /**
282      * Do static synchronization around getting of counter and timestamp so
283      * no chance of a thread getting in between the getting of timestamp and
284      * allocation of serial number throwing the two out of alignment.
285      *
286      * @param timestamp If non-null, use passed timestamp (must be 14 digit
287      * ARC format), else if null, timestamp with now.
288      * @return Instance of data structure that has timestamp and serial no.
289      */

290     protected synchronized TimestampSerialno
291             getTimestampSerialNo(final String JavaDoc timestamp) {
292         return new TimestampSerialno((timestamp != null)?
293                 timestamp: ArchiveUtils.get14DigitDate(),
294                 serialNo.getAndIncrement());
295     }
296
297     /**
298      * Return a unique basename.
299      *
300      * Name is timestamp + an every increasing sequence number.
301      *
302      * @param tsn Structure with timestamp and serial number.
303      *
304      * @return Unique basename.
305      */

306     private String JavaDoc getUniqueBasename(TimestampSerialno tsn) {
307         return tsn.getTimestamp() + "-" +
308            WriterPoolMember.serialNoFormatter.format(tsn.getSerialNumber());
309     }
310
311
312     /**
313      * Get the file name
314      *
315      * @return the filename, as if uncompressed
316      */

317     protected String JavaDoc getBaseFilename() {
318         String JavaDoc name = this.f.getName();
319         if (this.compressed && name.endsWith(DOT_COMPRESSED_FILE_EXTENSION)) {
320             return name.substring(0,name.length() - 3);
321         } else if(this.compressed &&
322                 name.endsWith(DOT_COMPRESSED_FILE_EXTENSION +
323                     OCCUPIED_SUFFIX)) {
324             return name.substring(0, name.length() -
325                 (3 + OCCUPIED_SUFFIX.length()));
326         } else {
327             return name;
328         }
329     }
330
331     /**
332      * Get this file.
333      *
334      * Used by junit test to test for creation and when {@link WriterPool} wants
335      * to invalidate a file.
336      *
337      * @return The current file.
338      */

339     public File JavaDoc getFile() {
340         return this.f;
341     }
342
343     /**
344      * Post write tasks.
345      *
346      * Has side effects. Will open new file if we're at the upperbound.
347      * If we're writing compressed files, it will wrap output stream with a
348      * GZIP writer with side effect that GZIP header is written out on the
349      * stream.
350      *
351      * @exception IOException
352      */

353     protected void preWriteRecordTasks()
354     throws IOException JavaDoc {
355         checkSize();
356         if (this.compressed) {
357             // Wrap stream in GZIP Writer.
358
// The below construction immediately writes the GZIP 'default'
359
// header out on the underlying stream.
360
this.out = new CompressedStream(this.out);
361         }
362     }
363
364     /**
365      * Post file write tasks.
366      * If compressed, finishes up compression and flushes stream so any
367      * subsequent checks get good reading.
368      *
369      * @exception IOException
370      */

371     protected void postWriteRecordTasks()
372     throws IOException JavaDoc {
373         if (this.compressed) {
374             CompressedStream o = (CompressedStream)this.out;
375             o.finish();
376             o.flush();
377             this.out = o.getWrappedStream();
378         }
379     }
380     
381     /**
382      * Postion in current physical file.
383      * Used making accounting of bytes written.
384      * @return Position in underlying file. Call before or after writing
385      * records *only* to be safe.
386      * @throws IOException
387      */

388     public long getPosition() throws IOException JavaDoc {
389         long position = 0;
390         if (this.out != null) {
391             this.out.flush();
392         }
393         if (this.fos != null) {
394             // Call flush on underlying file though probably not needed assuming
395
// above this.out.flush called through to this.fos.
396
this.fos.flush();
397             position = this.fos.getChannel().position();
398         }
399         return position;
400     }
401
402     public boolean isCompressed() {
403         return compressed;
404     }
405     
406     protected void write(final byte [] b) throws IOException JavaDoc {
407         this.out.write(b);
408     }
409     
410     protected void flush() throws IOException JavaDoc {
411         this.out.flush();
412     }
413
414     protected void write(byte[] b, int off, int len) throws IOException JavaDoc {
415         this.out.write(b, off, len);
416     }
417
418     protected void write(int b) throws IOException JavaDoc {
419         this.out.write(b);
420     }
421     
422     protected void readFullyFrom(final InputStream JavaDoc is, final long recordLength,
423             final byte [] b)
424     throws IOException JavaDoc {
425         int read = b.length;
426         int total = 0;
427         while((read = is.read(b)) != -1 && total < recordLength) {
428             total += read;
429             write(b, 0, read);
430         }
431         if (total != recordLength) {
432             throw new IOException JavaDoc("Read " + total + " but expected " +
433                 recordLength);
434         }
435     }
436     
437     public void close() throws IOException JavaDoc {
438         if (this.out == null) {
439             return;
440         }
441         this.out.close();
442         this.out = null;
443         this.fos = null;
444         if (this.f != null && this.f.exists()) {
445             String JavaDoc path = this.f.getAbsolutePath();
446             if (path.endsWith(OCCUPIED_SUFFIX)) {
447                 File JavaDoc f = new File JavaDoc(path.substring(0,
448                         path.length() - OCCUPIED_SUFFIX.length()));
449                 if (!this.f.renameTo(f)) {
450                     logger.warning("Failed rename of " + path);
451                 }
452                 this.f = f;
453             }
454             
455             logger.info("Closed " + this.f.getAbsolutePath() +
456                     ", size " + this.f.length());
457         }
458     }
459     
460     protected OutputStream getOutputStream() {
461         return this.out;
462     }
463     
464     protected String JavaDoc getCreateTimestamp() {
465         return createTimestamp;
466     }
467     
468     
469     /**
470      * An override so we get access to underlying output stream.
471      * @author stack
472      */

473     private class CompressedStream extends GZIPOutputStream JavaDoc {
474         public CompressedStream(OutputStream out)
475         throws IOException JavaDoc {
476             super(out);
477         }
478         
479         /**
480          * @return Reference to stream being compressed.
481          */

482         OutputStream getWrappedStream() {
483             return this.out;
484         }
485     }
486 }
487
Popular Tags