KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > mapReduce > InputFormat


/* Copyright (c) 2005 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */

package net.nutch.mapReduce;

import java.io.IOException;
import java.io.File;

import net.nutch.fs.NutchFileSystem;

11 /** An input data format. Input files are stored in a {@link NutchFileSystem}.
12  * The processing of an input file may be split across multiple machines.
13  * Files are processed as sequences of records, implementing {@link
14  * RecordReader}. Files must thus be split on record boundaries. */

15 public interface InputFormat {
16
17   /** A section of an input file. Returned by {@link
18    * InputFormat#getSplits(File[], int)} and passed to
19    * InputFormat#getRecordReader(Split). */

20   public interface Split {}
21
22   /** Splits a set of input files. One split is created per map task.
23    *
24    * @param fs the filesystem containing the files to be split
25    * @param files the input files to split
26    * @param numSplits the desired number of splits
27    * @return the splits
28    */

29   Split[] getSplits(NutchFileSystem fs, File JavaDoc[] files, int numSplits)
30     throws IOException JavaDoc;
31
32   /** Construct a {@link RecordReader} for a {@link Split}.
33    *
34    * @param split the split
35    * @return a {@link RecordReader}
36    */

37   RecordReader getRecordReader(Split split) throws IOException JavaDoc;
38 }
39
40
Popular Tags