KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > index > CompoundFileWriter


1 package org.apache.lucene.index;
2
3 /**
4  * Copyright 2004 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
26
27
28 /**
29  * Combines multiple files into a single compound file.
30  * The file format:<br>
31  * <ul>
32  * <li>VInt fileCount</li>
33  * <li>{Directory}
34  * fileCount entries with the following structure:</li>
35  * <ul>
36  * <li>long dataOffset</li>
37  * <li>String fileName</li>
38  * </ul>
39  * <li>{File Data}
40  * fileCount entries with the raw data of the corresponding file</li>
41  * </ul>
42  *
43  * The fileCount integer indicates how many files are contained in this compound
44  * file. The {directory} that follows has that many entries. Each directory entry
45  * contains a long pointer to the start of this file's data section, and a String
46  * with that file's name.
47  *
48  * @author Dmitry Serebrennikov
49  * @version $Id: CompoundFileWriter.java 179621 2005-06-02 18:18:50Z dnaber $
50  */

51 final class CompoundFileWriter {
52
53     private static final class FileEntry {
54         /** source file */
55         String JavaDoc file;
56
57         /** temporary holder for the start of directory entry for this file */
58         long directoryOffset;
59
60         /** temporary holder for the start of this file's data section */
61         long dataOffset;
62     }
63
64
65     private Directory directory;
66     private String JavaDoc fileName;
67     private HashSet JavaDoc ids;
68     private LinkedList JavaDoc entries;
69     private boolean merged = false;
70
71
72     /** Create the compound stream in the specified file. The file name is the
73      * entire name (no extensions are added).
74      * @throws NullPointerException if <code>dir</code> or <code>name</code> is null
75      */

76     public CompoundFileWriter(Directory dir, String JavaDoc name) {
77         if (dir == null)
78             throw new NullPointerException JavaDoc("directory cannot be null");
79         if (name == null)
80             throw new NullPointerException JavaDoc("name cannot be null");
81
82         directory = dir;
83         fileName = name;
84         ids = new HashSet JavaDoc();
85         entries = new LinkedList JavaDoc();
86     }
87
88     /** Returns the directory of the compound file. */
89     public Directory getDirectory() {
90         return directory;
91     }
92
93     /** Returns the name of the compound file. */
94     public String JavaDoc getName() {
95         return fileName;
96     }
97
98     /** Add a source stream. <code>file</code> is the string by which the
99      * sub-stream will be known in the compound stream.
100      *
101      * @throws IllegalStateException if this writer is closed
102      * @throws NullPointerException if <code>file</code> is null
103      * @throws IllegalArgumentException if a file with the same name
104      * has been added already
105      */

106     public void addFile(String JavaDoc file) {
107         if (merged)
108             throw new IllegalStateException JavaDoc(
109                 "Can't add extensions after merge has been called");
110
111         if (file == null)
112             throw new NullPointerException JavaDoc(
113                 "file cannot be null");
114
115         if (! ids.add(file))
116             throw new IllegalArgumentException JavaDoc(
117                 "File " + file + " already added");
118
119         FileEntry entry = new FileEntry();
120         entry.file = file;
121         entries.add(entry);
122     }
123
124     /** Merge files with the extensions added up to now.
125      * All files with these extensions are combined sequentially into the
126      * compound stream. After successful merge, the source files
127      * are deleted.
128      * @throws IllegalStateException if close() had been called before or
129      * if no file has been added to this object
130      */

131     public void close() throws IOException JavaDoc {
132         if (merged)
133             throw new IllegalStateException JavaDoc(
134                 "Merge already performed");
135
136         if (entries.isEmpty())
137             throw new IllegalStateException JavaDoc(
138                 "No entries to merge have been defined");
139
140         merged = true;
141
142         // open the compound stream
143
IndexOutput os = null;
144         try {
145             os = directory.createOutput(fileName);
146
147             // Write the number of entries
148
os.writeVInt(entries.size());
149
150             // Write the directory with all offsets at 0.
151
// Remember the positions of directory entries so that we can
152
// adjust the offsets later
153
Iterator JavaDoc it = entries.iterator();
154             while(it.hasNext()) {
155                 FileEntry fe = (FileEntry) it.next();
156                 fe.directoryOffset = os.getFilePointer();
157                 os.writeLong(0); // for now
158
os.writeString(fe.file);
159             }
160
161             // Open the files and copy their data into the stream.
162
// Remember the locations of each file's data section.
163
byte buffer[] = new byte[1024];
164             it = entries.iterator();
165             while(it.hasNext()) {
166                 FileEntry fe = (FileEntry) it.next();
167                 fe.dataOffset = os.getFilePointer();
168                 copyFile(fe, os, buffer);
169             }
170
171             // Write the data offsets into the directory of the compound stream
172
it = entries.iterator();
173             while(it.hasNext()) {
174                 FileEntry fe = (FileEntry) it.next();
175                 os.seek(fe.directoryOffset);
176                 os.writeLong(fe.dataOffset);
177             }
178
179             // Close the output stream. Set the os to null before trying to
180
// close so that if an exception occurs during the close, the
181
// finally clause below will not attempt to close the stream
182
// the second time.
183
IndexOutput tmp = os;
184             os = null;
185             tmp.close();
186
187         } finally {
188             if (os != null) try { os.close(); } catch (IOException JavaDoc e) { }
189         }
190     }
191
192     /** Copy the contents of the file with specified extension into the
193      * provided output stream. Use the provided buffer for moving data
194      * to reduce memory allocation.
195      */

196     private void copyFile(FileEntry source, IndexOutput os, byte buffer[])
197     throws IOException JavaDoc
198     {
199         IndexInput is = null;
200         try {
201             long startPtr = os.getFilePointer();
202
203             is = directory.openInput(source.file);
204             long length = is.length();
205             long remainder = length;
206             int chunk = buffer.length;
207
208             while(remainder > 0) {
209                 int len = (int) Math.min(chunk, remainder);
210                 is.readBytes(buffer, 0, len);
211                 os.writeBytes(buffer, len);
212                 remainder -= len;
213             }
214
215             // Verify that remainder is 0
216
if (remainder != 0)
217                 throw new IOException JavaDoc(
218                     "Non-zero remainder length after copying: " + remainder
219                     + " (id: " + source.file + ", length: " + length
220                     + ", buffer size: " + chunk + ")");
221
222             // Verify that the output length diff is equal to original file
223
long endPtr = os.getFilePointer();
224             long diff = endPtr - startPtr;
225             if (diff != length)
226                 throw new IOException JavaDoc(
227                     "Difference in the output file offsets " + diff
228                     + " does not match the original file length " + length);
229
230         } finally {
231             if (is != null) is.close();
232         }
233     }
234 }
235
Popular Tags