package org.apache.lucene.index;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;

/**
 * Reads the stored fields of documents in a segment from the
 * .fdt (field data) and .fdx (field index) files.
 */
final class FieldsReader {
  private FieldInfos fieldInfos;
  private IndexInput fieldsStream;
  private IndexInput indexStream;
  private int size;

  FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
    fieldInfos = fn;

    fieldsStream = d.openInput(segment + ".fdt");
    indexStream = d.openInput(segment + ".fdx");

    // The index file holds one 8-byte pointer per document.
    size = (int) (indexStream.length() / 8);
  }

  final void close() throws IOException {
    fieldsStream.close();
    indexStream.close();
  }

  final int size() {
    return size;
  }

  final Document doc(int n) throws IOException {
    // Look up the pointer to document n's field data, then decode each stored field.
    indexStream.seek(n * 8L);
    long position = indexStream.readLong();
    fieldsStream.seek(position);

    Document doc = new Document();
    int numFields = fieldsStream.readVInt();
    for (int i = 0; i < numFields; i++) {
      int fieldNumber = fieldsStream.readVInt();
      FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);

      byte bits = fieldsStream.readByte();

      boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
      boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;

      if ((bits & FieldsWriter.FIELD_IS_BINARY) != 0) {
        final byte[] b = new byte[fieldsStream.readVInt()];
        fieldsStream.readBytes(b, 0, b.length);
        if (compressed)
          doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
        else
          doc.add(new Field(fi.name, b, Field.Store.YES));
      }
      else {
        Field.Index index;
        Field.Store store = Field.Store.YES;

        if (fi.isIndexed && tokenize)
          index = Field.Index.TOKENIZED;
        else if (fi.isIndexed && !tokenize)
          index = Field.Index.UN_TOKENIZED;
        else
          index = Field.Index.NO;

        Field.TermVector termVector = null;
        if (fi.storeTermVector) {
          if (fi.storeOffsetWithTermVector) {
            if (fi.storePositionWithTermVector) {
              termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
            }
            else {
              termVector = Field.TermVector.WITH_OFFSETS;
            }
          }
          else if (fi.storePositionWithTermVector) {
            termVector = Field.TermVector.WITH_POSITIONS;
          }
          else {
            termVector = Field.TermVector.YES;
          }
        }
        else {
          termVector = Field.TermVector.NO;
        }

        if (compressed) {
          store = Field.Store.COMPRESS;
          final byte[] b = new byte[fieldsStream.readVInt()];
          fieldsStream.readBytes(b, 0, b.length);
          Field f = new Field(fi.name, new String(uncompress(b), "UTF-8"), store,
                              index,
                              termVector);
          f.setOmitNorms(fi.omitNorms);
          doc.add(f);
        }
        else {
          Field f = new Field(fi.name, fieldsStream.readString(), store,
                              index,
                              termVector);
          f.setOmitNorms(fi.omitNorms);
          doc.add(f);
        }
      }
    }

    return doc;
  }

  private final byte[] uncompress(final byte[] input)
    throws IOException
  {
    Inflater decompressor = new Inflater();
    decompressor.setInput(input);

    // Create an expandable byte array to hold the decompressed data;
    // the input length is a reasonable initial size.
    ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);

    // Decompress the data in chunks until the stream is exhausted.
    byte[] buf = new byte[1024];
    while (!decompressor.finished()) {
      try {
        int count = decompressor.inflate(buf);
        bos.write(buf, 0, count);
      }
      catch (DataFormatException e) {
        throw new IOException("field data is in the wrong format: " + e.toString());
      }
    }

    decompressor.end();

    return bos.toByteArray();
  }
}
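
/*
 * Illustration only: a minimal, self-contained sketch of the compress/uncompress
 * round trip that uncompress() above participates in. The compress() helper below
 * is a hypothetical stand-in for the writer side (it is NOT taken from FieldsWriter);
 * the uncompress() here simply mirrors the method above using java.util.zip.
 */
class CompressedFieldRoundTripExample {

  // Hypothetical writer-side helper: deflate a stored field value.
  static byte[] compress(byte[] input) {
    java.util.zip.Deflater compressor = new java.util.zip.Deflater();
    compressor.setInput(input);
    compressor.finish();

    ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);
    byte[] buf = new byte[1024];
    while (!compressor.finished()) {
      int count = compressor.deflate(buf);
      bos.write(buf, 0, count);
    }
    compressor.end();
    return bos.toByteArray();
  }

  // Reader-side: inflate until the compressed stream is exhausted,
  // the same pattern used by FieldsReader.uncompress().
  static byte[] uncompress(byte[] input) throws IOException {
    Inflater decompressor = new Inflater();
    decompressor.setInput(input);

    ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);
    byte[] buf = new byte[1024];
    while (!decompressor.finished()) {
      try {
        int count = decompressor.inflate(buf);
        bos.write(buf, 0, count);
      }
      catch (DataFormatException e) {
        throw new IOException("field data is in the wrong format: " + e);
      }
    }
    decompressor.end();
    return bos.toByteArray();
  }

  public static void main(String[] args) throws IOException {
    byte[] original = "some stored field value".getBytes("UTF-8");
    byte[] restored = uncompress(compress(original));
    System.out.println(new String(restored, "UTF-8")); // prints the original value
  }
}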