package org.apache.lucene.index;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;

/**
 * Reads the stored fields of documents in a segment from the
 * .fdt (field data) and .fdx (field index) files.
 */
final class FieldsReader {
  private FieldInfos fieldInfos;
  private IndexInput fieldsStream;
  private IndexInput indexStream;
  private int size;

  FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
    fieldInfos = fn;

    fieldsStream = d.openInput(segment + ".fdt");
    indexStream = d.openInput(segment + ".fdx");

    // The index file holds one 8-byte pointer per document.
    size = (int) (indexStream.length() / 8);
  }

  final void close() throws IOException {
    fieldsStream.close();
    indexStream.close();
  }

  final int size() {
    return size;
  }

  final Document doc(int n) throws IOException {
    // Look up the pointer to document n's field data, then decode each stored field.
    indexStream.seek(n * 8L);
    long position = indexStream.readLong();
    fieldsStream.seek(position);

    Document doc = new Document();
    int numFields = fieldsStream.readVInt();
    for (int i = 0; i < numFields; i++) {
      int fieldNumber = fieldsStream.readVInt();
      FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);

      byte bits = fieldsStream.readByte();

      boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
      boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;

      if ((bits & FieldsWriter.FIELD_IS_BINARY) != 0) {
        final byte[] b = new byte[fieldsStream.readVInt()];
        fieldsStream.readBytes(b, 0, b.length);
        if (compressed)
          doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
        else
          doc.add(new Field(fi.name, b, Field.Store.YES));
      }
      else {
        Field.Index index;
        Field.Store store = Field.Store.YES;

        if (fi.isIndexed && tokenize)
          index = Field.Index.TOKENIZED;
        else if (fi.isIndexed && !tokenize)
          index = Field.Index.UN_TOKENIZED;
        else
          index = Field.Index.NO;

        Field.TermVector termVector = null;
        if (fi.storeTermVector) {
          if (fi.storeOffsetWithTermVector) {
            if (fi.storePositionWithTermVector) {
              termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
            }
            else {
              termVector = Field.TermVector.WITH_OFFSETS;
            }
          }
          else if (fi.storePositionWithTermVector) {
            termVector = Field.TermVector.WITH_POSITIONS;
          }
          else {
            termVector = Field.TermVector.YES;
          }
        }
        else {
          termVector = Field.TermVector.NO;
        }

        if (compressed) {
          store = Field.Store.COMPRESS;
          final byte[] b = new byte[fieldsStream.readVInt()];
          fieldsStream.readBytes(b, 0, b.length);
          Field f = new Field(fi.name, new String(uncompress(b), "UTF-8"), store,
                              index,
                              termVector);
          f.setOmitNorms(fi.omitNorms);
          doc.add(f);
        }
        else {
          Field f = new Field(fi.name, fieldsStream.readString(), store,
                              index,
                              termVector);
          f.setOmitNorms(fi.omitNorms);
          doc.add(f);
        }
      }
    }

    return doc;
  }

  private final byte[] uncompress(final byte[] input)
    throws IOException
  {
    Inflater decompressor = new Inflater();
    decompressor.setInput(input);

    // Create an expandable byte array to hold the decompressed data;
    // the input length is a reasonable initial size.
    ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);

    // Decompress the data in chunks until the stream is exhausted.
    byte[] buf = new byte[1024];
    while (!decompressor.finished()) {
      try {
        int count = decompressor.inflate(buf);
        bos.write(buf, 0, count);
      }
      catch (DataFormatException e) {
        throw new IOException("field data is in the wrong format: " + e.toString());
      }
    }

    decompressor.end();

    return bos.toByteArray();
  }
}
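
/*
 * Illustration only: a minimal, self-contained sketch of the compress/uncompress
 * round trip that uncompress() above participates in. The compress() helper below
 * is a hypothetical stand-in for the writer side (it is NOT taken from FieldsWriter);
 * the uncompress() here simply mirrors the method above using java.util.zip.
 */
class CompressedFieldRoundTripExample {

  // Hypothetical writer-side helper: deflate a stored field value.
  static byte[] compress(byte[] input) {
    java.util.zip.Deflater compressor = new java.util.zip.Deflater();
    compressor.setInput(input);
    compressor.finish();

    ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);
    byte[] buf = new byte[1024];
    while (!compressor.finished()) {
      int count = compressor.deflate(buf);
      bos.write(buf, 0, count);
    }
    compressor.end();
    return bos.toByteArray();
  }

  // Reader-side: inflate until the compressed stream is exhausted,
  // the same pattern used by FieldsReader.uncompress().
  static byte[] uncompress(byte[] input) throws IOException {
    Inflater decompressor = new Inflater();
    decompressor.setInput(input);

    ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);
    byte[] buf = new byte[1024];
    while (!decompressor.finished()) {
      try {
        int count = decompressor.inflate(buf);
        bos.write(buf, 0, count);
      }
      catch (DataFormatException e) {
        throw new IOException("field data is in the wrong format: " + e);
      }
    }
    decompressor.end();
    return bos.toByteArray();
  }

  public static void main(String[] args) throws IOException {
    byte[] original = "some stored field value".getBytes("UTF-8");
    byte[] restored = uncompress(compress(original));
    System.out.println(new String(restored, "UTF-8")); // prints the original value
  }
}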